2013-03-01 20:19:20 +00:00
|
|
|
/*
|
|
|
|
* This file is part of mpv.
|
|
|
|
*
|
2016-01-19 17:36:34 +00:00
|
|
|
* mpv is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
2013-03-01 20:19:20 +00:00
|
|
|
*
|
|
|
|
* mpv is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
2016-01-19 17:36:34 +00:00
|
|
|
* GNU Lesser General Public License for more details.
|
2013-03-01 20:19:20 +00:00
|
|
|
*
|
2016-01-19 17:36:34 +00:00
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
* License along with mpv. If not, see <http://www.gnu.org/licenses/>.
|
2013-03-01 20:19:20 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include <assert.h>
|
2013-03-30 03:01:17 +00:00
|
|
|
#include <math.h>
|
2016-04-16 16:14:32 +00:00
|
|
|
#include <stdarg.h>
|
2013-03-30 03:01:17 +00:00
|
|
|
#include <stdbool.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <assert.h>
|
|
|
|
|
|
|
|
#include <libavutil/common.h>
|
2015-03-27 12:27:40 +00:00
|
|
|
#include <libavutil/lfg.h>
|
2013-03-01 20:19:20 +00:00
|
|
|
|
2015-08-29 02:12:56 +00:00
|
|
|
#include "video.h"
|
2013-03-30 03:01:17 +00:00
|
|
|
|
2014-08-29 10:09:04 +00:00
|
|
|
#include "misc/bstr.h"
|
2015-09-09 18:40:04 +00:00
|
|
|
#include "options/m_config.h"
|
2015-11-28 18:59:11 +00:00
|
|
|
#include "common/global.h"
|
|
|
|
#include "options/options.h"
|
2015-08-29 02:12:56 +00:00
|
|
|
#include "utils.h"
|
|
|
|
#include "hwdec.h"
|
|
|
|
#include "osd.h"
|
vo_opengl: start work on rendering API abstraction
This starts work on moving OpenGL-specific code out of the general
renderer code, so that we can support other other GPU APIs. This is in
a very early stage and it's only a proof of concept. It's unknown
whether this will succeed or result in other backends.
For now, the GL rendering API ("ra") and its only provider (ra_gl) does
texture creation/upload/destruction only. And it's used for the main
video texture only. All other code is still hardcoded to GL.
There is some duplication with ra_format and gl_format handling. In the
end, only the ra variants will be needed (plus the gl_format table of
course). For now, this is simpler, because for some reason lots of hwdec
code still requires the GL variants, and would have to be updated to
use the ra ones.
Currently, the video.c code accesses private ra_gl fields. In the end,
it should not do that of course, and it would not include ra_gl.h.
Probably adds bugs, but you can keep them.
2017-07-26 09:19:51 +00:00
|
|
|
#include "ra.h"
|
2015-09-23 20:13:03 +00:00
|
|
|
#include "stream/stream.h"
|
2015-09-05 12:03:00 +00:00
|
|
|
#include "video_shaders.h"
|
2016-04-20 23:33:13 +00:00
|
|
|
#include "user_shaders.h"
|
2015-08-28 23:10:30 +00:00
|
|
|
#include "video/out/filter_kernels.h"
|
|
|
|
#include "video/out/aspect.h"
|
|
|
|
#include "video/out/dither.h"
|
|
|
|
#include "video/out/vo.h"
|
2013-03-01 20:19:20 +00:00
|
|
|
|
2015-01-20 20:46:19 +00:00
|
|
|
// scale/cscale arguments that map directly to shader filter routines.
|
2013-03-01 20:19:20 +00:00
|
|
|
// Note that the convolution filters are not included in this list.
|
2014-06-10 21:56:05 +00:00
|
|
|
static const char *const fixed_scale_filters[] = {
|
2013-03-01 20:19:20 +00:00
|
|
|
"bilinear",
|
|
|
|
"bicubic_fast",
|
2015-03-15 05:27:11 +00:00
|
|
|
"oversample",
|
2013-03-01 20:19:20 +00:00
|
|
|
NULL // sentinel: terminates the list
|
|
|
|
};
|
2015-03-15 06:11:51 +00:00
|
|
|
// tscale (temporal interpolation) filter names that map directly to shader
// routines, i.e. are not convolution kernels; NULL-terminated.
static const char *const fixed_tscale_filters[] = {
|
2015-07-11 11:55:45 +00:00
|
|
|
"oversample",
|
2016-07-19 18:12:33 +00:00
|
|
|
"linear",
|
2015-03-15 06:11:51 +00:00
|
|
|
NULL // sentinel: terminates the list
|
|
|
|
};
|
2013-03-01 20:19:20 +00:00
|
|
|
|
|
|
|
// must be sorted, and terminated with 0
|
2014-12-08 16:08:26 +00:00
|
|
|
// Selectable convolution filter sizes (see note above: sorted, 0-terminated).
int filter_sizes[] =
|
|
|
|
    {2, 4, 6, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 0};
|
2017-09-27 22:07:42 +00:00
|
|
|
// Allowed sizes for tscale convolution filters; sorted and 0-terminated,
// same convention as filter_sizes above.
int tscale_sizes[] = {2, 4, 6, 8, 0};
|
2013-03-01 20:19:20 +00:00
|
|
|
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
// A bare 2D vertex position.
struct vertex_pt {
|
|
|
|
    float x, y; // position components
|
|
|
|
};
|
|
|
|
|
2013-03-01 20:19:20 +00:00
|
|
|
// One plane of the current video frame, backed by a GPU texture.
struct texplane {
|
2017-08-04 13:47:50 +00:00
|
|
|
    struct ra_tex *tex; // texture holding this plane's pixel data
|
2013-03-28 19:40:19 +00:00
|
|
|
    int w, h; // plane size in pixels
|
2016-07-03 14:09:26 +00:00
|
|
|
    bool flipped; // NOTE(review): presumably vertical flip of source data — confirm
|
2013-03-01 20:19:20 +00:00
|
|
|
};
|
|
|
|
|
2013-03-28 19:40:19 +00:00
|
|
|
// The current video frame: its per-plane textures plus the source mp_image.
struct video_image {
|
2013-03-28 20:02:53 +00:00
|
|
|
    struct texplane planes[4]; // storage for up to 4 texture planes
|
2015-01-22 17:29:37 +00:00
|
|
|
    struct mp_image *mpi; // original input image
|
2016-11-01 12:06:48 +00:00
|
|
|
    uint64_t id; // unique ID identifying mpi contents
|
vo_opengl: refactor how hwdec interop exports textures
Rename gl_hwdec_driver.map_image to map_frame, and let it fill out a
struct gl_hwdec_frame describing the exact texture layout. This gives
more flexibility to what the hwdec interop can export. In particular, it
can export strange component orders/permutations and textures with
padded size. (The latter originating from cropped video.)
The way gl_hwdec_frame works is in the spirit of the rest of the
vo_opengl video processing code, which tends to put as much information
in immediate state (as part of the dataflow), instead of declaring it
globally. To some degree this duplicates the texplane and img_tex
structs, but until we somehow unify those, it's better to give the hwdec
state its own struct. The fact that changing the hwdec struct would
require changes and testing on at least 4 platform/GPU combinations
makes duplicating it almost a requirement to avoid pain later.
Make gl_hwdec_driver.reinit set the new image format and remove the
gl_hwdec.converted_imgfmt field.
Likewise, gl_hwdec.gl_texture_target is replaced with
gl_hwdec_plane.gl_target.
Split out a init_image_desc function from init_format. The latter is not
called in the hwdec case at all anymore. Setting up most of struct
texplane is also completely separate in the hwdec and normal cases.
video.c does not check whether the hwdec "mapped" image format is
supported. This should not really happen anyway, and if it does, the
hwdec interop backend must fail at creation time, so this is not an
issue.
2016-05-10 16:29:10 +00:00
|
|
|
    bool hwdec_mapped; // whether planes currently come from a mapped hwdec frame — NOTE(review): confirm
|
2013-03-28 19:40:19 +00:00
|
|
|
};
|
|
|
|
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
// Content category of a texture plane.
enum plane_type {
|
|
|
|
    PLANE_NONE = 0,
|
|
|
|
    PLANE_RGB,
|
|
|
|
    PLANE_LUMA,
|
|
|
|
    PLANE_CHROMA,
|
|
|
|
    PLANE_ALPHA,
|
|
|
|
    PLANE_XYZ,
|
2014-11-23 19:06:05 +00:00
|
|
|
};
|
|
|
|
|
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number
of changes and improvements:
1. mpv users can now introspect the vo_opengl passes, which is something
that has been requested multiple times.
2. performance data is now measured per-pass, which helps both
development and debugging.
3. since adding more passes is cheap, we can now report information for
more passes (e.g. the blit pass, and the osd pass). Note: we also
switch to nanosecond scale, to be able to measure these passes
better.
4. `--user-shaders` authors can now describe their own passes, helping
users both identify which user shaders are active at any given time
as well as helping shader authors identify performance issues.
5. the timing data per pass is now exported as a full list of samples,
so projects like Argon-/mpv-stats can immediately read out all of the
samples and render a graph without having to manually poll this
option constantly.
Due to gl_timer's design being complicated (directly reading performance
data would block, so we delay the actual read-back until the next _start
command), it's vital not to conflate different passes that might be
doing different things from one frame to another. To accomplish this,
the actual timers are stored as part of the gl_shader_cache's sc_entry,
which makes them unique for that exact shader.
Starting and stopping the time measurement is easy to unify with the
gl_sc architecture, because the existing API already relies on a
"generate, render, reset" flow, so we can just put timer_start and
timer_stop in sc_generate and sc_reset, respectively.
The ugliest thing about this code is that due to the need to keep pass
information relatively stable in between frames, we need to distinguish
between "new" and "redrawn" frames, which bloats the code somewhat and
also feels hacky and vo_opengl-specific. (But then again, this entire
thing is vo_opengl-specific)
2017-06-29 15:00:06 +00:00
|
|
|
// Human-readable names for enum plane_type values (indexed by the enum).
static const char *plane_names[] = {
|
|
|
|
    [PLANE_NONE] = "unknown",
|
|
|
|
    [PLANE_RGB] = "rgb",
|
|
|
|
    [PLANE_LUMA] = "luma",
|
|
|
|
    [PLANE_CHROMA] = "chroma",
|
|
|
|
    [PLANE_ALPHA] = "alpha",
|
|
|
|
    [PLANE_XYZ] = "xyz",
|
|
|
|
};
|
|
|
|
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
// A self-contained description of a source image which can be bound to a
|
|
|
|
// texture unit and sampled from. Contains metadata about how it's to be used
|
2017-09-20 08:45:33 +00:00
|
|
|
struct image {
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
    enum plane_type type; // must be set to something non-zero
|
|
|
|
    int components; // number of relevant coordinates
|
|
|
|
    float multiplier; // multiplier to be used when sampling
|
2017-08-04 13:47:50 +00:00
|
|
|
    struct ra_tex *tex; // texture to bind and sample from
|
2016-04-16 16:14:32 +00:00
|
|
|
    int w, h; // logical size (after transformation)
|
2016-04-08 20:21:31 +00:00
|
|
|
    struct gl_transform transform; // rendering transformation
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
};
|
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
// A named image, for user scripting purposes
|
|
|
|
// Pairs an image with the name it was saved under (cf. comment above).
struct saved_img {
|
2016-04-16 16:14:32 +00:00
|
|
|
    const char *name; // lookup key for the saved image
|
2017-09-20 08:45:33 +00:00
|
|
|
    struct image img; // the saved image itself
|
2016-04-16 16:14:32 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
// A texture hook. This is some operation that transforms a named texture as
|
|
|
|
// soon as it's generated
|
|
|
|
struct tex_hook {
|
2017-07-10 20:52:39 +00:00
|
|
|
    const char *save_tex; // name the hook's result is saved under
|
|
|
|
    const char *hook_tex[SHADER_MAX_HOOKS]; // texture names this hook fires on
|
2017-09-27 21:38:54 +00:00
|
|
|
    const char *bind_tex[SHADER_MAX_BINDS]; // NOTE(review): presumably extra textures bound for the hook — confirm
|
2016-04-16 16:14:32 +00:00
|
|
|
    int components; // how many components are relevant (0 = same as input)
|
2017-07-10 20:52:39 +00:00
|
|
|
    void *priv; // this gets talloc_freed when the tex_hook is removed
|
2017-09-20 08:45:33 +00:00
|
|
|
    void (*hook)(struct gl_video *p, struct image img, // generates GLSL
|
2016-04-16 16:14:32 +00:00
|
|
|
                 struct gl_transform *trans, void *priv);
|
2017-09-20 08:45:33 +00:00
|
|
|
    bool (*cond)(struct gl_video *p, struct image img, void *priv); // whether the hook should run
|
2016-04-16 16:14:32 +00:00
|
|
|
};
|
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
// A cached rendered frame — NOTE(review): presumably kept for interpolation
// and redraws (see SURFACES_MAX ring below) — confirm.
struct surface {
|
|
|
|
    struct ra_tex *tex; // texture holding the rendered frame
|
2016-11-01 12:06:48 +00:00
|
|
|
    uint64_t id; // id of the source image contents — cf. video_image.id
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
    double pts; // presentation timestamp of the frame
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
};
|
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
#define SURFACES_MAX 10
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
|
2015-09-23 20:13:03 +00:00
|
|
|
// Contents of a file cached in memory, keyed by its path.
struct cached_file {
|
|
|
|
    char *path; // path the file was loaded from (cache key)
|
2016-04-20 23:33:13 +00:00
|
|
|
    struct bstr body; // the file's contents
|
2015-09-23 20:13:03 +00:00
|
|
|
};
|
|
|
|
|
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number
of changes and improvements:
1. mpv users can now introspect the vo_opengl passes, which is something
that has been requested multiple times.
2. performance data is now measured per-pass, which helps both
development and debugging.
3. since adding more passes is cheap, we can now report information for
more passes (e.g. the blit pass, and the osd pass). Note: we also
switch to nanosecond scale, to be able to measure these passes
better.
4. `--user-shaders` authors can now describe their own passes, helping
users both identify which user shaders are active at any given time
as well as helping shader authors identify performance issues.
5. the timing data per pass is now exported as a full list of samples,
so projects like Argon-/mpv-stats can immediately read out all of the
samples and render a graph without having to manually poll this
option constantly.
Due to gl_timer's design being complicated (directly reading performance
data would block, so we delay the actual read-back until the next _start
command), it's vital not to conflate different passes that might be
doing different things from one frame to another. To accomplish this,
the actual timers are stored as part of the gl_shader_cache's sc_entry,
which makes them unique for that exact shader.
Starting and stopping the time measurement is easy to unify with the
gl_sc architecture, because the existing API already relies on a
"generate, render, reset" flow, so we can just put timer_start and
timer_stop in sc_generate and sc_reset, respectively.
The ugliest thing about this code is that due to the need to keep pass
information relatively stable in between frames, we need to distinguish
between "new" and "redrawn" frames, which bloats the code somewhat and
also feels hacky and vo_opengl-specific. (But then again, this entire
thing is vo_opengl-specific)
2017-06-29 15:00:06 +00:00
|
|
|
// Description and performance data for a single render pass (vo-passes).
struct pass_info {
|
|
|
|
    struct bstr desc; // human-readable description of the pass
|
|
|
|
    struct mp_pass_perf perf; // measured timing samples for the pass
|
|
|
|
};
|
|
|
|
|
2017-07-23 07:41:51 +00:00
|
|
|
// A GPU buffer plus the mp_image that currently references its memory
// (NOTE(review): "dr" presumably = direct rendering — confirm).
struct dr_buffer {
|
2017-08-05 20:29:48 +00:00
|
|
|
    struct ra_buf *buf; // the underlying GPU buffer
|
vo_opengl: start work on rendering API abstraction
This starts work on moving OpenGL-specific code out of the general
renderer code, so that we can support other other GPU APIs. This is in
a very early stage and it's only a proof of concept. It's unknown
whether this will succeed or result in other backends.
For now, the GL rendering API ("ra") and its only provider (ra_gl) does
texture creation/upload/destruction only. And it's used for the main
video texture only. All other code is still hardcoded to GL.
There is some duplication with ra_format and gl_format handling. In the
end, only the ra variants will be needed (plus the gl_format table of
course). For now, this is simpler, because for some reason lots of hwdec
code still requires the GL variants, and would have to be updated to
use the ra ones.
Currently, the video.c code accesses private ra_gl fields. In the end,
it should not do that of course, and it would not include ra_gl.h.
Probably adds bugs, but you can keep them.
2017-07-26 09:19:51 +00:00
|
|
|
    // The mpi reference will keep the data from being recycled (or from other
|
|
|
|
    // references gaining write access) while the GPU is accessing the buffer.
|
2017-07-23 07:41:51 +00:00
|
|
|
    struct mp_image *mpi;
|
|
|
|
};
|
|
|
|
|
2013-03-01 20:19:20 +00:00
|
|
|
struct gl_video {
|
vo_opengl: start work on rendering API abstraction
This starts work on moving OpenGL-specific code out of the general
renderer code, so that we can support other other GPU APIs. This is in
a very early stage and it's only a proof of concept. It's unknown
whether this will succeed or result in other backends.
For now, the GL rendering API ("ra") and its only provider (ra_gl) does
texture creation/upload/destruction only. And it's used for the main
video texture only. All other code is still hardcoded to GL.
There is some duplication with ra_format and gl_format handling. In the
end, only the ra variants will be needed (plus the gl_format table of
course). For now, this is simpler, because for some reason lots of hwdec
code still requires the GL variants, and would have to be updated to
use the ra ones.
Currently, the video.c code accesses private ra_gl fields. In the end,
it should not do that of course, and it would not include ra_gl.h.
Probably adds bugs, but you can keep them.
2017-07-26 09:19:51 +00:00
|
|
|
struct ra *ra;
|
2013-03-01 20:19:20 +00:00
|
|
|
|
2015-03-27 12:27:40 +00:00
|
|
|
struct mpv_global *global;
|
2013-07-31 19:44:21 +00:00
|
|
|
struct mp_log *log;
|
2013-03-01 20:19:20 +00:00
|
|
|
struct gl_video_opts opts;
|
2016-09-02 13:59:40 +00:00
|
|
|
struct m_config_cache *opts_cache;
|
2016-02-13 14:33:00 +00:00
|
|
|
struct gl_lcms *cms;
|
2013-03-01 20:19:20 +00:00
|
|
|
|
2016-06-14 08:35:43 +00:00
|
|
|
int fb_depth; // actual bits available in GL main framebuffer
|
2017-08-11 11:02:13 +00:00
|
|
|
struct m_color clear_color;
|
|
|
|
bool force_clear_color;
|
2013-03-01 20:19:20 +00:00
|
|
|
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
struct gl_shader_cache *sc;
|
|
|
|
|
2014-06-15 18:46:57 +00:00
|
|
|
struct osd_state *osd_state;
|
2013-03-01 20:19:20 +00:00
|
|
|
struct mpgl_osd *osd;
|
2014-06-15 18:46:57 +00:00
|
|
|
double osd_pts;
|
2013-03-01 20:19:20 +00:00
|
|
|
|
2017-07-29 19:22:11 +00:00
|
|
|
struct ra_tex *lut_3d_texture;
|
2013-03-01 20:19:20 +00:00
|
|
|
bool use_lut_3d;
|
2016-07-25 08:18:41 +00:00
|
|
|
int lut_3d_size[3];
|
2013-03-01 20:19:20 +00:00
|
|
|
|
2017-07-29 18:14:48 +00:00
|
|
|
struct ra_tex *dither_texture;
|
2013-03-01 20:19:20 +00:00
|
|
|
|
2015-01-29 18:53:49 +00:00
|
|
|
struct mp_image_params real_image_params; // configured format
|
|
|
|
struct mp_image_params image_params; // texture format (mind hwdec case)
|
vo_opengl: start work on rendering API abstraction
This starts work on moving OpenGL-specific code out of the general
renderer code, so that we can support other GPU APIs. This is in
a very early stage and it's only a proof of concept. It's unknown
whether this will succeed or result in other backends.
For now, the GL rendering API ("ra") and its only provider (ra_gl) does
texture creation/upload/destruction only. And it's used for the main
video texture only. All other code is still hardcoded to GL.
There is some duplication with ra_format and gl_format handling. In the
end, only the ra variants will be needed (plus the gl_format table of
course). For now, this is simpler, because for some reason lots of hwdec
code still requires the GL variants, and would have to be updated to
use the ra ones.
Currently, the video.c code accesses private ra_gl fields. In the end,
it should not do that of course, and it would not include ra_gl.h.
Probably adds bugs, but you can keep them.
2017-07-26 09:19:51 +00:00
|
|
|
struct ra_imgfmt_desc ra_format; // texture format
|
2015-01-29 14:50:21 +00:00
|
|
|
int plane_count;
|
2013-03-28 19:40:19 +00:00
|
|
|
|
2017-06-30 15:13:58 +00:00
|
|
|
bool is_gray;
|
2013-07-18 11:52:38 +00:00
|
|
|
bool has_alpha;
|
|
|
|
char color_swizzle[5];
|
2016-01-26 19:47:32 +00:00
|
|
|
bool use_integer_conversion;
|
2013-03-01 20:19:20 +00:00
|
|
|
|
2013-03-28 19:40:19 +00:00
|
|
|
struct video_image image;
|
2013-03-01 20:19:20 +00:00
|
|
|
|
2017-07-23 07:41:51 +00:00
|
|
|
struct dr_buffer *dr_buffers;
|
|
|
|
int num_dr_buffers;
|
|
|
|
|
|
|
|
bool using_dr_path;
|
|
|
|
|
2015-11-19 20:22:24 +00:00
|
|
|
bool dumb_mode;
|
2016-01-26 19:47:32 +00:00
|
|
|
bool forced_dumb_mode;
|
2015-11-19 20:22:24 +00:00
|
|
|
|
2017-09-27 22:07:42 +00:00
|
|
|
// Cached vertex array, to avoid re-allocation per frame. For simplicity,
|
|
|
|
// our vertex format is simply a list of `vertex_pt`s, since this greatly
|
|
|
|
// simplifies offset calculation at the cost of (unneeded) flexibility.
|
|
|
|
struct vertex_pt *tmp_vertex;
|
|
|
|
struct ra_renderpass_input *vao;
|
|
|
|
int vao_len;
|
|
|
|
|
2017-08-04 11:48:37 +00:00
|
|
|
const struct ra_format *fbo_format;
|
2017-09-20 08:45:33 +00:00
|
|
|
struct ra_tex *merge_tex[4];
|
|
|
|
struct ra_tex *scale_tex[4];
|
|
|
|
struct ra_tex *integer_tex[4];
|
|
|
|
struct ra_tex *indirect_tex;
|
|
|
|
struct ra_tex *blend_subs_tex;
|
|
|
|
struct ra_tex *screen_tex;
|
|
|
|
struct ra_tex *output_tex;
|
|
|
|
struct ra_tex *vdpau_deinterleave_tex[2];
|
2017-09-27 21:38:54 +00:00
|
|
|
struct ra_tex **hook_textures;
|
|
|
|
int num_hook_textures;
|
|
|
|
int idx_hook_textures;
|
|
|
|
|
2017-08-05 20:29:48 +00:00
|
|
|
struct ra_buf *hdr_peak_ssbo;
|
2017-09-20 08:45:33 +00:00
|
|
|
struct surface surfaces[SURFACES_MAX];
|
2015-10-26 22:43:48 +00:00
|
|
|
|
2017-07-10 23:59:21 +00:00
|
|
|
// user pass descriptions and textures
|
2017-09-27 21:38:54 +00:00
|
|
|
struct tex_hook *tex_hooks;
|
|
|
|
int num_tex_hooks;
|
|
|
|
struct gl_user_shader_tex *user_textures;
|
|
|
|
int num_user_textures;
|
2017-07-10 23:59:21 +00:00
|
|
|
|
2015-03-13 18:30:31 +00:00
|
|
|
int surface_idx;
|
|
|
|
int surface_now;
|
2015-07-02 11:17:20 +00:00
|
|
|
int frames_drawn;
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seem
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
bool is_interpolated;
|
2017-09-20 08:45:33 +00:00
|
|
|
bool output_tex_valid;
|
2013-03-01 20:19:20 +00:00
|
|
|
|
2016-03-05 08:42:57 +00:00
|
|
|
// state for configured scalers
|
|
|
|
struct scaler scaler[SCALER_COUNT];
|
2013-03-01 20:19:20 +00:00
|
|
|
|
video: redo video equalizer option handling
I really wouldn't care much about this, but some parts of the core code
are under HAVE_GPL, so there's some need to get rid of it. Simply turn
the video equalizer from its current fine-grained handling with vf/vo
fallbacks into global options. This makes updating them much simpler.
This removes any possibility of applying video equalizers in filters,
which affects vf_scale, and the previously removed vf_eq. Not a big
loss, since the preferred VOs have this builtin.
Remove video equalizer handling from vo_direct3d, vo_sdl, vo_vaapi, and
vo_xv. I'm not going to waste my time on these legacy VOs.
vo.eq_opts_cache exists _only_ to send a VOCTRL_SET_EQUALIZER, which
exists _only_ to trigger a redraw. This seems silly, but for now I feel
like this is less of a pain. The rest of the equalizer using code is
self-updating.
See commit 96b906a51d5 for how some video equalizer code was GPL only.
Some command line option names and ranges can probably be traced back to
a GPL only committer, but we don't consider these copyrightable.
2017-08-22 15:01:35 +00:00
|
|
|
struct mp_csp_equalizer_state *video_eq;
|
2013-03-01 20:19:20 +00:00
|
|
|
|
|
|
|
struct mp_rect src_rect; // displayed part of the source video
|
|
|
|
struct mp_rect dst_rect; // video rectangle on output window
|
|
|
|
struct mp_osd_res osd_rect; // OSD size/margins
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
performing the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
|
|
|
|
// temporary during rendering
|
2017-07-29 18:41:50 +00:00
|
|
|
struct compute_info pass_compute; // compute shader metadata for this pass
|
2017-09-27 22:07:42 +00:00
|
|
|
struct image *pass_imgs; // bound images for this pass
|
|
|
|
int num_pass_imgs;
|
2017-09-27 21:38:54 +00:00
|
|
|
struct saved_img *saved_imgs; // saved (named) images for this frame
|
|
|
|
int num_saved_imgs;
|
|
|
|
|
|
|
|
// effective current texture metadata - this will essentially affect the
|
|
|
|
// next render pass target, as well as implicitly tracking what needs to
|
|
|
|
// be done with the image
|
2015-10-23 17:52:03 +00:00
|
|
|
int texture_w, texture_h;
|
2015-10-26 22:43:48 +00:00
|
|
|
struct gl_transform texture_offset; // texture transform without rotation
|
2016-03-05 11:38:51 +00:00
|
|
|
int components;
|
2015-03-15 21:52:34 +00:00
|
|
|
bool use_linear;
|
2015-03-15 23:09:36 +00:00
|
|
|
float user_gamma;
|
2013-03-01 20:19:20 +00:00
|
|
|
|
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number
of changes and improvements:
1. mpv users can now introspect the vo_opengl passes, which is something
that has been requested multiple times.
2. performance data is now measured per-pass, which helps both
development and debugging.
3. since adding more passes is cheap, we can now report information for
more passes (e.g. the blit pass, and the osd pass). Note: we also
switch to nanosecond scale, to be able to measure these passes
better.
4. `--user-shaders` authors can now describe their own passes, helping
users both identify which user shaders are active at any given time
as well as helping shader authors identify performance issues.
5. the timing data per pass is now exported as a full list of samples,
so projects like Argon-/mpv-stats can immediately read out all of the
samples and render a graph without having to manually poll this
option constantly.
Due to gl_timer's design being complicated (directly reading performance
data would block, so we delay the actual read-back until the next _start
command), it's vital not to conflate different passes that might be
doing different things from one frame to another. To accomplish this,
the actual timers are stored as part of the gl_shader_cache's sc_entry,
which makes them unique for that exact shader.
Starting and stopping the time measurement is easy to unify with the
gl_sc architecture, because the existing API already relies on a
"generate, render, reset" flow, so we can just put timer_start and
timer_stop in sc_generate and sc_reset, respectively.
The ugliest thing about this code is that due to the need to keep pass
information relatively stable in between frames, we need to distinguish
between "new" and "redrawn" frames, which bloats the code somewhat and
also feels hacky and vo_opengl-specific. (But then again, this entire
thing is vo_opengl-specific)
2017-06-29 15:00:06 +00:00
|
|
|
// pass info / metrics
|
2017-09-27 21:38:54 +00:00
|
|
|
struct pass_info pass_fresh[VO_PASS_PERF_MAX];
|
|
|
|
struct pass_info pass_redraw[VO_PASS_PERF_MAX];
|
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number
of changes and improvements:
1. mpv users can now introspect the vo_opengl passes, which is something
that has been requested multiple times.
2. performance data is now measured per-pass, which helps both
development and debugging.
3. since adding more passes is cheap, we can now report information for
more passes (e.g. the blit pass, and the osd pass). Note: we also
switch to nanosecond scale, to be able to measure these passes
better.
4. `--user-shaders` authors can now describe their own passes, helping
users both identify which user shaders are active at any given time
as well as helping shader authors identify performance issues.
5. the timing data per pass is now exported as a full list of samples,
so projects like Argon-/mpv-stats can immediately read out all of the
samples and render a graph without having to manually poll this
option constantly.
Due to gl_timer's design being complicated (directly reading performance
data would block, so we delay the actual read-back until the next _start
command), it's vital not to conflate different passes that might be
doing different things from one frame to another. To accomplish this,
the actual timers are stored as part of the gl_shader_cache's sc_entry,
which makes them unique for that exact shader.
Starting and stopping the time measurement is easy to unify with the
gl_sc architecture, because the existing API already relies on a
"generate, render, reset" flow, so we can just put timer_start and
timer_stop in sc_generate and sc_reset, respectively.
The ugliest thing about this code is that due to the need to keep pass
information relatively stable in between frames, we need to distinguish
between "new" and "redrawn" frames, which bloats the code somewhat and
also feels hacky and vo_opengl-specific. (But then again, this entire
thing is vo_opengl-specific)
2017-06-29 15:00:06 +00:00
|
|
|
struct pass_info *pass;
|
|
|
|
int pass_idx;
|
2017-08-05 16:20:45 +00:00
|
|
|
struct timer_pool *upload_timer;
|
|
|
|
struct timer_pool *blit_timer;
|
2017-08-05 16:59:28 +00:00
|
|
|
struct timer_pool *osd_timer;
|
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number
of changes and improvements:
1. mpv users can now introspect the vo_opengl passes, which is something
that has been requested multiple times.
2. performance data is now measured per-pass, which helps both
development and debugging.
3. since adding more passes is cheap, we can now report information for
more passes (e.g. the blit pass, and the osd pass). Note: we also
switch to nanosecond scale, to be able to measure these passes
better.
4. `--user-shaders` authors can now describe their own passes, helping
users both identify which user shaders are active at any given time
as well as helping shader authors identify performance issues.
5. the timing data per pass is now exported as a full list of samples,
so projects like Argon-/mpv-stats can immediately read out all of the
samples and render a graph without having to manually poll this
option constantly.
Due to gl_timer's design being complicated (directly reading performance
data would block, so we delay the actual read-back until the next _start
command), it's vital not to conflate different passes that might be
doing different things from one frame to another. To accomplish this,
the actual timers are stored as part of the gl_shader_cache's sc_entry,
which makes them unique for that exact shader.
Starting and stopping the time measurement is easy to unify with the
gl_sc architecture, because the existing API already relies on a
"generate, render, reset" flow, so we can just put timer_start and
timer_stop in sc_generate and sc_reset, respectively.
The ugliest thing about this code is that due to the need to keep pass
information relatively stable in between frames, we need to distinguish
between "new" and "redrawn" frames, which bloats the code somewhat and
also feels hacky and vo_opengl-specific. (But then again, this entire
thing is vo_opengl-specific)
2017-06-29 15:00:06 +00:00
|
|
|
|
2015-03-27 12:27:40 +00:00
|
|
|
int frames_uploaded;
|
2013-03-01 20:19:20 +00:00
|
|
|
int frames_rendered;
|
2015-03-27 12:27:40 +00:00
|
|
|
AVLFG lfg;
|
2013-03-01 20:19:20 +00:00
|
|
|
|
2013-05-25 23:48:39 +00:00
|
|
|
// Cached because computing it can take relatively long
|
|
|
|
int last_dither_matrix_size;
|
|
|
|
float *last_dither_matrix;
|
|
|
|
|
2016-05-16 10:46:29 +00:00
|
|
|
struct cached_file *files;
|
2015-09-23 20:13:03 +00:00
|
|
|
int num_files;
|
|
|
|
|
vo_gpu: make it possible to load multiple hwdec interop drivers
Make the VO<->decoder interface capable of supporting multiple hwdec
APIs at once. The main gain is that this simplifies autoprobing a lot.
Before this change, it could happen that the VO loaded the "wrong" hwdec
API, and the decoder was stuck with the choice (breaking hw decoding).
With the change applied, the VO simply loads all available APIs, so
autoprobing trickery is left entirely to the decoder.
In the past, we were quite careful about not accidentally loading the
wrong interop drivers. This was in part to make sure autoprobing works,
but also because libva had this obnoxious bug of dumping garbage to
stderr when using the API. libva was fixed, so this is not a problem
anymore.
The --opengl-hwdec-interop option is changed in various ways (again...),
and renamed to --gpu-hwdec-interop. It does not have much use anymore,
other than debugging. It's notable that the order in the hwdec interop
array ra_hwdec_drivers[] still matters if multiple drivers support the
same image formats, so the option can explicitly force one, if that
should ever be necessary, or more likely, for debugging. One example are
the ra_hwdec_d3d11egl and ra_hwdec_d3d11eglrgb drivers, which both
support d3d11 input.
vo_gpu now always loads the interop lazily by default, but when it does,
it loads them all. vo_opengl_cb now always loads them when the GL
context handle is initialized. I don't expect that this causes any
problems.
It's now possible to do things like changing between vdpau and nvdec
decoding at runtime.
This is also preparation for cleaning up vd_lavc.c hwdec autoprobing.
It's another reason why hwdec_devices_request_all() does not take a
hwdec type anymore.
2017-12-01 04:05:00 +00:00
|
|
|
bool hwdec_interop_loading_done;
|
|
|
|
struct ra_hwdec **hwdecs;
|
|
|
|
int num_hwdecs;
|
|
|
|
|
vo_opengl: separate hwdec context and mapping, port it to use ra
This does two separate rather intrusive things:
1. Make the hwdec context (which does initialization, provides the
device to the decoder, and other basic state) and frame mapping
(getting textures from a mp_image) separate. This is more
flexible, and you could map multiple images at once. It will
help removing some hwdec special-casing from video.c.
2. Switch all hwdec API use to ra. Of course all code is still
GL specific, but in theory it would be possible to support other
backends. The most important change is that the hwdec interop
returns ra objects, instead of anything GL specific. This removes
the last dependency on GL-specific header files from video.c.
I'm mixing these separate changes because both requires essentially
rewriting all the glue code, so better do them at once. For the same
reason, this change isn't done incrementally.
hwdec_ios.m is untested, since I can't test it. Apart from superficial
mistakes, this also requires dealing with Apple's texture format
fuckups: they force you to use GL_LUMINANCE[_ALPHA] instead of GL_RED
and GL_RG. We also need to report the correct format via ra_tex to
the renderer, which is done by find_la_variant(). It's unknown whether
this works correctly.
hwdec_rpi.c as well as vo_rpi.c are still broken. (I need to pull my
RPI out of a dusty pile of devices and cables, so, later.)
2017-08-10 15:48:33 +00:00
|
|
|
struct ra_hwdec_mapper *hwdec_mapper;
|
vo_gpu: make it possible to load multiple hwdec interop drivers
Make the VO<->decoder interface capable of supporting multiple hwdec
APIs at once. The main gain is that this simplifies autoprobing a lot.
Before this change, it could happen that the VO loaded the "wrong" hwdec
API, and the decoder was stuck with the choice (breaking hw decoding).
With the change applied, the VO simply loads all available APIs, so
autoprobing trickery is left entirely to the decoder.
In the past, we were quite careful about not accidentally loading the
wrong interop drivers. This was in part to make sure autoprobing works,
but also because libva had this obnoxious bug of dumping garbage to
stderr when using the API. libva was fixed, so this is not a problem
anymore.
The --opengl-hwdec-interop option is changed in various ways (again...),
and renamed to --gpu-hwdec-interop. It does not have much use anymore,
other than debugging. It's notable that the order in the hwdec interop
array ra_hwdec_drivers[] still matters if multiple drivers support the
same image formats, so the option can explicitly force one, if that
should ever be necessary, or more likely, for debugging. One example are
the ra_hwdec_d3d11egl and ra_hwdec_d3d11eglrgb drivers, which both
support d3d11 input.
vo_gpu now always loads the interop lazily by default, but when it does,
it loads them all. vo_opengl_cb now always loads them when the GL
context handle is initialized. I don't expect that this causes any
problems.
It's now possible to do things like changing between vdpau and nvdec
decoding at runtime.
This is also preparation for cleaning up vd_lavc.c hwdec autoprobing.
It's another reason why hwdec_devices_request_all() does not take a
hwdec type anymore.
2017-12-01 04:05:00 +00:00
|
|
|
struct ra_hwdec *hwdec_overlay;
|
2013-11-03 23:00:18 +00:00
|
|
|
bool hwdec_active;
|
2015-11-28 18:59:11 +00:00
|
|
|
|
|
|
|
bool dsi_warned;
|
2016-05-19 10:18:48 +00:00
|
|
|
bool broken_frame; // temporary error state
|
2013-03-01 20:19:20 +00:00
|
|
|
};
|
|
|
|
|
2016-09-02 13:59:40 +00:00
|
|
|
// Default values for all gl_video renderer options. Any field not listed
// here is zero-initialized per C designated-initializer semantics.
static const struct gl_video_opts gl_video_opts_def = {
    .dither_algo = DITHER_FRUIT,
    .dither_depth = -1,             // NOTE(review): presumably -1 = auto-pick depth — confirm against option docs
    .dither_size = 6,
    .temporal_dither_period = 1,
    .fbo_format = "auto",
    .sigmoid_center = 0.75,
    .sigmoid_slope = 6.5,
    // Per-scaler defaults. Entry order must match the scaler indices used by
    // SCALER_OPTS below: scale, dscale, cscale, tscale. The first two
    // sub-initializers of each entry are the kernel and window configs;
    // NAN params appear to mean "no user value set" — confirm with the
    // filter kernel code.
    .scaler = {
        {{"bilinear", .params={NAN, NAN}}, {.params = {NAN, NAN}},
         .cutoff = 0.001}, // scale
        {{NULL, .params={NAN, NAN}}, {.params = {NAN, NAN}},
         .cutoff = 0.001}, // dscale
        {{"bilinear", .params={NAN, NAN}}, {.params = {NAN, NAN}},
         .cutoff = 0.001}, // cscale
        {{"mitchell", .params={NAN, NAN}}, {.params = {NAN, NAN}},
         .clamp = 1, }, // tscale
    },
    .scaler_resizes_only = 1,
    .scaler_lut_size = 6,
    .interpolation_threshold = 0.0001,
    .alpha_mode = ALPHA_BLEND_TILES,
    .background = {0, 0, 0, 255},   // opaque black
    .gamma = 1.0f,
    .tone_mapping = TONE_MAPPING_HABLE,
    .tone_mapping_param = NAN,      // NAN: use the selected curve's own default parameter — TODO confirm
    .tone_mapping_desat = 0.5,
    .early_flush = -1,
    .hwdec_interop = "auto",
};
|
|
|
|
|
2013-12-21 19:03:36 +00:00
|
|
|
// Option validator callback (used via OPT_STRING_VALIDATE in SCALER_OPTS)
// for scaler kernel names; definition appears elsewhere in this file.
static int validate_scaler_opt(struct mp_log *log, const m_option_t *opt,
                               struct bstr name, struct bstr param);
|
2013-03-01 20:19:20 +00:00
|
|
|
|
vo_opengl: separate kernel and window
This makes the core much more elegant, reusable, reconfigurable and also
allows us to more easily add aliases for specific configurations.
Furthermore, this lets us apply a generic blur factor / window function
to arbitrary filters, so we can finally "mix and match" in order to
fine-tune windowing functions.
A few notes are in order:
1. The current system for configuring scalers is ugly and rapidly
getting unwieldy. I modified the man page to make it a bit more
bearable, but long-term we have to do something about it; especially
since..
2. There's currently no way to affect the blur factor or parameters of
the window functions themselves. For example, I can't actually
fine-tune the kaiser window's param1, since there's simply no way to
do so in the current API - even though filter_kernels.c supports it
just fine!
3. This removes some lesser used filters (especially those which are
purely window functions to begin with). If anybody asks, you can get
eg. the old behavior of scale=hanning by using
scale=box:scale-window=hanning:scale-radius=1 (and yes, the result is
just as terrible as that sounds - which is why nobody should have
been using them in the first place).
4. This changes the semantics of the "triangle" scaler slightly - it now
has an arbitrary radius. This can possibly produce weird results for
people who were previously using scale-down=triangle, especially if
in combination with scale-radius (for the usual upscaling). The
correct fix for this is to use scale-down=bilinear_slow instead,
which is an alias for triangle at radius 1.
In regards to the last point, in future I want to make it so that
filters have a filter-specific "preferred radius" (for the ones that
are arbitrarily tunable), once the configuration system for filters has
been redesigned (in particular in a way that will let us separate scale
and scale-down cleanly). That way, "triangle" can simply have the
preferred radius of 1 by default, while still being tunable. (Rather
than the default radius being hard-coded to 3 always)
2015-03-25 03:40:28 +00:00
|
|
|
// Option validator callback (used via OPT_STRING_VALIDATE for the
// "-window" sub-options) for scaler window-function names; definition
// appears elsewhere in this file.
static int validate_window_opt(struct mp_log *log, const m_option_t *opt,
                               struct bstr name, struct bstr param);
|
|
|
|
|
2013-03-01 20:19:20 +00:00
|
|
|
#define OPT_BASE_STRUCT struct gl_video_opts
|
2015-08-29 01:24:15 +00:00
|
|
|
|
|
|
|
// Expands to the full set of m_option entries for one scaler slot.
//   n: string-literal option name prefix (e.g. "scale", "cscale") — the
//      sub-option names are formed by literal concatenation (n"-param1").
//   i: index into OPT_BASE_STRUCT's scaler[] array.
// Expands to a comma-separated list (no trailing comma), so it must be
// used as element(s) inside an option array initializer.
#define SCALER_OPTS(n, i) \
    OPT_STRING_VALIDATE(n, scaler[i].kernel.name, 0, validate_scaler_opt), \
    OPT_FLOAT(n"-param1", scaler[i].kernel.params[0], 0), \
    OPT_FLOAT(n"-param2", scaler[i].kernel.params[1], 0), \
    OPT_FLOAT(n"-blur", scaler[i].kernel.blur, 0), \
    OPT_FLOATRANGE(n"-cutoff", scaler[i].cutoff, 0, 0.0, 1.0), \
    OPT_FLOATRANGE(n"-taper", scaler[i].kernel.taper, 0, 0.0, 1.0), \
    OPT_FLOAT(n"-wparam", scaler[i].window.params[0], 0), \
    OPT_FLOAT(n"-wblur", scaler[i].window.blur, 0), \
    OPT_FLOATRANGE(n"-wtaper", scaler[i].window.taper, 0, 0.0, 1.0), \
    OPT_FLOATRANGE(n"-clamp", scaler[i].clamp, 0, 0.0, 1.0), \
    OPT_FLOATRANGE(n"-radius", scaler[i].radius, 0, 0.5, 16.0), \
    OPT_FLOATRANGE(n"-antiring", scaler[i].antiring, 0, 0.0, 1.0), \
    OPT_STRING_VALIDATE(n"-window", scaler[i].window.name, 0, validate_window_opt)
|
|
|
|
|
2013-03-01 20:19:20 +00:00
|
|
|
// Option table for the vo_gpu renderer. Member offsets resolve against
// struct gl_video_opts (see OPT_BASE_STRUCT above); defaults come from
// gl_video_opts_def. The OPT_REPLACED entries at the end map the old
// --opengl-* option names to their renamed --gpu-*/generic equivalents.
const struct m_sub_options gl_video_conf = {
    .opts = (const m_option_t[]) {
        OPT_CHOICE("gpu-dumb-mode", dumb_mode, 0,
                   ({"auto", 0}, {"yes", 1}, {"no", -1})),
        // Color management / tone mapping.
        OPT_FLOATRANGE("gamma-factor", gamma, 0, 0.1, 2.0),
        OPT_FLAG("gamma-auto", gamma_auto, 0),
        OPT_CHOICE_C("target-prim", target_prim, 0, mp_csp_prim_names),
        OPT_CHOICE_C("target-trc", target_trc, 0, mp_csp_trc_names),
        OPT_INTRANGE("target-peak", target_peak, 0, 10, 10000),
        OPT_CHOICE("tone-mapping", tone_mapping, 0,
                   ({"clip",     TONE_MAPPING_CLIP},
                    {"mobius",   TONE_MAPPING_MOBIUS},
                    {"reinhard", TONE_MAPPING_REINHARD},
                    {"hable",    TONE_MAPPING_HABLE},
                    {"gamma",    TONE_MAPPING_GAMMA},
                    {"linear",   TONE_MAPPING_LINEAR})),
        OPT_CHOICE("hdr-compute-peak", compute_hdr_peak, 0,
                   ({"auto", 0},
                    {"yes", 1},
                    {"no", -1})),
        OPT_FLOAT("tone-mapping-param", tone_mapping_param, 0),
        OPT_FLOAT("tone-mapping-desaturate", tone_mapping_desat, 0),
        OPT_FLAG("gamut-warning", gamut_warning, 0),
        OPT_FLAG("opengl-pbo", pbo, 0),
        // Scaler configuration (one suboption group per scaler slot).
        SCALER_OPTS("scale",  SCALER_SCALE),
        SCALER_OPTS("dscale", SCALER_DSCALE),
        SCALER_OPTS("cscale", SCALER_CSCALE),
        SCALER_OPTS("tscale", SCALER_TSCALE),
        OPT_INTRANGE("scaler-lut-size", scaler_lut_size, 0, 4, 10),
        OPT_FLAG("scaler-resizes-only", scaler_resizes_only, 0),
        OPT_FLAG("linear-scaling", linear_scaling, 0),
        OPT_FLAG("correct-downscaling", correct_downscaling, 0),
        OPT_FLAG("sigmoid-upscaling", sigmoid_upscaling, 0),
        OPT_FLOATRANGE("sigmoid-center", sigmoid_center, 0, 0.0, 1.0),
        OPT_FLOATRANGE("sigmoid-slope", sigmoid_slope, 0, 1.0, 20.0),
        OPT_STRING("fbo-format", fbo_format, 0),
        // Dithering.
        OPT_CHOICE_OR_INT("dither-depth", dither_depth, 0, -1, 16,
                          ({"no", -1}, {"auto", 0})),
        OPT_CHOICE("dither", dither_algo, 0,
                   ({"fruit", DITHER_FRUIT},
                    {"ordered", DITHER_ORDERED},
                    {"no", DITHER_NONE})),
        OPT_INTRANGE("dither-size-fruit", dither_size, 0, 2, 8),
        OPT_FLAG("temporal-dither", temporal_dither, 0),
        OPT_INTRANGE("temporal-dither-period", temporal_dither_period, 0, 1, 128),
        OPT_CHOICE("alpha", alpha_mode, 0,
                   ({"no", ALPHA_NO},
                    {"yes", ALPHA_YES},
                    {"blend", ALPHA_BLEND},
                    {"blend-tiles", ALPHA_BLEND_TILES})),
        OPT_FLAG("opengl-rectangle-textures", use_rectangle, 0),
        OPT_COLOR("background", background, 0),
        // Frame interpolation and subtitle blending.
        OPT_FLAG("interpolation", interpolation, 0),
        OPT_FLOAT("interpolation-threshold", interpolation_threshold, 0),
        OPT_CHOICE("blend-subtitles", blend_subs, 0,
                   ({"no", BLEND_SUBS_NO},
                    {"yes", BLEND_SUBS_YES},
                    {"video", BLEND_SUBS_VIDEO})),
        // User shaders (hooks), debanding, sharpening.
        OPT_PATHLIST("glsl-shaders", user_shaders, 0),
        OPT_CLI_ALIAS("glsl-shader", "glsl-shaders-append"),
        OPT_FLAG("deband", deband, 0),
        OPT_SUBSTRUCT("deband", deband_opts, deband_conf, 0),
        OPT_FLOAT("sharpen", unsharp, 0),
        OPT_INTRANGE("gpu-tex-pad-x", tex_pad_x, 0, 0, 4096),
        OPT_INTRANGE("gpu-tex-pad-y", tex_pad_y, 0, 0, 4096),
        OPT_SUBSTRUCT("", icc_opts, mp_icc_conf, 0),
        OPT_STRING("gpu-shader-cache-dir", shader_cache_dir, 0),
        OPT_STRING_VALIDATE("gpu-hwdec-interop", hwdec_interop, 0,
                            ra_hwdec_validate_opt),
        // Compatibility aliases for renamed options.
        OPT_REPLACED("opengl-hwdec-interop", "gpu-hwdec-interop"),
        OPT_REPLACED("hwdec-preload", "opengl-hwdec-interop"),
        OPT_REPLACED("hdr-tone-mapping", "tone-mapping"),
        OPT_REPLACED("opengl-shaders", "glsl-shaders"),
        OPT_REPLACED("opengl-shader", "glsl-shader"),
        OPT_REPLACED("opengl-shader-cache-dir", "gpu-shader-cache-dir"),
        OPT_REPLACED("opengl-tex-pad-x", "gpu-tex-pad-x"),
        OPT_REPLACED("opengl-tex-pad-y", "gpu-tex-pad-y"),
        OPT_REPLACED("opengl-fbo-format", "fbo-format"),
        OPT_REPLACED("opengl-dumb-mode", "gpu-dumb-mode"),
        OPT_REPLACED("opengl-gamma", "gamma-factor"),
        {0}
    },
    .size = sizeof(struct gl_video_opts),
    .defaults = &gl_video_opts_def,
};
|
|
|
|
|
2013-03-01 20:19:20 +00:00
|
|
|
// Forward declarations for functions defined later in this file.
static void uninit_rendering(struct gl_video *p);
static void uninit_scaler(struct gl_video *p, struct scaler *scaler);
static void check_gl_features(struct gl_video *p);
static bool pass_upload_image(struct gl_video *p, struct mp_image *mpi, uint64_t id);
static const char *handle_scaler_opt(const char *name, bool tscale);
static void reinit_from_options(struct gl_video *p);
static void get_scale_factors(struct gl_video *p, bool transpose_rot, double xy[2]);
static void gl_video_setup_hooks(struct gl_video *p);
static void gl_video_update_options(struct gl_video *p);
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
|
|
|
|
// Shader source emission helpers (all append to the current shader cache
// builder p->sc):
//  GLSL    - append a raw statement, stringized, with a trailing newline
//  GLSLF   - append printf-formatted text to the shader body
//  GLSLHF  - append printf-formatted text to the header section
//  PRELUDE - append printf-formatted text to the prelude section
#define GLSL(x) gl_sc_add(p->sc, #x "\n");
#define GLSLF(...) gl_sc_addf(p->sc, __VA_ARGS__)
#define GLSLHF(...) gl_sc_haddf(p->sc, __VA_ARGS__)
#define PRELUDE(...) gl_sc_paddf(p->sc, __VA_ARGS__)
|
2013-03-01 20:19:20 +00:00
|
|
|
|
2016-04-20 23:33:13 +00:00
|
|
|
static struct bstr load_cached_file(struct gl_video *p, const char *path)
|
2015-09-23 20:13:03 +00:00
|
|
|
{
|
|
|
|
if (!path || !path[0])
|
2016-04-20 23:33:13 +00:00
|
|
|
return (struct bstr){0};
|
2015-09-23 20:13:03 +00:00
|
|
|
for (int n = 0; n < p->num_files; n++) {
|
|
|
|
if (strcmp(p->files[n].path, path) == 0)
|
|
|
|
return p->files[n].body;
|
|
|
|
}
|
|
|
|
// not found -> load it
|
2016-06-10 12:53:31 +00:00
|
|
|
struct bstr s = stream_read_file(path, p, p->global, 1024000); // 1024 kB
|
2015-09-23 20:13:03 +00:00
|
|
|
if (s.len) {
|
2016-05-16 10:46:29 +00:00
|
|
|
struct cached_file new = {
|
2015-09-23 20:13:03 +00:00
|
|
|
.path = talloc_strdup(p, path),
|
2016-04-20 23:33:13 +00:00
|
|
|
.body = s,
|
2015-09-23 20:13:03 +00:00
|
|
|
};
|
2016-05-16 10:46:29 +00:00
|
|
|
MP_TARRAY_APPEND(p, p->files, p->num_files, new);
|
|
|
|
return new.body;
|
2015-09-23 20:13:03 +00:00
|
|
|
}
|
2016-04-20 23:33:13 +00:00
|
|
|
return (struct bstr){0};
|
2015-09-23 20:13:03 +00:00
|
|
|
}
|
|
|
|
|
2013-03-01 20:19:20 +00:00
|
|
|
// Insert a debug marker labeled 'msg' into the rendering command stream, if
// the active RA backend provides one (shows up in GPU debuggers/traces).
static void debug_check_gl(struct gl_video *p, const char *msg)
{
    if (p->ra->fns->debug_marker)
        p->ra->fns->debug_marker(p->ra, msg);
}
|
|
|
|
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
static void gl_video_reset_surfaces(struct gl_video *p)
|
|
|
|
{
|
2017-09-20 08:45:33 +00:00
|
|
|
for (int i = 0; i < SURFACES_MAX; i++) {
|
2016-11-01 12:06:48 +00:00
|
|
|
p->surfaces[i].id = 0;
|
2015-06-26 08:59:57 +00:00
|
|
|
p->surfaces[i].pts = MP_NOPTS_VALUE;
|
2016-11-01 12:06:48 +00:00
|
|
|
}
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
p->surface_idx = 0;
|
|
|
|
p->surface_now = 0;
|
2015-07-02 11:17:20 +00:00
|
|
|
p->frames_drawn = 0;
|
2017-09-20 08:45:33 +00:00
|
|
|
p->output_tex_valid = false;
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
}
|
|
|
|
|
2016-04-20 23:33:13 +00:00
|
|
|
static void gl_video_reset_hooks(struct gl_video *p)
|
|
|
|
{
|
2017-09-27 21:38:54 +00:00
|
|
|
for (int i = 0; i < p->num_tex_hooks; i++)
|
2017-07-10 20:52:39 +00:00
|
|
|
talloc_free(p->tex_hooks[i].priv);
|
2016-04-20 23:33:13 +00:00
|
|
|
|
2017-09-27 21:38:54 +00:00
|
|
|
for (int i = 0; i < p->num_user_textures; i++)
|
2017-07-30 09:38:52 +00:00
|
|
|
ra_tex_free(p->ra, &p->user_textures[i].tex);
|
2017-07-10 23:59:21 +00:00
|
|
|
|
2017-09-27 21:38:54 +00:00
|
|
|
p->num_tex_hooks = 0;
|
|
|
|
p->num_user_textures = 0;
|
2016-04-20 23:33:13 +00:00
|
|
|
}
|
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
static inline int surface_wrap(int id)
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
{
|
2017-09-20 08:45:33 +00:00
|
|
|
id = id % SURFACES_MAX;
|
|
|
|
return id < 0 ? id + SURFACES_MAX : id;
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
}
|
|
|
|
|
2016-05-18 15:47:10 +00:00
|
|
|
static void reinit_osd(struct gl_video *p)
|
2013-03-01 20:19:20 +00:00
|
|
|
{
|
2015-03-23 15:32:59 +00:00
|
|
|
mpgl_osd_destroy(p->osd);
|
|
|
|
p->osd = NULL;
|
2017-08-05 11:48:46 +00:00
|
|
|
if (p->osd_state)
|
2017-08-19 02:33:40 +00:00
|
|
|
p->osd = mpgl_osd_init(p->ra, p->log, p->osd_state);
|
2015-02-03 16:12:04 +00:00
|
|
|
}
|
|
|
|
|
2013-03-01 20:19:20 +00:00
|
|
|
// Release all rendering state that depends on the current configuration:
// scalers, every intermediate texture, interpolation surfaces and hook
// textures. Leaves the gl_video object ready for re-initialization.
static void uninit_rendering(struct gl_video *p)
{
    for (int n = 0; n < SCALER_COUNT; n++)
        uninit_scaler(p, &p->scaler[n]);

    ra_tex_free(p->ra, &p->dither_texture);

    // Per-plane intermediate textures (up to 4 video planes).
    for (int n = 0; n < 4; n++) {
        ra_tex_free(p->ra, &p->merge_tex[n]);
        ra_tex_free(p->ra, &p->scale_tex[n]);
        ra_tex_free(p->ra, &p->integer_tex[n]);
    }

    ra_tex_free(p->ra, &p->indirect_tex);
    ra_tex_free(p->ra, &p->blend_subs_tex);
    ra_tex_free(p->ra, &p->screen_tex);
    ra_tex_free(p->ra, &p->output_tex);

    // Interpolation surface queue.
    for (int n = 0; n < SURFACES_MAX; n++)
        ra_tex_free(p->ra, &p->surfaces[n].tex);

    for (int n = 0; n < p->num_hook_textures; n++)
        ra_tex_free(p->ra, &p->hook_textures[n]);

    // Reset bookkeeping after the textures are gone.
    gl_video_reset_surfaces(p);
    gl_video_reset_hooks(p);

    gl_sc_reset_error(p->sc);
}
|
|
|
|
|
2016-09-02 12:50:03 +00:00
|
|
|
// Report whether automatic gamma adjustment is enabled in the options.
bool gl_video_gamma_auto_enabled(struct gl_video *p)
{
    return p->opts.gamma_auto;
}
|
|
|
|
|
|
|
|
struct mp_colorspace gl_video_get_output_colorspace(struct gl_video *p)
|
|
|
|
{
|
|
|
|
return (struct mp_colorspace) {
|
|
|
|
.primaries = p->opts.target_prim,
|
|
|
|
.gamma = p->opts.target_trc,
|
2018-02-14 15:10:51 +00:00
|
|
|
.sig_peak = p->opts.target_peak / MP_REF_WHITE,
|
2016-09-02 12:50:03 +00:00
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2016-06-03 18:35:17 +00:00
|
|
|
// Warning: icc_data.start must point to a ta allocation, and the function
// takes over ownership. (The comment previously said "profile.start",
// which referred to an older parameter name.)
void gl_video_set_icc_profile(struct gl_video *p, bstr icc_data)
{
    // Only reinit if the profile actually changed.
    if (gl_lcms_set_memory_profile(p->cms, icc_data))
        reinit_from_options(p);
}
|
|
|
|
|
2016-06-03 18:35:17 +00:00
|
|
|
bool gl_video_icc_auto_enabled(struct gl_video *p)
|
|
|
|
{
|
|
|
|
return p->opts.icc_opts ? p->opts.icc_opts->profile_auto : false;
|
|
|
|
}
|
|
|
|
|
2016-02-13 14:33:00 +00:00
|
|
|
static bool gl_video_get_lut3d(struct gl_video *p, enum mp_csp_prim prim,
|
|
|
|
enum mp_csp_trc trc)
|
2013-03-01 20:19:20 +00:00
|
|
|
{
|
2016-06-04 11:44:46 +00:00
|
|
|
if (!p->use_lut_3d)
|
2016-02-13 14:33:00 +00:00
|
|
|
return false;
|
2014-03-24 22:30:12 +00:00
|
|
|
|
2017-07-25 21:17:04 +00:00
|
|
|
struct AVBufferRef *icc = NULL;
|
|
|
|
if (p->image.mpi)
|
|
|
|
icc = p->image.mpi->icc_profile;
|
|
|
|
|
|
|
|
if (p->lut_3d_texture && !gl_lcms_has_changed(p->cms, prim, trc, icc))
|
2016-02-13 14:33:00 +00:00
|
|
|
return true;
|
|
|
|
|
2017-07-29 19:22:11 +00:00
|
|
|
// GLES3 doesn't provide filtered 16 bit integer textures
|
|
|
|
// GLES2 doesn't even provide 3D textures
|
2017-08-27 11:40:59 +00:00
|
|
|
const struct ra_format *fmt = ra_find_unorm_format(p->ra, 2, 4);
|
2017-07-29 19:22:11 +00:00
|
|
|
if (!fmt || !(p->ra->caps & RA_CAP_TEX_3D)) {
|
|
|
|
p->use_lut_3d = false;
|
2017-08-27 11:40:59 +00:00
|
|
|
MP_WARN(p, "Disabling color management (no RGBA16 3D textures).\n");
|
2017-07-29 19:22:11 +00:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2016-02-13 14:33:00 +00:00
|
|
|
struct lut3d *lut3d = NULL;
|
2017-07-25 21:17:04 +00:00
|
|
|
if (!fmt || !gl_lcms_get_lut3d(p->cms, &lut3d, prim, trc, icc) || !lut3d) {
|
2016-06-04 15:52:10 +00:00
|
|
|
p->use_lut_3d = false;
|
2016-02-13 14:33:00 +00:00
|
|
|
return false;
|
2015-11-19 20:21:04 +00:00
|
|
|
}
|
2014-12-23 01:48:58 +00:00
|
|
|
|
2017-07-29 19:22:11 +00:00
|
|
|
ra_tex_free(p->ra, &p->lut_3d_texture);
|
2013-03-01 20:19:20 +00:00
|
|
|
|
2017-07-29 19:22:11 +00:00
|
|
|
struct ra_tex_params params = {
|
|
|
|
.dimensions = 3,
|
|
|
|
.w = lut3d->size[0],
|
|
|
|
.h = lut3d->size[1],
|
|
|
|
.d = lut3d->size[2],
|
|
|
|
.format = fmt,
|
|
|
|
.render_src = true,
|
|
|
|
.src_linear = true,
|
|
|
|
.initial_data = lut3d->data,
|
|
|
|
};
|
|
|
|
p->lut_3d_texture = ra_tex_create(p->ra, ¶ms);
|
2013-03-01 20:19:20 +00:00
|
|
|
|
|
|
|
debug_check_gl(p, "after 3d lut creation");
|
2014-03-24 22:30:12 +00:00
|
|
|
|
2016-07-25 08:18:41 +00:00
|
|
|
for (int i = 0; i < 3; i++)
|
|
|
|
p->lut_3d_size[i] = lut3d->size[i];
|
|
|
|
|
2016-06-03 18:03:49 +00:00
|
|
|
talloc_free(lut3d);
|
|
|
|
|
2016-02-13 14:33:00 +00:00
|
|
|
return true;
|
2013-03-01 20:19:20 +00:00
|
|
|
}
|
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
// Fill an image struct from a ra_tex + some metadata
|
|
|
|
static struct image image_wrap(struct ra_tex *tex, enum plane_type type,
|
|
|
|
int components)
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
{
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
assert(type != PLANE_NONE);
|
2017-09-20 08:45:33 +00:00
|
|
|
return (struct image){
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
.type = type,
|
2017-09-20 08:45:33 +00:00
|
|
|
.tex = tex,
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
.multiplier = 1.0,
|
2017-09-23 07:54:42 +00:00
|
|
|
.w = tex ? tex->params.w : 1,
|
|
|
|
.h = tex ? tex->params.h : 1,
|
2016-04-16 16:14:32 +00:00
|
|
|
.transform = identity_trans,
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
.components = components,
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2017-09-27 22:07:42 +00:00
|
|
|
// Bind an image to a free texture unit and return its ID.
static int pass_bind(struct gl_video *p, struct image img)
{
    // Capture the index before appending: MP_TARRAY_APPEND increments
    // p->num_pass_imgs as a side effect.
    int idx = p->num_pass_imgs;
    MP_TARRAY_APPEND(p, p->pass_imgs, p->num_pass_imgs, img);
    return idx;
}
|
2013-03-28 19:40:19 +00:00
|
|
|
|
2016-04-08 20:21:31 +00:00
|
|
|
// Rotation by 90° and flipping.
|
2016-10-01 14:07:51 +00:00
|
|
|
// w/h is used for recentering.
|
|
|
|
static void get_transform(float w, float h, int rotate, bool flip,
|
|
|
|
struct gl_transform *out_tr)
|
2016-04-08 20:21:31 +00:00
|
|
|
{
|
2016-10-01 14:07:51 +00:00
|
|
|
int a = rotate % 90 ? 0 : rotate / 90;
|
2016-04-08 20:21:31 +00:00
|
|
|
int sin90[4] = {0, 1, 0, -1}; // just to avoid rounding issues etc.
|
|
|
|
int cos90[4] = {1, 0, -1, 0};
|
2016-10-01 14:12:03 +00:00
|
|
|
struct gl_transform tr = {{{ cos90[a], sin90[a]},
|
|
|
|
{-sin90[a], cos90[a]}}};
|
2016-04-08 20:21:31 +00:00
|
|
|
|
|
|
|
// basically, recenter to keep the whole image in view
|
|
|
|
float b[2] = {1, 1};
|
2016-10-01 14:12:03 +00:00
|
|
|
gl_transform_vec(tr, &b[0], &b[1]);
|
2016-10-01 14:07:51 +00:00
|
|
|
tr.t[0] += b[0] < 0 ? w : 0;
|
|
|
|
tr.t[1] += b[1] < 0 ? h : 0;
|
2016-04-08 20:21:31 +00:00
|
|
|
|
2016-10-01 14:07:51 +00:00
|
|
|
if (flip) {
|
|
|
|
struct gl_transform fliptr = {{{1, 0}, {0, -1}}, {0, h}};
|
|
|
|
gl_transform_trans(fliptr, &tr);
|
2016-04-08 20:21:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
*out_tr = tr;
|
|
|
|
}
|
|
|
|
|
2016-10-01 14:07:51 +00:00
|
|
|
// Return the chroma plane upscaled to luma size, but with additional padding
// for image sizes not aligned to subsampling.
//
// size:  luma extent in pixels
// pixel: chroma subsampling factor along this axis (e.g. 2 for 4:2:0)
// Returns size rounded up to the next multiple of pixel.
static int chroma_upsize(int size, int pixel)
{
    // Number of full chroma samples needed to cover the luma extent.
    int blocks = (size + pixel - 1) / pixel;
    return blocks * pixel;
}
|
|
|
|
|
2017-07-10 15:56:43 +00:00
|
|
|
// If a and b are on the same plane, return what plane type should be used.
|
|
|
|
// If a or b are none, the other type always wins.
|
|
|
|
// Usually: LUMA/RGB/XYZ > CHROMA > ALPHA
|
|
|
|
static enum plane_type merge_plane_types(enum plane_type a, enum plane_type b)
|
|
|
|
{
|
|
|
|
if (a == PLANE_NONE)
|
|
|
|
return b;
|
|
|
|
if (b == PLANE_LUMA || b == PLANE_RGB || b == PLANE_XYZ)
|
|
|
|
return b;
|
|
|
|
if (b != PLANE_NONE && a == PLANE_ALPHA)
|
|
|
|
return b;
|
|
|
|
return a;
|
|
|
|
}
|
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
// Places a video_image's image textures + associated metadata into img[]. The
|
2016-04-16 16:14:32 +00:00
|
|
|
// number of textures is equal to p->plane_count. Any necessary plane offsets
|
|
|
|
// are stored in off. (e.g. chroma position)
|
2017-09-20 08:45:33 +00:00
|
|
|
static void pass_get_images(struct gl_video *p, struct video_image *vimg,
|
|
|
|
struct image img[4], struct gl_transform off[4])
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
{
|
2015-01-22 17:29:37 +00:00
|
|
|
assert(vimg->mpi);
|
|
|
|
|
2016-10-01 14:07:51 +00:00
|
|
|
int w = p->image_params.w;
|
|
|
|
int h = p->image_params.h;
|
|
|
|
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
// Determine the chroma offset
|
vo_opengl: start work on rendering API abstraction
This starts work on moving OpenGL-specific code out of the general
renderer code, so that we can support other other GPU APIs. This is in
a very early stage and it's only a proof of concept. It's unknown
whether this will succeed or result in other backends.
For now, the GL rendering API ("ra") and its only provider (ra_gl) does
texture creation/upload/destruction only. And it's used for the main
video texture only. All other code is still hardcoded to GL.
There is some duplication with ra_format and gl_format handling. In the
end, only the ra variants will be needed (plus the gl_format table of
course). For now, this is simpler, because for some reason lots of hwdec
code still requires the GL variants, and would have to be updated to
use the ra ones.
Currently, the video.c code accesses private ra_gl fields. In the end,
it should not do that of course, and it would not include ra_gl.h.
Probably adds bugs, but you can keep them.
2017-07-26 09:19:51 +00:00
|
|
|
float ls_w = 1.0 / p->ra_format.chroma_w;
|
|
|
|
float ls_h = 1.0 / p->ra_format.chroma_h;
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
|
2016-04-16 16:14:32 +00:00
|
|
|
struct gl_transform chroma = {{{ls_w, 0.0}, {0.0, ls_h}}};
|
|
|
|
|
2015-04-02 21:59:50 +00:00
|
|
|
if (p->image_params.chroma_location != MP_CHROMA_CENTER) {
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
int cx, cy;
|
2015-04-02 21:59:50 +00:00
|
|
|
mp_get_chroma_location(p->image_params.chroma_location, &cx, &cy);
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
// By default texture coordinates are such that chroma is centered with
|
|
|
|
// any chroma subsampling. If a specific direction is given, make it
|
|
|
|
// so that the luma and chroma sample line up exactly.
|
|
|
|
// For 4:4:4, setting chroma location should have no effect at all.
|
|
|
|
// luma sample size (in chroma coord. space)
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
chroma.t[0] = ls_w < 1 ? ls_w * -cx / 2 : 0;
|
|
|
|
chroma.t[1] = ls_h < 1 ? ls_h * -cy / 2 : 0;
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
}
|
|
|
|
|
2017-06-30 15:13:58 +00:00
|
|
|
int msb_valid_bits =
|
vo_opengl: start work on rendering API abstraction
This starts work on moving OpenGL-specific code out of the general
renderer code, so that we can support other other GPU APIs. This is in
a very early stage and it's only a proof of concept. It's unknown
whether this will succeed or result in other backends.
For now, the GL rendering API ("ra") and its only provider (ra_gl) does
texture creation/upload/destruction only. And it's used for the main
video texture only. All other code is still hardcoded to GL.
There is some duplication with ra_format and gl_format handling. In the
end, only the ra variants will be needed (plus the gl_format table of
course). For now, this is simpler, because for some reason lots of hwdec
code still requires the GL variants, and would have to be updated to
use the ra ones.
Currently, the video.c code accesses private ra_gl fields. In the end,
it should not do that of course, and it would not include ra_gl.h.
Probably adds bugs, but you can keep them.
2017-07-26 09:19:51 +00:00
|
|
|
p->ra_format.component_bits + MPMIN(p->ra_format.component_pad, 0);
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
// The existing code assumes we just have a single tex multiplier for
|
|
|
|
// all of the planes. This may change in the future
|
2016-06-29 07:16:13 +00:00
|
|
|
float tex_mul = 1.0 / mp_get_csp_mul(p->image_params.color.space,
|
2017-06-30 15:13:58 +00:00
|
|
|
msb_valid_bits,
|
vo_opengl: start work on rendering API abstraction
This starts work on moving OpenGL-specific code out of the general
renderer code, so that we can support other other GPU APIs. This is in
a very early stage and it's only a proof of concept. It's unknown
whether this will succeed or result in other backends.
For now, the GL rendering API ("ra") and its only provider (ra_gl) does
texture creation/upload/destruction only. And it's used for the main
video texture only. All other code is still hardcoded to GL.
There is some duplication with ra_format and gl_format handling. In the
end, only the ra variants will be needed (plus the gl_format table of
course). For now, this is simpler, because for some reason lots of hwdec
code still requires the GL variants, and would have to be updated to
use the ra ones.
Currently, the video.c code accesses private ra_gl fields. In the end,
it should not do that of course, and it would not include ra_gl.h.
Probably adds bugs, but you can keep them.
2017-07-26 09:19:51 +00:00
|
|
|
p->ra_format.component_bits);
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
memset(img, 0, 4 * sizeof(img[0]));
|
2015-03-13 12:42:05 +00:00
|
|
|
for (int n = 0; n < p->plane_count; n++) {
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
struct texplane *t = &vimg->planes[n];
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
|
2017-07-10 15:56:43 +00:00
|
|
|
enum plane_type type = PLANE_NONE;
|
|
|
|
for (int i = 0; i < 4; i++) {
|
vo_opengl: start work on rendering API abstraction
This starts work on moving OpenGL-specific code out of the general
renderer code, so that we can support other other GPU APIs. This is in
a very early stage and it's only a proof of concept. It's unknown
whether this will succeed or result in other backends.
For now, the GL rendering API ("ra") and its only provider (ra_gl) does
texture creation/upload/destruction only. And it's used for the main
video texture only. All other code is still hardcoded to GL.
There is some duplication with ra_format and gl_format handling. In the
end, only the ra variants will be needed (plus the gl_format table of
course). For now, this is simpler, because for some reason lots of hwdec
code still requires the GL variants, and would have to be updated to
use the ra ones.
Currently, the video.c code accesses private ra_gl fields. In the end,
it should not do that of course, and it would not include ra_gl.h.
Probably adds bugs, but you can keep them.
2017-07-26 09:19:51 +00:00
|
|
|
int c = p->ra_format.components[n][i];
|
2017-07-10 15:56:43 +00:00
|
|
|
enum plane_type ctype;
|
|
|
|
if (c == 0) {
|
|
|
|
ctype = PLANE_NONE;
|
|
|
|
} else if (c == 4) {
|
|
|
|
ctype = PLANE_ALPHA;
|
|
|
|
} else if (p->image_params.color.space == MP_CSP_RGB) {
|
|
|
|
ctype = PLANE_RGB;
|
|
|
|
} else if (p->image_params.color.space == MP_CSP_XYZ) {
|
|
|
|
ctype = PLANE_XYZ;
|
|
|
|
} else {
|
|
|
|
ctype = c == 1 ? PLANE_LUMA : PLANE_CHROMA;
|
|
|
|
}
|
|
|
|
type = merge_plane_types(type, ctype);
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
}
|
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
img[n] = (struct image){
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
.type = type,
|
2017-08-04 13:47:50 +00:00
|
|
|
.tex = t->tex,
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
.multiplier = tex_mul,
|
2015-09-02 10:52:11 +00:00
|
|
|
.w = t->w,
|
|
|
|
.h = t->h,
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
};
|
2017-06-30 15:13:58 +00:00
|
|
|
|
|
|
|
for (int i = 0; i < 4; i++)
|
2017-09-20 08:45:33 +00:00
|
|
|
img[n].components += !!p->ra_format.components[n][i];
|
2017-06-30 15:13:58 +00:00
|
|
|
|
2016-10-01 14:07:51 +00:00
|
|
|
get_transform(t->w, t->h, p->image_params.rotate, t->flipped,
|
2017-09-20 08:45:33 +00:00
|
|
|
&img[n].transform);
|
2016-04-08 20:21:31 +00:00
|
|
|
if (p->image_params.rotate % 180 == 90)
|
2017-09-20 08:45:33 +00:00
|
|
|
MPSWAP(int, img[n].w, img[n].h);
|
2016-04-16 16:14:32 +00:00
|
|
|
|
2016-10-01 14:07:51 +00:00
|
|
|
off[n] = identity_trans;
|
|
|
|
|
|
|
|
if (type == PLANE_CHROMA) {
|
|
|
|
struct gl_transform rot;
|
|
|
|
get_transform(0, 0, p->image_params.rotate, true, &rot);
|
|
|
|
|
|
|
|
struct gl_transform tr = chroma;
|
|
|
|
gl_transform_vec(rot, &tr.t[0], &tr.t[1]);
|
|
|
|
|
vo_opengl: start work on rendering API abstraction
This starts work on moving OpenGL-specific code out of the general
renderer code, so that we can support other GPU APIs. This is in
a very early stage and it's only a proof of concept. It's unknown
whether this will succeed or result in other backends.
For now, the GL rendering API ("ra") and its only provider (ra_gl) does
texture creation/upload/destruction only. And it's used for the main
video texture only. All other code is still hardcoded to GL.
There is some duplication with ra_format and gl_format handling. In the
end, only the ra variants will be needed (plus the gl_format table of
course). For now, this is simpler, because for some reason lots of hwdec
code still requires the GL variants, and would have to be updated to
use the ra ones.
Currently, the video.c code accesses private ra_gl fields. In the end,
it should not do that of course, and it would not include ra_gl.h.
Probably adds bugs, but you can keep them.
2017-07-26 09:19:51 +00:00
|
|
|
float dx = (chroma_upsize(w, p->ra_format.chroma_w) - w) * ls_w;
|
|
|
|
float dy = (chroma_upsize(h, p->ra_format.chroma_h) - h) * ls_h;
|
2016-10-01 14:07:51 +00:00
|
|
|
|
|
|
|
// Adjust the chroma offset if the real chroma size is fractional
|
|
|
|
// due to image sizes not being aligned to chroma subsampling.
|
|
|
|
struct gl_transform rot2;
|
|
|
|
get_transform(0, 0, p->image_params.rotate, t->flipped, &rot2);
|
|
|
|
if (rot2.m[0][0] < 0)
|
|
|
|
tr.t[0] += dx;
|
|
|
|
if (rot2.m[1][0] < 0)
|
|
|
|
tr.t[0] += dy;
|
|
|
|
if (rot2.m[0][1] < 0)
|
|
|
|
tr.t[1] += dx;
|
|
|
|
if (rot2.m[1][1] < 0)
|
|
|
|
tr.t[1] += dy;
|
|
|
|
|
|
|
|
off[n] = tr;
|
|
|
|
}
|
2013-03-28 19:40:19 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-06-30 14:57:17 +00:00
|
|
|
// Return the index of the given component (assuming all non-padding components
|
|
|
|
// of all planes are concatenated into a linear list).
|
vo_opengl: start work on rendering API abstraction
This starts work on moving OpenGL-specific code out of the general
renderer code, so that we can support other other GPU APIs. This is in
a very early stage and it's only a proof of concept. It's unknown
whether this will succeed or result in other backends.
For now, the GL rendering API ("ra") and its only provider (ra_gl) does
texture creation/upload/destruction only. And it's used for the main
video texture only. All other code is still hardcoded to GL.
There is some duplication with ra_format and gl_format handling. In the
end, only the ra variants will be needed (plus the gl_format table of
course). For now, this is simpler, because for some reason lots of hwdec
code still requires the GL variants, and would have to be updated to
use the ra ones.
Currently, the video.c code accesses private ra_gl fields. In the end,
it should not do that of course, and it would not include ra_gl.h.
Probably adds bugs, but you can keep them.
2017-07-26 09:19:51 +00:00
|
|
|
static int find_comp(struct ra_imgfmt_desc *desc, int component)
|
2017-06-30 14:57:17 +00:00
|
|
|
{
|
|
|
|
int cur = 0;
|
|
|
|
for (int n = 0; n < desc->num_planes; n++) {
|
|
|
|
for (int i = 0; i < 4; i++) {
|
|
|
|
if (desc->components[n][i]) {
|
|
|
|
if (desc->components[n][i] == component)
|
|
|
|
return cur;
|
|
|
|
cur++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2015-01-29 18:53:49 +00:00
|
|
|
static void init_video(struct gl_video *p)
|
2013-03-01 20:19:20 +00:00
|
|
|
{
|
2017-06-30 14:57:17 +00:00
|
|
|
p->use_integer_conversion = false;
|
|
|
|
|
vo_gpu: make it possible to load multiple hwdec interop drivers
Make the VO<->decoder interface capable of supporting multiple hwdec
APIs at once. The main gain is that this simplifies autoprobing a lot.
Before this change, it could happen that the VO loaded the "wrong" hwdec
API, and the decoder was stuck with the choice (breaking hw decoding).
With the change applied, the VO simply loads all available APIs, so
autoprobing trickery is left entirely to the decoder.
In the past, we were quite careful about not accidentally loading the
wrong interop drivers. This was in part to make sure autoprobing works,
but also because libva had this obnoxious bug of dumping garbage to
stderr when using the API. libva was fixed, so this is not a problem
anymore.
The --opengl-hwdec-interop option is changed in various ways (again...),
and renamed to --gpu-hwdec-interop. It does not have much use anymore,
other than debugging. It's notable that the order in the hwdec interop
array ra_hwdec_drivers[] still matters if multiple drivers support the
same image formats, so the option can explicitly force one, if that
should ever be necessary, or more likely, for debugging. One example are
the ra_hwdec_d3d11egl and ra_hwdec_d3d11eglrgb drivers, which both
support d3d11 input.
vo_gpu now always loads the interop lazily by default, but when it does,
it loads them all. vo_opengl_cb now always loads them when the GL
context handle is initialized. I don't expect that this causes any
problems.
It's now possible to do things like changing between vdpau and nvdec
decoding at runtime.
This is also preparation for cleaning up vd_lavc.c hwdec autoprobing.
It's another reason why hwdec_devices_request_all() does not take a
hwdec type anymore.
2017-12-01 04:05:00 +00:00
|
|
|
struct ra_hwdec *hwdec = NULL;
|
|
|
|
for (int n = 0; n < p->num_hwdecs; n++) {
|
|
|
|
if (ra_hwdec_test_format(p->hwdecs[n], p->image_params.imgfmt)) {
|
|
|
|
hwdec = p->hwdecs[n];
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (hwdec) {
|
|
|
|
if (hwdec->driver->overlay_frame) {
|
2017-08-30 10:19:32 +00:00
|
|
|
MP_WARN(p, "Using HW-overlay mode. No GL filtering is performed "
|
|
|
|
"on the video!\n");
|
vo_gpu: make it possible to load multiple hwdec interop drivers
Make the VO<->decoder interface capable of supporting multiple hwdec
APIs at once. The main gain is that this simplifies autoprobing a lot.
Before this change, it could happen that the VO loaded the "wrong" hwdec
API, and the decoder was stuck with the choice (breaking hw decoding).
With the change applied, the VO simply loads all available APIs, so
autoprobing trickery is left entirely to the decoder.
In the past, we were quite careful about not accidentally loading the
wrong interop drivers. This was in part to make sure autoprobing works,
but also because libva had this obnoxious bug of dumping garbage to
stderr when using the API. libva was fixed, so this is not a problem
anymore.
The --opengl-hwdec-interop option is changed in various ways (again...),
and renamed to --gpu-hwdec-interop. It does not have much use anymore,
other than debugging. It's notable that the order in the hwdec interop
array ra_hwdec_drivers[] still matters if multiple drivers support the
same image formats, so the option can explicitly force one, if that
should ever be necessary, or more likely, for debugging. One example are
the ra_hwdec_d3d11egl and ra_hwdec_d3d11eglrgb drivers, which both
support d3d11 input.
vo_gpu now always loads the interop lazily by default, but when it does,
it loads them all. vo_opengl_cb now always loads them when the GL
context handle is initialized. I don't expect that this causes any
problems.
It's now possible to do things like changing between vdpau and nvdec
decoding at runtime.
This is also preparation for cleaning up vd_lavc.c hwdec autoprobing.
It's another reason why hwdec_devices_request_all() does not take a
hwdec type anymore.
2017-12-01 04:05:00 +00:00
|
|
|
p->hwdec_overlay = hwdec;
|
2017-08-30 10:19:32 +00:00
|
|
|
} else {
|
vo_gpu: make it possible to load multiple hwdec interop drivers
Make the VO<->decoder interface capable of supporting multiple hwdec
APIs at once. The main gain is that this simplifies autoprobing a lot.
Before this change, it could happen that the VO loaded the "wrong" hwdec
API, and the decoder was stuck with the choice (breaking hw decoding).
With the change applied, the VO simply loads all available APIs, so
autoprobing trickery is left entirely to the decoder.
In the past, we were quite careful about not accidentally loading the
wrong interop drivers. This was in part to make sure autoprobing works,
but also because libva had this obnoxious bug of dumping garbage to
stderr when using the API. libva was fixed, so this is not a problem
anymore.
The --opengl-hwdec-interop option is changed in various ways (again...),
and renamed to --gpu-hwdec-interop. It does not have much use anymore,
other than debugging. It's notable that the order in the hwdec interop
array ra_hwdec_drivers[] still matters if multiple drivers support the
same image formats, so the option can explicitly force one, if that
should ever be necessary, or more likely, for debugging. One example are
the ra_hwdec_d3d11egl and ra_hwdec_d3d11eglrgb drivers, which both
support d3d11 input.
vo_gpu now always loads the interop lazily by default, but when it does,
it loads them all. vo_opengl_cb now always loads them when the GL
context handle is initialized. I don't expect that this causes any
problems.
It's now possible to do things like changing between vdpau and nvdec
decoding at runtime.
This is also preparation for cleaning up vd_lavc.c hwdec autoprobing.
It's another reason why hwdec_devices_request_all() does not take a
hwdec type anymore.
2017-12-01 04:05:00 +00:00
|
|
|
p->hwdec_mapper = ra_hwdec_mapper_create(hwdec, &p->image_params);
|
2017-08-30 10:19:32 +00:00
|
|
|
if (!p->hwdec_mapper)
|
|
|
|
MP_ERR(p, "Initializing texture for hardware decoding failed.\n");
|
|
|
|
}
|
vo_opengl: separate hwdec context and mapping, port it to use ra
This does two separate rather intrusive things:
1. Make the hwdec context (which does initialization, provides the
device to the decoder, and other basic state) and frame mapping
(getting textures from a mp_image) separate. This is more
flexible, and you could map multiple images at once. It will
help removing some hwdec special-casing from video.c.
2. Switch all hwdec API use to ra. Of course all code is still
GL specific, but in theory it would be possible to support other
backends. The most important change is that the hwdec interop
returns ra objects, instead of anything GL specific. This removes
the last dependency on GL-specific header files from video.c.
I'm mixing these separate changes because both requires essentially
rewriting all the glue code, so better do them at once. For the same
reason, this change isn't done incrementally.
hwdec_ios.m is untested, since I can't test it. Apart from superficial
mistakes, this also requires dealing with Apple's texture format
fuckups: they force you to use GL_LUMINANCE[_ALPHA] instead of GL_RED
and GL_RG. We also need to report the correct format via ra_tex to
the renderer, which is done by find_la_variant(). It's unknown whether
this works correctly.
hwdec_rpi.c as well as vo_rpi.c are still broken. (I need to pull my
RPI out of a dusty pile of devices and cables, so, later.)
2017-08-10 15:48:33 +00:00
|
|
|
if (p->hwdec_mapper)
|
|
|
|
p->image_params = p->hwdec_mapper->dst_params;
|
vo_gpu: make it possible to load multiple hwdec interop drivers
Make the VO<->decoder interface capable of supporting multiple hwdec
APIs at once. The main gain is that this simplifies autoprobing a lot.
Before this change, it could happen that the VO loaded the "wrong" hwdec
API, and the decoder was stuck with the choice (breaking hw decoding).
With the change applied, the VO simply loads all available APIs, so
autoprobing trickery is left entirely to the decoder.
In the past, we were quite careful about not accidentally loading the
wrong interop drivers. This was in part to make sure autoprobing works,
but also because libva had this obnoxious bug of dumping garbage to
stderr when using the API. libva was fixed, so this is not a problem
anymore.
The --opengl-hwdec-interop option is changed in various ways (again...),
and renamed to --gpu-hwdec-interop. It does not have much use anymore,
other than debugging. It's notable that the order in the hwdec interop
array ra_hwdec_drivers[] still matters if multiple drivers support the
same image formats, so the option can explicitly force one, if that
should ever be necessary, or more likely, for debugging. One example are
the ra_hwdec_d3d11egl and ra_hwdec_d3d11eglrgb drivers, which both
support d3d11 input.
vo_gpu now always loads the interop lazily by default, but when it does,
it loads them all. vo_opengl_cb now always loads them when the GL
context handle is initialized. I don't expect that this causes any
problems.
It's now possible to do things like changing between vdpau and nvdec
decoding at runtime.
This is also preparation for cleaning up vd_lavc.c hwdec autoprobing.
It's another reason why hwdec_devices_request_all() does not take a
hwdec type anymore.
2017-12-01 04:05:00 +00:00
|
|
|
const char **exts = hwdec->glsl_extensions;
|
2016-05-19 10:02:08 +00:00
|
|
|
for (int n = 0; exts && exts[n]; n++)
|
|
|
|
gl_sc_enable_extension(p->sc, (char *)exts[n]);
|
vo_opengl: refactor how hwdec interop exports textures
Rename gl_hwdec_driver.map_image to map_frame, and let it fill out a
struct gl_hwdec_frame describing the exact texture layout. This gives
more flexibility to what the hwdec interop can export. In particular, it
can export strange component orders/permutations and textures with
padded size. (The latter originating from cropped video.)
The way gl_hwdec_frame works is in the spirit of the rest of the
vo_opengl video processing code, which tends to put as much information
in immediate state (as part of the dataflow), instead of declaring it
globally. To some degree this duplicates the texplane and img_tex
structs, but until we somehow unify those, it's better to give the hwdec
state its own struct. The fact that changing the hwdec struct would
require changes and testing on at least 4 platform/GPU combinations
makes duplicating it almost a requirement to avoid pain later.
Make gl_hwdec_driver.reinit set the new image format and remove the
gl_hwdec.converted_imgfmt field.
Likewise, gl_hwdec.gl_texture_target is replaced with
gl_hwdec_plane.gl_target.
Split out a init_image_desc function from init_format. The latter is not
called in the hwdec case at all anymore. Setting up most of struct
texplane is also completely separate in the hwdec and normal cases.
video.c does not check whether the hwdec "mapped" image format is
supported. This should not really happen anyway, and if it does, the
hwdec interop backend must fail at creation time, so this is not an
issue.
2016-05-10 16:29:10 +00:00
|
|
|
p->hwdec_active = true;
|
2015-01-29 18:53:49 +00:00
|
|
|
}
|
2013-12-01 22:39:13 +00:00
|
|
|
|
vo_opengl: start work on rendering API abstraction
This starts work on moving OpenGL-specific code out of the general
renderer code, so that we can support other GPU APIs. This is in
a very early stage and it's only a proof of concept. It's unknown
whether this will succeed or result in other backends.
For now, the GL rendering API ("ra") and its only provider (ra_gl) does
texture creation/upload/destruction only. And it's used for the main
video texture only. All other code is still hardcoded to GL.
There is some duplication with ra_format and gl_format handling. In the
end, only the ra variants will be needed (plus the gl_format table of
course). For now, this is simpler, because for some reason lots of hwdec
code still requires the GL variants, and would have to be updated to
use the ra ones.
Currently, the video.c code accesses private ra_gl fields. In the end,
it should not do that of course, and it would not include ra_gl.h.
Probably adds bugs, but you can keep them.
2017-07-26 09:19:51 +00:00
|
|
|
p->ra_format = (struct ra_imgfmt_desc){0};
|
|
|
|
ra_get_imgfmt_desc(p->ra, p->image_params.imgfmt, &p->ra_format);
|
2017-06-30 14:57:17 +00:00
|
|
|
|
vo_opengl: start work on rendering API abstraction
This starts work on moving OpenGL-specific code out of the general
renderer code, so that we can support other GPU APIs. This is in
a very early stage and it's only a proof of concept. It's unknown
whether this will succeed or result in other backends.
For now, the GL rendering API ("ra") and its only provider (ra_gl) does
texture creation/upload/destruction only. And it's used for the main
video texture only. All other code is still hardcoded to GL.
There is some duplication with ra_format and gl_format handling. In the
end, only the ra variants will be needed (plus the gl_format table of
course). For now, this is simpler, because for some reason lots of hwdec
code still requires the GL variants, and would have to be updated to
use the ra ones.
Currently, the video.c code accesses private ra_gl fields. In the end,
it should not do that of course, and it would not include ra_gl.h.
Probably adds bugs, but you can keep them.
2017-07-26 09:19:51 +00:00
|
|
|
p->plane_count = p->ra_format.num_planes;
|
2017-06-30 14:57:17 +00:00
|
|
|
|
2017-06-30 15:13:58 +00:00
|
|
|
p->has_alpha = false;
|
|
|
|
p->is_gray = true;
|
|
|
|
|
vo_opengl: start work on rendering API abstraction
This starts work on moving OpenGL-specific code out of the general
renderer code, so that we can support other GPU APIs. This is in
a very early stage and it's only a proof of concept. It's unknown
whether this will succeed or result in other backends.
For now, the GL rendering API ("ra") and its only provider (ra_gl) does
texture creation/upload/destruction only. And it's used for the main
video texture only. All other code is still hardcoded to GL.
There is some duplication with ra_format and gl_format handling. In the
end, only the ra variants will be needed (plus the gl_format table of
course). For now, this is simpler, because for some reason lots of hwdec
code still requires the GL variants, and would have to be updated to
use the ra ones.
Currently, the video.c code accesses private ra_gl fields. In the end,
it should not do that of course, and it would not include ra_gl.h.
Probably adds bugs, but you can keep them.
2017-07-26 09:19:51 +00:00
|
|
|
for (int n = 0; n < p->ra_format.num_planes; n++) {
|
2017-06-30 15:13:58 +00:00
|
|
|
for (int i = 0; i < 4; i++) {
|
vo_opengl: start work on rendering API abstraction
This starts work on moving OpenGL-specific code out of the general
renderer code, so that we can support other GPU APIs. This is in
a very early stage and it's only a proof of concept. It's unknown
whether this will succeed or result in other backends.
For now, the GL rendering API ("ra") and its only provider (ra_gl) does
texture creation/upload/destruction only. And it's used for the main
video texture only. All other code is still hardcoded to GL.
There is some duplication with ra_format and gl_format handling. In the
end, only the ra variants will be needed (plus the gl_format table of
course). For now, this is simpler, because for some reason lots of hwdec
code still requires the GL variants, and would have to be updated to
use the ra ones.
Currently, the video.c code accesses private ra_gl fields. In the end,
it should not do that of course, and it would not include ra_gl.h.
Probably adds bugs, but you can keep them.
2017-07-26 09:19:51 +00:00
|
|
|
if (p->ra_format.components[n][i]) {
|
|
|
|
p->has_alpha |= p->ra_format.components[n][i] == 4;
|
|
|
|
p->is_gray &= p->ra_format.components[n][i] == 1 ||
|
|
|
|
p->ra_format.components[n][i] == 4;
|
2017-06-30 15:13:58 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2017-06-30 14:57:17 +00:00
|
|
|
|
|
|
|
for (int c = 0; c < 4; c++) {
|
vo_opengl: start work on rendering API abstraction
This starts work on moving OpenGL-specific code out of the general
renderer code, so that we can support other GPU APIs. This is in
a very early stage and it's only a proof of concept. It's unknown
whether this will succeed or result in other backends.
For now, the GL rendering API ("ra") and its only provider (ra_gl) does
texture creation/upload/destruction only. And it's used for the main
video texture only. All other code is still hardcoded to GL.
There is some duplication with ra_format and gl_format handling. In the
end, only the ra variants will be needed (plus the gl_format table of
course). For now, this is simpler, because for some reason lots of hwdec
code still requires the GL variants, and would have to be updated to
use the ra ones.
Currently, the video.c code accesses private ra_gl fields. In the end,
it should not do that of course, and it would not include ra_gl.h.
Probably adds bugs, but you can keep them.
2017-07-26 09:19:51 +00:00
|
|
|
int loc = find_comp(&p->ra_format, c + 1);
|
2017-06-30 14:57:17 +00:00
|
|
|
p->color_swizzle[c] = "rgba"[loc >= 0 && loc < 4 ? loc : 0];
|
|
|
|
}
|
|
|
|
p->color_swizzle[4] = '\0';
|
|
|
|
|
2015-01-29 18:53:49 +00:00
|
|
|
mp_image_params_guess_csp(&p->image_params);
|
2013-12-01 22:39:13 +00:00
|
|
|
|
2015-03-27 12:27:40 +00:00
|
|
|
av_lfg_init(&p->lfg, 1);
|
|
|
|
|
2013-03-01 20:19:20 +00:00
|
|
|
debug_check_gl(p, "before video texture creation");
|
|
|
|
|
vo_opengl: refactor how hwdec interop exports textures
Rename gl_hwdec_driver.map_image to map_frame, and let it fill out a
struct gl_hwdec_frame describing the exact texture layout. This gives
more flexibility to what the hwdec interop can export. In particular, it
can export strange component orders/permutations and textures with
padded size. (The latter originating from cropped video.)
The way gl_hwdec_frame works is in the spirit of the rest of the
vo_opengl video processing code, which tends to put as much information
in immediate state (as part of the dataflow), instead of declaring it
globally. To some degree this duplicates the texplane and img_tex
structs, but until we somehow unify those, it's better to give the hwdec
state its own struct. The fact that changing the hwdec struct would
require changes and testing on at least 4 platform/GPU combinations
makes duplicating it almost a requirement to avoid pain later.
Make gl_hwdec_driver.reinit set the new image format and remove the
gl_hwdec.converted_imgfmt field.
Likewise, gl_hwdec.gl_texture_target is replaced with
gl_hwdec_plane.gl_target.
Split out a init_image_desc function from init_format. The latter is not
called in the hwdec case at all anymore. Setting up most of struct
texplane is also completely separate in the hwdec and normal cases.
video.c does not check whether the hwdec "mapped" image format is
supported. This should not really happen anyway, and if it does, the
hwdec interop backend must fail at creation time, so this is not an
issue.
2016-05-10 16:29:10 +00:00
|
|
|
if (!p->hwdec_active) {
|
|
|
|
struct video_image *vimg = &p->image;
|
2013-03-01 20:19:20 +00:00
|
|
|
|
vo_opengl: refactor how hwdec interop exports textures
Rename gl_hwdec_driver.map_image to map_frame, and let it fill out a
struct gl_hwdec_frame describing the exact texture layout. This gives
more flexibility to what the hwdec interop can export. In particular, it
can export strange component orders/permutations and textures with
padded size. (The latter originating from cropped video.)
The way gl_hwdec_frame works is in the spirit of the rest of the
vo_opengl video processing code, which tends to put as much information
in immediate state (as part of the dataflow), instead of declaring it
globally. To some degree this duplicates the texplane and img_tex
structs, but until we somehow unify those, it's better to give the hwdec
state its own struct. The fact that changing the hwdec struct would
require changes and testing on at least 4 platform/GPU combinations
makes duplicating it almost a requirement to avoid pain later.
Make gl_hwdec_driver.reinit set the new image format and remove the
gl_hwdec.converted_imgfmt field.
Likewise, gl_hwdec.gl_texture_target is replaced with
gl_hwdec_plane.gl_target.
Split out a init_image_desc function from init_format. The latter is not
called in the hwdec case at all anymore. Setting up most of struct
texplane is also completely separate in the hwdec and normal cases.
video.c does not check whether the hwdec "mapped" image format is
supported. This should not really happen anyway, and if it does, the
hwdec interop backend must fail at creation time, so this is not an
issue.
2016-05-10 16:29:10 +00:00
|
|
|
struct mp_image layout = {0};
|
|
|
|
mp_image_set_params(&layout, &p->image_params);
|
|
|
|
|
|
|
|
for (int n = 0; n < p->plane_count; n++) {
|
|
|
|
struct texplane *plane = &vimg->planes[n];
|
vo_opengl: start work on rendering API abstraction
This starts work on moving OpenGL-specific code out of the general
renderer code, so that we can support other GPU APIs. This is in
a very early stage and it's only a proof of concept. It's unknown
whether this will succeed or result in other backends.
For now, the GL rendering API ("ra") and its only provider (ra_gl) does
texture creation/upload/destruction only. And it's used for the main
video texture only. All other code is still hardcoded to GL.
There is some duplication with ra_format and gl_format handling. In the
end, only the ra variants will be needed (plus the gl_format table of
course). For now, this is simpler, because for some reason lots of hwdec
code still requires the GL variants, and would have to be updated to
use the ra ones.
Currently, the video.c code accesses private ra_gl fields. In the end,
it should not do that of course, and it would not include ra_gl.h.
Probably adds bugs, but you can keep them.
2017-07-26 09:19:51 +00:00
|
|
|
const struct ra_format *format = p->ra_format.planes[n];
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
performing the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
re-adds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
|
2016-10-01 10:01:57 +00:00
|
|
|
plane->w = mp_image_plane_w(&layout, n);
|
|
|
|
plane->h = mp_image_plane_h(&layout, n);
|
2013-03-01 20:19:20 +00:00
|
|
|
|
vo_opengl: start work on rendering API abstraction
This starts work on moving OpenGL-specific code out of the general
renderer code, so that we can support other GPU APIs. This is in
a very early stage and it's only a proof of concept. It's unknown
whether this will succeed or result in other backends.
For now, the GL rendering API ("ra") and its only provider (ra_gl) does
texture creation/upload/destruction only. And it's used for the main
video texture only. All other code is still hardcoded to GL.
There is some duplication with ra_format and gl_format handling. In the
end, only the ra variants will be needed (plus the gl_format table of
course). For now, this is simpler, because for some reason lots of hwdec
code still requires the GL variants, and would have to be updated to
use the ra ones.
Currently, the video.c code accesses private ra_gl fields. In the end,
it should not do that of course, and it would not include ra_gl.h.
Probably adds bugs, but you can keep them.
2017-07-26 09:19:51 +00:00
|
|
|
struct ra_tex_params params = {
|
|
|
|
.dimensions = 2,
|
2017-08-04 13:47:50 +00:00
|
|
|
.w = plane->w + p->opts.tex_pad_x,
|
|
|
|
.h = plane->h + p->opts.tex_pad_y,
|
vo_opengl: start work on rendering API abstraction
This starts work on moving OpenGL-specific code out of the general
renderer code, so that we can support other GPU APIs. This is in
a very early stage and it's only a proof of concept. It's unknown
whether this will succeed or result in other backends.
For now, the GL rendering API ("ra") and its only provider (ra_gl) does
texture creation/upload/destruction only. And it's used for the main
video texture only. All other code is still hardcoded to GL.
There is some duplication with ra_format and gl_format handling. In the
end, only the ra variants will be needed (plus the gl_format table of
course). For now, this is simpler, because for some reason lots of hwdec
code still requires the GL variants, and would have to be updated to
use the ra ones.
Currently, the video.c code accesses private ra_gl fields. In the end,
it should not do that of course, and it would not include ra_gl.h.
Probably adds bugs, but you can keep them.
2017-07-26 09:19:51 +00:00
|
|
|
.d = 1,
|
|
|
|
.format = format,
|
2017-08-05 12:20:14 +00:00
|
|
|
.render_src = true,
|
vo_opengl: start work on rendering API abstraction
This starts work on moving OpenGL-specific code out of the general
renderer code, so that we can support other GPU APIs. This is in
a very early stage and it's only a proof of concept. It's unknown
whether this will succeed or result in other backends.
For now, the GL rendering API ("ra") and its only provider (ra_gl) does
texture creation/upload/destruction only. And it's used for the main
video texture only. All other code is still hardcoded to GL.
There is some duplication with ra_format and gl_format handling. In the
end, only the ra variants will be needed (plus the gl_format table of
course). For now, this is simpler, because for some reason lots of hwdec
code still requires the GL variants, and would have to be updated to
use the ra ones.
Currently, the video.c code accesses private ra_gl fields. In the end,
it should not do that of course, and it would not include ra_gl.h.
Probably adds bugs, but you can keep them.
2017-07-26 09:19:51 +00:00
|
|
|
.src_linear = format->linear_filter,
|
|
|
|
.non_normalized = p->opts.use_rectangle,
|
2017-08-16 20:13:51 +00:00
|
|
|
.host_mutable = true,
|
vo_opengl: start work on rendering API abstraction
This starts work on moving OpenGL-specific code out of the general
renderer code, so that we can support other GPU APIs. This is in
a very early stage and it's only a proof of concept. It's unknown
whether this will succeed or result in other backends.
For now, the GL rendering API ("ra") and its only provider (ra_gl) does
texture creation/upload/destruction only. And it's used for the main
video texture only. All other code is still hardcoded to GL.
There is some duplication with ra_format and gl_format handling. In the
end, only the ra variants will be needed (plus the gl_format table of
course). For now, this is simpler, because for some reason lots of hwdec
code still requires the GL variants, and would have to be updated to
use the ra ones.
Currently, the video.c code accesses private ra_gl fields. In the end,
it should not do that of course, and it would not include ra_gl.h.
Probably adds bugs, but you can keep them.
2017-07-26 09:19:51 +00:00
|
|
|
};
|
2013-03-01 20:19:20 +00:00
|
|
|
|
2017-08-04 13:47:50 +00:00
|
|
|
MP_VERBOSE(p, "Texture for plane %d: %dx%d\n", n,
|
|
|
|
params.w, params.h);
|
2013-03-28 19:48:53 +00:00
|
|
|
|
2017-08-04 13:47:50 +00:00
|
|
|
plane->tex = ra_tex_create(p->ra, ¶ms);
|
|
|
|
if (!plane->tex)
|
|
|
|
abort(); // shit happens
|
vo_opengl: start work on rendering API abstraction
This starts work on moving OpenGL-specific code out of the general
renderer code, so that we can support other GPU APIs. This is in
a very early stage and it's only a proof of concept. It's unknown
whether this will succeed or result in other backends.
For now, the GL rendering API ("ra") and its only provider (ra_gl) does
texture creation/upload/destruction only. And it's used for the main
video texture only. All other code is still hardcoded to GL.
There is some duplication with ra_format and gl_format handling. In the
end, only the ra variants will be needed (plus the gl_format table of
course). For now, this is simpler, because for some reason lots of hwdec
code still requires the GL variants, and would have to be updated to
use the ra ones.
Currently, the video.c code accesses private ra_gl fields. In the end,
it should not do that of course, and it would not include ra_gl.h.
Probably adds bugs, but you can keep them.
2017-07-26 09:19:51 +00:00
|
|
|
|
|
|
|
p->use_integer_conversion |= format->ctype == RA_CTYPE_UINT;
|
vo_opengl: refactor how hwdec interop exports textures
Rename gl_hwdec_driver.map_image to map_frame, and let it fill out a
struct gl_hwdec_frame describing the exact texture layout. This gives
more flexibility to what the hwdec interop can export. In particular, it
can export strange component orders/permutations and textures with
padded size. (The latter originating from cropped video.)
The way gl_hwdec_frame works is in the spirit of the rest of the
vo_opengl video processing code, which tends to put as much information
in immediate state (as part of the dataflow), instead of declaring it
globally. To some degree this duplicates the texplane and img_tex
structs, but until we somehow unify those, it's better to give the hwdec
state its own struct. The fact that changing the hwdec struct would
require changes and testing on at least 4 platform/GPU combinations
makes duplicating it almost a requirement to avoid pain later.
Make gl_hwdec_driver.reinit set the new image format and remove the
gl_hwdec.converted_imgfmt field.
Likewise, gl_hwdec.gl_texture_target is replaced with
gl_hwdec_plane.gl_target.
Split out a init_image_desc function from init_format. The latter is not
called in the hwdec case at all anymore. Setting up most of struct
texplane is also completely separate in the hwdec and normal cases.
video.c does not check whether the hwdec "mapped" image format is
supported. This should not really happen anyway, and if it does, the
hwdec interop backend must fail at creation time, so this is not an
issue.
2016-05-10 16:29:10 +00:00
|
|
|
}
|
2013-03-01 20:19:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
debug_check_gl(p, "after video texture creation");
|
|
|
|
|
2018-01-17 10:09:03 +00:00
|
|
|
// Format-dependent checks.
|
|
|
|
check_gl_features(p);
|
|
|
|
|
2016-05-18 15:47:10 +00:00
|
|
|
gl_video_setup_hooks(p);
|
2013-03-01 20:19:20 +00:00
|
|
|
}
|
|
|
|
|
2017-07-23 07:41:51 +00:00
|
|
|
static struct dr_buffer *gl_find_dr_buffer(struct gl_video *p, uint8_t *ptr)
|
|
|
|
{
|
|
|
|
for (int i = 0; i < p->num_dr_buffers; i++) {
|
vo_opengl: start work on rendering API abstraction
This starts work on moving OpenGL-specific code out of the general
renderer code, so that we can support other other GPU APIs. This is in
a very early stage and it's only a proof of concept. It's unknown
whether this will succeed or result in other backends.
For now, the GL rendering API ("ra") and its only provider (ra_gl) does
texture creation/upload/destruction only. And it's used for the main
video texture only. All other code is still hardcoded to GL.
There is some duplication with ra_format and gl_format handling. In the
end, only the ra variants will be needed (plus the gl_format table of
course). For now, this is simpler, because for some reason lots of hwdec
code still requires the GL variants, and would have to be updated to
use the ra ones.
Currently, the video.c code accesses private ra_gl fields. In the end,
it should not do that of course, and it would not include ra_gl.h.
Probably adds bugs, but you can keep them.
2017-07-26 09:19:51 +00:00
|
|
|
struct dr_buffer *buffer = &p->dr_buffers[i];
|
2017-08-05 20:29:48 +00:00
|
|
|
uint8_t *bufptr = buffer->buf->data;
|
|
|
|
size_t size = buffer->buf->params.size;
|
|
|
|
if (ptr >= bufptr && ptr < bufptr + size)
|
vo_opengl: start work on rendering API abstraction
This starts work on moving OpenGL-specific code out of the general
renderer code, so that we can support other other GPU APIs. This is in
a very early stage and it's only a proof of concept. It's unknown
whether this will succeed or result in other backends.
For now, the GL rendering API ("ra") and its only provider (ra_gl) does
texture creation/upload/destruction only. And it's used for the main
video texture only. All other code is still hardcoded to GL.
There is some duplication with ra_format and gl_format handling. In the
end, only the ra variants will be needed (plus the gl_format table of
course). For now, this is simpler, because for some reason lots of hwdec
code still requires the GL variants, and would have to be updated to
use the ra ones.
Currently, the video.c code accesses private ra_gl fields. In the end,
it should not do that of course, and it would not include ra_gl.h.
Probably adds bugs, but you can keep them.
2017-07-26 09:19:51 +00:00
|
|
|
return buffer;
|
2017-07-23 07:41:51 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Release the mp_image references of DR buffers the GPU has finished with.
// If force is true, release them unconditionally (used on teardown), even if
// buf_poll() has not signalled completion yet.
static void gc_pending_dr_fences(struct gl_video *p, bool force)
{
again:;
    for (int n = 0; n < p->num_dr_buffers; n++) {
        struct dr_buffer *buffer = &p->dr_buffers[n];
        // Buffers without an attached image have no pending fence to collect.
        if (!buffer->mpi)
            continue;

        // buf_poll() returns true once the GPU is done reading the buffer.
        bool res = p->ra->fns->buf_poll(p->ra, buffer->buf);
        if (res || force) {
            // Unreferencing the image could cause gl_video_dr_free_buffer()
            // to be called by the talloc destructor (if it was the last
            // reference). This will implicitly invalidate the buffer pointer
            // and change the p->dr_buffers array. To make it worse, it could
            // free multiple dr_buffers due to weird theoretical corner cases.
            // This is also why we use the goto to iterate again from the
            // start, because everything gets fucked up. Hail satan!
            struct mp_image *ref = buffer->mpi;
            buffer->mpi = NULL;
            talloc_free(ref);
            goto again;
        }
    }
}
|
|
|
|
|
2016-06-20 11:57:46 +00:00
|
|
|
static void unref_current_image(struct gl_video *p)
|
|
|
|
{
|
2017-11-03 14:11:14 +00:00
|
|
|
struct video_image *vimg = &p->image;
|
|
|
|
|
|
|
|
if (vimg->hwdec_mapped) {
|
|
|
|
assert(p->hwdec_active && p->hwdec_mapper);
|
|
|
|
ra_hwdec_mapper_unmap(p->hwdec_mapper);
|
|
|
|
memset(vimg->planes, 0, sizeof(vimg->planes));
|
|
|
|
vimg->hwdec_mapped = false;
|
|
|
|
}
|
2017-07-23 07:41:51 +00:00
|
|
|
|
2017-11-03 14:11:14 +00:00
|
|
|
vimg->id = 0;
|
|
|
|
|
|
|
|
mp_image_unrefp(&vimg->mpi);
|
2017-07-23 07:41:51 +00:00
|
|
|
|
|
|
|
// While we're at it, also garbage collect pending fences in here to
|
|
|
|
// get it out of the way.
|
|
|
|
gc_pending_dr_fences(p, false);
|
vo_opengl: refactor how hwdec interop exports textures
Rename gl_hwdec_driver.map_image to map_frame, and let it fill out a
struct gl_hwdec_frame describing the exact texture layout. This gives
more flexibility to what the hwdec interop can export. In particular, it
can export strange component orders/permutations and textures with
padded size. (The latter originating from cropped video.)
The way gl_hwdec_frame works is in the spirit of the rest of the
vo_opengl video processing code, which tends to put as much information
in immediate state (as part of the dataflow), instead of declaring it
globally. To some degree this duplicates the texplane and img_tex
structs, but until we somehow unify those, it's better to give the hwdec
state its own struct. The fact that changing the hwdec struct would
require changes and testing on at least 4 platform/GPU combinations
makes duplicating it almost a requirement to avoid pain later.
Make gl_hwdec_driver.reinit set the new image format and remove the
gl_hwdec.converted_imgfmt field.
Likewise, gl_hwdec.gl_texture_target is replaced with
gl_hwdec_plane.gl_target.
Split out a init_image_desc function from init_format. The latter is not
called in the hwdec case at all anymore. Setting up most of struct
texplane is also completely separate in the hwdec and normal cases.
video.c does not check whether the hwdec "mapped" image format is
supported. This should not really happen anyway, and if it does, the
hwdec interop backend must fail at creation time, so this is not an
issue.
2016-05-10 16:29:10 +00:00
|
|
|
}
|
|
|
|
|
2017-05-02 15:09:16 +00:00
|
|
|
// If overlay mode is used, make sure to remove the overlay.
|
|
|
|
// Be careful with this. Removing the overlay and adding another one will
|
|
|
|
// lead to flickering artifacts.
|
|
|
|
static void unmap_overlay(struct gl_video *p)
|
|
|
|
{
|
vo_gpu: make it possible to load multiple hwdec interop drivers
Make the VO<->decoder interface capable of supporting multiple hwdec
APIs at once. The main gain is that this simplifies autoprobing a lot.
Before this change, it could happen that the VO loaded the "wrong" hwdec
API, and the decoder was stuck with the choice (breaking hw decoding).
With the change applied, the VO simply loads all available APIs, so
autoprobing trickery is left entirely to the decoder.
In the past, we were quite careful about not accidentally loading the
wrong interop drivers. This was in part to make sure autoprobing works,
but also because libva had this obnoxious bug of dumping garbage to
stderr when using the API. libva was fixed, so this is not a problem
anymore.
The --opengl-hwdec-interop option is changed in various ways (again...),
and renamed to --gpu-hwdec-interop. It does not have much use anymore,
other than debugging. It's notable that the order in the hwdec interop
array ra_hwdec_drivers[] still matters if multiple drivers support the
same image formats, so the option can explicitly force one, if that
should ever be necessary, or more likely, for debugging. One example are
the ra_hwdec_d3d11egl and ra_hwdec_d3d11eglrgb drivers, which both
support d3d11 input.
vo_gpu now always loads the interop lazily by default, but when it does,
it loads them all. vo_opengl_cb now always loads them when the GL
context handle is initialized. I don't expect that this causes any
problems.
It's now possible to do things like changing between vdpau and nvdec
decoding at runtime.
This is also preparation for cleaning up vd_lavc.c hwdec autoprobing.
It's another reason why hwdec_devices_request_all() does not take a
hwdec type anymore.
2017-12-01 04:05:00 +00:00
|
|
|
if (p->hwdec_overlay)
|
|
|
|
p->hwdec_overlay->driver->overlay_frame(p->hwdec_overlay, NULL, NULL, NULL, true);
|
2017-05-02 15:09:16 +00:00
|
|
|
}
|
|
|
|
|
2013-03-01 20:19:20 +00:00
|
|
|
// Tear down all per-video state: rendering state, hwdec overlay/mapping,
// the current image reference, plane textures, and the image parameters.
// Afterwards the gl_video is back in the "no video configured" state, so
// gl_video_config() will run init_video() again for the next stream.
static void uninit_video(struct gl_video *p)
{
    uninit_rendering(p);

    struct video_image *vimg = &p->image;

    // Remove a hwdec overlay (if any) before dropping the image reference.
    unmap_overlay(p);
    unref_current_image(p);

    // Free the per-plane textures of the (now unreferenced) video image.
    for (int n = 0; n < p->plane_count; n++) {
        struct texplane *plane = &vimg->planes[n];
        ra_tex_free(p->ra, &plane->tex);
    }
    *vimg = (struct video_image){0};

    // Invalidate image_params to ensure that gl_video_config() will call
    // init_video() on uninitialized gl_video.
    p->real_image_params = (struct mp_image_params){0};
    p->image_params = p->real_image_params;
    p->hwdec_active = false;
    p->hwdec_overlay = NULL;
    ra_hwdec_mapper_free(&p->hwdec_mapper);

    // vdpau interop keeps two deinterleave textures around; free both.
    for (int n = 0; n < 2; n++)
        ra_tex_free(p->ra, &p->vdpau_deinterleave_tex[n]);
}
|
|
|
|
|
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number
of changes and improvements:
1. mpv users can now introspect the vo_opengl passes, which is something
that has been requested multiple times.
2. performance data is now measured per-pass, which helps both
development and debugging.
3. since adding more passes is cheap, we can now report information for
more passes (e.g. the blit pass, and the osd pass). Note: we also
switch to nanosecond scale, to be able to measure these passes
better.
4. `--user-shaders` authors can now describe their own passes, helping
users both identify which user shaders are active at any given time
as well as helping shader authors identify performance issues.
5. the timing data per pass is now exported as a full list of samples,
so projects like Argon-/mpv-stats can immediately read out all of the
samples and render a graph without having to manually poll this
option constantly.
Due to gl_timer's design being complicated (directly reading performance
data would block, so we delay the actual read-back until the next _start
command), it's vital not to conflate different passes that might be
doing different things from one frame to another. To accomplish this,
the actual timers are stored as part of the gl_shader_cache's sc_entry,
which makes them unique for that exact shader.
Starting and stopping the time measurement is easy to unify with the
gl_sc architecture, because the existing API already relies on a
"generate, render, reset" flow, so we can just put timer_start and
timer_stop in sc_generate and sc_reset, respectively.
The ugliest thing about this code is that due to the need to keep pass
information relatively stable in between frames, we need to distinguish
between "new" and "redrawn" frames, which bloats the code somewhat and
also feels hacky and vo_opengl-specific. (But then again, this entire
thing is vo_opengl-specific)
2017-06-29 15:00:06 +00:00
|
|
|
static void pass_record(struct gl_video *p, struct mp_pass_perf perf)
|
|
|
|
{
|
2017-09-27 21:38:54 +00:00
|
|
|
if (!p->pass || p->pass_idx == VO_PASS_PERF_MAX)
|
2017-07-03 14:59:38 +00:00
|
|
|
return;
|
|
|
|
|
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number
of changes and improvements:
1. mpv users can now introspect the vo_opengl passes, which is something
that has been requested multiple times.
2. performance data is now measured per-pass, which helps both
development and debugging.
3. since adding more passes is cheap, we can now report information for
more passes (e.g. the blit pass, and the osd pass). Note: we also
switch to nanosecond scale, to be able to measure these passes
better.
4. `--user-shaders` authors can now describe their own passes, helping
users both identify which user shaders are active at any given time
as well as helping shader authors identify performance issues.
5. the timing data per pass is now exported as a full list of samples,
so projects like Argon-/mpv-stats can immediately read out all of the
samples and render a graph without having to manually poll this
option constantly.
Due to gl_timer's design being complicated (directly reading performance
data would block, so we delay the actual read-back until the next _start
command), it's vital not to conflate different passes that might be
doing different things from one frame to another. To accomplish this,
the actual timers are stored as part of the gl_shader_cache's sc_entry,
which makes them unique for that exact shader.
Starting and stopping the time measurement is easy to unify with the
gl_sc architecture, because the existing API already relies on a
"generate, render, reset" flow, so we can just put timer_start and
timer_stop in sc_generate and sc_reset, respectively.
The ugliest thing about this code is that due to the need to keep pass
information relatively stable in between frames, we need to distinguish
between "new" and "redrawn" frames, which bloats the code somewhat and
also feels hacky and vo_opengl-specific. (But then again, this entire
thing is vo_opengl-specific)
2017-06-29 15:00:06 +00:00
|
|
|
struct pass_info *pass = &p->pass[p->pass_idx];
|
|
|
|
pass->perf = perf;
|
|
|
|
|
|
|
|
if (pass->desc.len == 0)
|
|
|
|
bstr_xappend(p, &pass->desc, bstr0("(unknown)"));
|
|
|
|
|
|
|
|
p->pass_idx++;
|
|
|
|
}
|
|
|
|
|
2017-07-22 20:08:23 +00:00
|
|
|
// Append a printf-style description to the current pass slot. Multiple
// descriptions attached to the same pass are joined with " + ".
PRINTF_ATTRIBUTE(2, 3)
static void pass_describe(struct gl_video *p, const char *textf, ...)
{
    if (!p->pass || p->pass_idx == VO_PASS_PERF_MAX)
        return;

    struct pass_info *cur = &p->pass[p->pass_idx];

    if (cur->desc.len > 0)
        bstr_xappend(p, &cur->desc, bstr0(" + "));

    va_list args;
    va_start(args, textf);
    bstr_xappend_vasprintf(p, &cur->desc, textf, args);
    va_end(args);
}
|
|
|
|
|
|
|
|
static void pass_info_reset(struct gl_video *p, bool is_redraw)
|
|
|
|
{
|
|
|
|
p->pass = is_redraw ? p->pass_redraw : p->pass_fresh;
|
|
|
|
p->pass_idx = 0;
|
|
|
|
|
2017-09-27 21:38:54 +00:00
|
|
|
for (int i = 0; i < VO_PASS_PERF_MAX; i++) {
|
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number
of changes and improvements:
1. mpv users can now introspect the vo_opengl passes, which is something
that has been requested multiple times.
2. performance data is now measured per-pass, which helps both
development and debugging.
3. since adding more passes is cheap, we can now report information for
more passes (e.g. the blit pass, and the osd pass). Note: we also
switch to nanosecond scale, to be able to measure these passes
better.
4. `--user-shaders` authors can now describe their own passes, helping
users both identify which user shaders are active at any given time
as well as helping shader authors identify performance issues.
5. the timing data per pass is now exported as a full list of samples,
so projects like Argon-/mpv-stats can immediately read out all of the
samples and render a graph without having to manually poll this
option constantly.
Due to gl_timer's design being complicated (directly reading performance
data would block, so we delay the actual read-back until the next _start
command), it's vital not to conflate different passes that might be
doing different things from one frame to another. To accomplish this,
the actual timers are stored as part of the gl_shader_cache's sc_entry,
which makes them unique for that exact shader.
Starting and stopping the time measurement is easy to unify with the
gl_sc architecture, because the existing API already relies on a
"generate, render, reset" flow, so we can just put timer_start and
timer_stop in sc_generate and sc_reset, respectively.
The ugliest thing about this code is that due to the need to keep pass
information relatively stable in between frames, we need to distinguish
between "new" and "redrawn" frames, which bloats the code somewhat and
also feels hacky and vo_opengl-specific. (But then again, this entire
thing is vo_opengl-specific)
2017-06-29 15:00:06 +00:00
|
|
|
p->pass[i].desc.len = 0;
|
|
|
|
p->pass[i].perf = (struct mp_pass_perf){0};
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-07-03 14:59:38 +00:00
|
|
|
static void pass_report_performance(struct gl_video *p)
|
|
|
|
{
|
|
|
|
if (!p->pass)
|
|
|
|
return;
|
|
|
|
|
2017-09-27 21:38:54 +00:00
|
|
|
for (int i = 0; i < VO_PASS_PERF_MAX; i++) {
|
2017-07-03 14:59:38 +00:00
|
|
|
struct pass_info *pass = &p->pass[i];
|
|
|
|
if (pass->desc.len) {
|
2017-09-28 09:53:57 +00:00
|
|
|
MP_TRACE(p, "pass '%.*s': last %dus avg %dus peak %dus\n",
|
|
|
|
BSTR_P(pass->desc),
|
|
|
|
(int)pass->perf.last/1000,
|
|
|
|
(int)pass->perf.avg/1000,
|
|
|
|
(int)pass->perf.peak/1000);
|
2017-07-03 14:59:38 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
performing the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
static void pass_prepare_src_tex(struct gl_video *p)
|
2013-05-25 23:48:39 +00:00
|
|
|
{
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
struct gl_shader_cache *sc = p->sc;
|
|
|
|
|
2017-09-27 22:07:42 +00:00
|
|
|
for (int n = 0; n < p->num_pass_imgs; n++) {
|
|
|
|
struct image *s = &p->pass_imgs[n];
|
2017-08-04 13:47:50 +00:00
|
|
|
if (!s->tex)
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
continue;
|
|
|
|
|
2017-07-24 06:07:32 +00:00
|
|
|
char *texture_name = mp_tprintf(32, "texture%d", n);
|
|
|
|
char *texture_size = mp_tprintf(32, "texture_size%d", n);
|
|
|
|
char *texture_rot = mp_tprintf(32, "texture_rot%d", n);
|
2017-07-17 16:11:32 +00:00
|
|
|
char *texture_off = mp_tprintf(32, "texture_off%d", n);
|
2017-07-24 06:07:32 +00:00
|
|
|
char *pixel_size = mp_tprintf(32, "pixel_size%d", n);
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
|
2017-08-04 13:47:50 +00:00
|
|
|
gl_sc_uniform_texture(sc, texture_name, s->tex);
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
float f[2] = {1, 1};
|
2017-08-04 13:47:50 +00:00
|
|
|
if (!s->tex->params.non_normalized) {
|
|
|
|
f[0] = s->tex->params.w;
|
|
|
|
f[1] = s->tex->params.h;
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
}
|
|
|
|
gl_sc_uniform_vec2(sc, texture_size, f);
|
2016-05-14 05:05:04 +00:00
|
|
|
gl_sc_uniform_mat2(sc, texture_rot, true, (float *)s->transform.m);
|
2017-07-17 16:11:32 +00:00
|
|
|
gl_sc_uniform_vec2(sc, texture_off, (float *)s->transform.t);
|
2017-08-07 17:14:18 +00:00
|
|
|
gl_sc_uniform_vec2(sc, pixel_size, (float[]){1.0f / f[0],
|
|
|
|
1.0f / f[1]});
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
}
|
|
|
|
}
|
2013-05-25 23:48:39 +00:00
|
|
|
|
2017-09-27 22:07:42 +00:00
|
|
|
// Drop all image bindings accumulated for the current pass. The uniforms set
// from them remain in the shader cache state until the next gl_sc reset.
static void cleanup_binds(struct gl_video *p)
{
    p->num_pass_imgs = 0;
}
|
|
|
|
|
2017-07-29 18:41:50 +00:00
|
|
|
// Sets the appropriate compute shader metadata for an implicit compute pass
|
|
|
|
// bw/bh: block size
|
|
|
|
static void pass_is_compute(struct gl_video *p, int bw, int bh)
|
2017-07-17 16:11:32 +00:00
|
|
|
{
|
2017-07-29 18:41:50 +00:00
|
|
|
p->pass_compute = (struct compute_info){
|
|
|
|
.active = true,
|
|
|
|
.block_w = bw,
|
|
|
|
.block_h = bh,
|
|
|
|
};
|
2017-07-17 16:11:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Dispatch the current pass as a compute shader.
// w/h: the width/height of the compute shader's operating domain (e.g. the
// target texture that needs to be written, or the source texture that needs to
// be reduced)
static void dispatch_compute(struct gl_video *p, int w, int h,
                             struct compute_info info)
{
    // Work group size: explicit thread counts override the block size.
    PRELUDE("layout (local_size_x = %d, local_size_y = %d) in;\n",
            info.threads_w > 0 ? info.threads_w : info.block_w,
            info.threads_h > 0 ? info.threads_h : info.block_h);

    pass_prepare_src_tex(p);

    // Since we don't actually have vertices, we pretend for convenience
    // reasons that we do and calculate the right texture coordinates based on
    // the output sample ID
    gl_sc_uniform_vec2(p->sc, "out_scale", (float[2]){ 1.0 / w, 1.0 / h });
    PRELUDE("#define outcoord(id) (out_scale * (vec2(id) + vec2(0.5)))\n");

    // Emit one texcoordN macro per bound image, mirroring the texcoordN
    // varyings a fragment-shader pass would get.
    for (int n = 0; n < p->num_pass_imgs; n++) {
        struct image *s = &p->pass_imgs[n];
        if (!s->tex)
            continue;

        // We need to rescale the coordinates to the true texture size
        char *tex_scale = mp_tprintf(32, "tex_scale%d", n);
        gl_sc_uniform_vec2(p->sc, tex_scale, (float[2]){
            (float)s->w / s->tex->params.w,
            (float)s->h / s->tex->params.h,
        });

        PRELUDE("#define texmap%d_raw(id) (tex_scale%d * outcoord(id))\n", n, n);
        PRELUDE("#define texmap%d(id) (texture_rot%d * texmap%d_raw(id) + "
                "pixel_size%d * texture_off%d)\n", n, n, n, n, n);
        PRELUDE("#define texcoord%d texmap%d(gl_GlobalInvocationID)\n", n, n);
    }

    // always round up when dividing to make sure we don't leave off a part of
    // the image
    int num_x = info.block_w > 0 ? (w + info.block_w - 1) / info.block_w : 1,
        num_y = info.block_h > 0 ? (h + info.block_h - 1) / info.block_h : 1;

    pass_record(p, gl_sc_dispatch_compute(p->sc, num_x, num_y, 1));
    cleanup_binds(p);
}
|
|
|
|
|
2017-08-07 14:44:15 +00:00
|
|
|
// Render a full quad covering *dst into the given FBO, with one interleaved
// texture-coordinate attribute per bound pass image. Returns the measured
// performance data for the draw call.
static struct mp_pass_perf render_pass_quad(struct gl_video *p,
                                            struct ra_fbo fbo, bool discard,
                                            const struct mp_rect *dst)
{
    // The first element is reserved for `vec2 position`
    int num_vertex_attribs = 1 + p->num_pass_imgs;
    size_t vertex_stride = num_vertex_attribs * sizeof(struct vertex_pt);

    // Expand the VAO if necessary.
    // NOTE(review): the initializer reads p->vao_len while MP_TARRAY_APPEND
    // also increments it; this relies on the macro evaluating the element
    // value before the increment — confirm against the macro's definition.
    while (p->vao_len < num_vertex_attribs) {
        MP_TARRAY_APPEND(p, p->vao, p->vao_len, (struct ra_renderpass_input) {
            .name = talloc_asprintf(p, "texcoord%d", p->vao_len - 1),
            .type = RA_VARTYPE_FLOAT,
            .dim_v = 2,
            .dim_m = 1,
            .offset = p->vao_len * sizeof(struct vertex_pt),
        });
    }

    int num_vertices = 6; // quad as triangle list
    int num_attribs_total = num_vertices * num_vertex_attribs;
    MP_TARRAY_GROW(p, p->tmp_vertex, num_attribs_total);

    struct gl_transform t;
    gl_transform_ortho_fbo(&t, fbo);

    // Transform the two destination corners into device coordinates.
    float x[2] = {dst->x0, dst->x1};
    float y[2] = {dst->y0, dst->y1};
    gl_transform_vec(t, &x[0], &y[0]);
    gl_transform_vec(t, &x[1], &y[1]);

    // Fill the 4 distinct corners (n/2 selects x, n%2 selects y).
    for (int n = 0; n < 4; n++) {
        struct vertex_pt *vs = &p->tmp_vertex[num_vertex_attribs * n];
        // vec2 position in idx 0
        vs[0].x = x[n / 2];
        vs[0].y = y[n % 2];
        for (int i = 0; i < p->num_pass_imgs; i++) {
            struct image *s = &p->pass_imgs[i];
            if (!s->tex)
                continue;
            struct gl_transform tr = s->transform;
            float tx = (n / 2) * s->w;
            float ty = (n % 2) * s->h;
            gl_transform_vec(tr, &tx, &ty);
            // Non-normalized (rectangle) textures take raw texel coords;
            // otherwise normalize by the physical texture size.
            bool rect = s->tex->params.non_normalized;
            // vec2 texcoordN in idx N+1
            vs[i + 1].x = tx / (rect ? 1 : s->tex->params.w);
            vs[i + 1].y = ty / (rect ? 1 : s->tex->params.h);
        }
    }

    // Duplicate corners 2 and 1 into slots 4 and 5 to turn the 4 unique
    // vertices into a 6-vertex triangle list (two triangles).
    memmove(&p->tmp_vertex[num_vertex_attribs * 4],
            &p->tmp_vertex[num_vertex_attribs * 2],
            vertex_stride);

    memmove(&p->tmp_vertex[num_vertex_attribs * 5],
            &p->tmp_vertex[num_vertex_attribs * 1],
            vertex_stride);

    return gl_sc_dispatch_draw(p->sc, fbo.tex, discard, p->vao, num_vertex_attribs,
                               vertex_stride, p->tmp_vertex, num_vertices);
}
|
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
// Finish the current pass by rendering it as a fragment shader quad into the
// given FBO, recording its performance and releasing the image bindings.
static void finish_pass_fbo(struct gl_video *p, struct ra_fbo fbo,
                            bool discard, const struct mp_rect *dst)
{
    pass_prepare_src_tex(p);
    pass_record(p, render_pass_quad(p, fbo, discard, dst));
    debug_check_gl(p, "after rendering");
    cleanup_binds(p);
}
|
|
|
|
|
|
|
|
// Finish the current pass by rendering it into *dst_tex, reallocating the
// texture if the required parameters have changed.
// w, h: required target dimension, and also defines the target rectangle
// used for rasterization
static void finish_pass_tex(struct gl_video *p, struct ra_tex **dst_tex,
                            int w, int h)
{
    // If the target texture can't be (re)allocated, abandon the pass and
    // reset the accumulated shader state so the next pass starts clean.
    if (!ra_tex_resize(p->ra, p->log, dst_tex, w, h, p->fbo_format)) {
        cleanup_binds(p);
        gl_sc_reset(p->sc);
        return;
    }

    // If RA_CAP_PARALLEL_COMPUTE is set, try to prefer compute shaders
    // over fragment shaders wherever possible.
    if (!p->pass_compute.active && (p->ra->caps & RA_CAP_PARALLEL_COMPUTE))
        pass_is_compute(p, 16, 16);

    if (p->pass_compute.active) {
        gl_sc_uniform_image2D_wo(p->sc, "out_image", *dst_tex);
        // Unless the shader writes the image itself, append the store of
        // the final color to the output image.
        if (!p->pass_compute.directly_writes)
            GLSL(imageStore(out_image, ivec2(gl_GlobalInvocationID), color);)

        dispatch_compute(p, w, h, p->pass_compute);
        // Compute state is consumed by the dispatch; clear it for the
        // next pass.
        p->pass_compute = (struct compute_info){0};

        debug_check_gl(p, "after dispatching compute shader");
    } else {
        struct ra_fbo fbo = { .tex = *dst_tex, };
        finish_pass_fbo(p, fbo, true, &(struct mp_rect){0, 0, w, h});
    }
}
|
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
static const char *get_tex_swizzle(struct image *img)
|
2017-02-17 14:46:11 +00:00
|
|
|
{
|
vo_opengl: separate hwdec context and mapping, port it to use ra
This does two separate rather intrusive things:
1. Make the hwdec context (which does initialization, provides the
device to the decoder, and other basic state) and frame mapping
(getting textures from a mp_image) separate. This is more
flexible, and you could map multiple images at once. It will
help removing some hwdec special-casing from video.c.
2. Switch all hwdec API use to ra. Of course all code is still
GL specific, but in theory it would be possible to support other
backends. The most important change is that the hwdec interop
returns ra objects, instead of anything GL specific. This removes
the last dependency on GL-specific header files from video.c.
I'm mixing these separate changes because both requires essentially
rewriting all the glue code, so better do them at once. For the same
reason, this change isn't done incrementally.
hwdec_ios.m is untested, since I can't test it. Apart from superficial
mistakes, this also requires dealing with Apple's texture format
fuckups: they force you to use GL_LUMINANCE[_ALPHA] instead of GL_RED
and GL_RG. We also need to report the correct format via ra_tex to
the renderer, which is done by find_la_variant(). It's unknown whether
this works correctly.
hwdec_rpi.c as well as vo_rpi.c are still broken. (I need to pull my
RPI out of a dusty pile of devices and cables, so, later.)
2017-08-10 15:48:33 +00:00
|
|
|
if (!img->tex)
|
|
|
|
return "rgba";
|
2017-08-04 13:47:50 +00:00
|
|
|
return img->tex->params.format->luminance_alpha ? "raaa" : "rgba";
|
2017-02-17 14:46:11 +00:00
|
|
|
}
|
|
|
|
|
2016-04-19 18:45:40 +00:00
|
|
|
// Copy a texture to the vec4 color, while increasing offset. Also applies
|
|
|
|
// the texture multiplier to the sampled color
|
2017-09-20 08:45:33 +00:00
|
|
|
static void copy_image(struct gl_video *p, int *offset, struct image img)
|
2016-04-19 18:45:40 +00:00
|
|
|
{
|
|
|
|
int count = img.components;
|
|
|
|
assert(*offset + count <= 4);
|
|
|
|
|
|
|
|
int id = pass_bind(p, img);
|
|
|
|
char src[5] = {0};
|
|
|
|
char dst[5] = {0};
|
2017-02-17 14:46:11 +00:00
|
|
|
const char *tex_fmt = get_tex_swizzle(&img);
|
2016-04-19 18:45:40 +00:00
|
|
|
const char *dst_fmt = "rgba";
|
|
|
|
for (int i = 0; i < count; i++) {
|
|
|
|
src[i] = tex_fmt[i];
|
|
|
|
dst[i] = dst_fmt[*offset + i];
|
|
|
|
}
|
|
|
|
|
2017-08-04 13:47:50 +00:00
|
|
|
if (img.tex && img.tex->params.format->ctype == RA_CTYPE_UINT) {
|
vo_opengl: start work on rendering API abstraction
This starts work on moving OpenGL-specific code out of the general
renderer code, so that we can support other other GPU APIs. This is in
a very early stage and it's only a proof of concept. It's unknown
whether this will succeed or result in other backends.
For now, the GL rendering API ("ra") and its only provider (ra_gl) does
texture creation/upload/destruction only. And it's used for the main
video texture only. All other code is still hardcoded to GL.
There is some duplication with ra_format and gl_format handling. In the
end, only the ra variants will be needed (plus the gl_format table of
course). For now, this is simpler, because for some reason lots of hwdec
code still requires the GL variants, and would have to be updated to
use the ra ones.
Currently, the video.c code accesses private ra_gl fields. In the end,
it should not do that of course, and it would not include ra_gl.h.
Probably adds bugs, but you can keep them.
2017-07-26 09:19:51 +00:00
|
|
|
uint64_t tex_max = 1ull << p->ra_format.component_bits;
|
2016-04-19 18:45:40 +00:00
|
|
|
img.multiplier *= 1.0 / (tex_max - 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
GLSLF("color.%s = %f * vec4(texture(texture%d, texcoord%d)).%s;\n",
|
|
|
|
dst, img.multiplier, id, id, src);
|
|
|
|
|
|
|
|
*offset += count;
|
|
|
|
}
|
|
|
|
|
2016-03-05 11:38:51 +00:00
|
|
|
// Fill the color components not covered by the input: color channels default
// to 0.0, the alpha channel to 1.0.
static void skip_unused(struct gl_video *p, int num_components)
{
    static const char chans[] = "rgba";
    for (int c = num_components; c < 4; c++) {
        double fill = (c < 3) ? 0.0 : 1.0;
        GLSLF("color.%c = %f;\n", chans[c], fill);
    }
}
|
|
|
|
|
vo_opengl: refactor scaler configuration
This merges all of the scaler-related options into a single
configuration struct, and also cleans up the way they're passed through
the code. (For example, the scaler index is no longer threaded through
pass_sample, just the scaler configuration itself, and there's no longer
duplication of the params etc.)
In addition, this commit makes scale-down more principled, and turns it
into a scaler in its own right - so there's no longer an ugly separation
between scale and scale-down in the code.
Finally, the radius stuff has been made more proper - filters always
have a radius now (there's no more radius -1), and get a new .resizable
attribute instead for when it's tunable.
User-visible changes:
1. scale-down has been renamed dscale and now has its own set of config
options (dscale-param1, dscale-radius) etc., instead of reusing
scale-param1 (which was arguably a bug).
2. The default radius is no longer fixed at 3, but instead uses that
filter's preferred radius by default. (Scalers with a default radius
other than 3 include sinc, gaussian, box and triangle)
3. scale-radius etc. now goes down to 0.5, rather than 1.0. 0.5 is the
smallest radius that theoretically makes sense, and indeed it's used
by at least one filter (nearest).
Apart from that, it should just be internal changes only.
Note that this sets up for the refactor discussed in #1720, which would
be to merge scaler and window configurations (include parameters etc.)
into a single, simplified string. In the code, this would now basically
just mean getting rid of all the OPT_FLOATRANGE etc. lines related to
scalers and replacing them by a single function that parses a string and
updates the struct scaler_config as appropriate.
2015-03-26 00:55:32 +00:00
|
|
|
static void uninit_scaler(struct gl_video *p, struct scaler *scaler)
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
{
|
2017-09-20 08:45:33 +00:00
|
|
|
ra_tex_free(p->ra, &scaler->sep_fbo);
|
2017-07-29 18:15:59 +00:00
|
|
|
ra_tex_free(p->ra, &scaler->lut);
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
scaler->kernel = NULL;
|
|
|
|
scaler->initialized = false;
|
|
|
|
}
|
|
|
|
|
2016-05-14 05:05:04 +00:00
|
|
|
// Emit the GLSL header "prelude" that makes the texture bound at unit `id`
// accessible to hook shaders under the alias `name`: a family of #defines
// (NAME_raw, NAME_pos, NAME_size, NAME_rot, NAME_pt, NAME_map, NAME_mul)
// plus the NAME_tex / NAME_texOff sampling helpers.
static void hook_prelude(struct gl_video *p, const char *name, int id,
                         struct image img)
{
    GLSLHF("#define %s_raw texture%d\n", name, id);
    GLSLHF("#define %s_pos texcoord%d\n", name, id);
    GLSLHF("#define %s_size texture_size%d\n", name, id);
    GLSLHF("#define %s_rot texture_rot%d\n", name, id);
    GLSLHF("#define %s_pt pixel_size%d\n", name, id);
    GLSLHF("#define %s_map texmap%d\n", name, id);
    GLSLHF("#define %s_mul %f\n", name, img.multiplier);

    // Set up the sampling functions
    GLSLHF("#define %s_tex(pos) (%s_mul * vec4(texture(%s_raw, pos)).%s)\n",
           name, name, name, get_tex_swizzle(&img));

    // Since the extra matrix multiplication impacts performance,
    // skip it unless the texture was actually rotated
    if (gl_transform_eq(img.transform, identity_trans)) {
        GLSLHF("#define %s_texOff(off) %s_tex(%s_pos + %s_pt * vec2(off))\n",
               name, name, name, name);
    } else {
        GLSLHF("#define %s_texOff(off) "
               "%s_tex(%s_pos + %s_rot * vec2(off)/%s_size)\n",
               name, name, name, name, name);
    }
}
|
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
static bool saved_img_find(struct gl_video *p, const char *name,
|
|
|
|
struct image *out)
|
2016-04-16 16:14:32 +00:00
|
|
|
{
|
|
|
|
if (!name || !out)
|
|
|
|
return false;
|
|
|
|
|
2017-09-27 21:38:54 +00:00
|
|
|
for (int i = 0; i < p->num_saved_imgs; i++) {
|
|
|
|
if (strcmp(p->saved_imgs[i].name, name) == 0) {
|
|
|
|
*out = p->saved_imgs[i].img;
|
2016-04-16 16:14:32 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
static void saved_img_store(struct gl_video *p, const char *name,
|
|
|
|
struct image img)
|
2016-04-16 16:14:32 +00:00
|
|
|
{
|
|
|
|
assert(name);
|
|
|
|
|
2017-09-27 21:38:54 +00:00
|
|
|
for (int i = 0; i < p->num_saved_imgs; i++) {
|
|
|
|
if (strcmp(p->saved_imgs[i].name, name) == 0) {
|
|
|
|
p->saved_imgs[i].img = img;
|
2016-04-16 16:14:32 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-09-27 21:38:54 +00:00
|
|
|
MP_TARRAY_APPEND(p, p->saved_imgs, p->num_saved_imgs, (struct saved_img) {
|
2016-04-16 16:14:32 +00:00
|
|
|
.name = name,
|
2017-09-20 08:45:33 +00:00
|
|
|
.img = img
|
2017-09-27 21:38:54 +00:00
|
|
|
});
|
2016-04-16 16:14:32 +00:00
|
|
|
}
|
|
|
|
|
2017-07-10 23:59:21 +00:00
|
|
|
// Bind every texture a hook requested via BIND before it runs. Resolves the
// special "HOOKED" alias, direct user-defined textures, and named saved
// images (in that order). Returns false — after undoing the partial bindings
// made so far — if any requested texture cannot be found.
static bool pass_hook_setup_binds(struct gl_video *p, const char *name,
                                  struct image img, struct tex_hook *hook)
{
    for (int t = 0; t < SHADER_MAX_BINDS; t++) {
        char *bind_name = (char *)hook->bind_tex[t];

        if (!bind_name)
            continue;

        // This is a special name that means "currently hooked texture"
        if (strcmp(bind_name, "HOOKED") == 0) {
            int id = pass_bind(p, img);
            // Make the texture reachable both as HOOKED and under its real name
            hook_prelude(p, "HOOKED", id, img);
            hook_prelude(p, name, id, img);
            continue;
        }

        // BIND can also be used to load user-defined textures, in which
        // case we will directly load them as a uniform instead of
        // generating the hook_prelude boilerplate
        for (int u = 0; u < p->num_user_textures; u++) {
            struct gl_user_shader_tex *utex = &p->user_textures[u];
            if (bstr_equals0(utex->name, bind_name)) {
                gl_sc_uniform_texture(p->sc, bind_name, utex->tex);
                goto next_bind;
            }
        }

        struct image bind_img;
        if (!saved_img_find(p, bind_name, &bind_img)) {
            // Clean up texture bindings and move on to the next hook
            MP_TRACE(p, "Skipping hook on %s due to no texture named %s.\n",
                     name, bind_name);
            // Roll back the t pass images bound by the iterations above
            p->num_pass_imgs -= t;
            return false;
        }

        hook_prelude(p, bind_name, pass_bind(p, bind_img), bind_img);

next_bind: ;
    }

    return true;
}
|
|
|
|
|
2017-09-27 21:38:54 +00:00
|
|
|
static struct ra_tex **next_hook_tex(struct gl_video *p)
|
|
|
|
{
|
|
|
|
if (p->idx_hook_textures == p->num_hook_textures)
|
|
|
|
MP_TARRAY_APPEND(p, p->hook_textures, p->num_hook_textures, NULL);
|
|
|
|
|
|
|
|
return &p->hook_textures[p->idx_hook_textures++];
|
|
|
|
}
|
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
// Process hooks for a plane, saving the result and returning a new image
// If 'trans' is NULL, the shader is forbidden from transforming img
static struct image pass_hook(struct gl_video *p, const char *name,
                              struct image img, struct gl_transform *trans)
{
    if (!name)
        return img;

    // Make the incoming image findable by later hooks under its own name
    saved_img_store(p, name, img);

    MP_TRACE(p, "Running hooks for %s\n", name);
    for (int i = 0; i < p->num_tex_hooks; i++) {
        struct tex_hook *hook = &p->tex_hooks[i];

        // Figure out if this pass hooks this texture
        for (int h = 0; h < SHADER_MAX_HOOKS; h++) {
            if (hook->hook_tex[h] && strcmp(hook->hook_tex[h], name) == 0)
                goto found;
        }

        continue;

found:
        // Check the hook's condition
        if (hook->cond && !hook->cond(p, img, hook->priv)) {
            MP_TRACE(p, "Skipping hook on %s due to condition.\n", name);
            continue;
        }

        // Bind all textures the hook requested; skip it if any are missing
        if (!pass_hook_setup_binds(p, name, img, hook))
            continue;

        // Run the actual hook. This generates a series of GLSL shader
        // instructions sufficient for drawing the hook's output
        struct gl_transform hook_off = identity_trans;
        hook->hook(p, img, &hook_off, hook->priv);

        // Force unused channels to neutral values
        int comps = hook->components ? hook->components : img.components;
        skip_unused(p, comps);

        // Compute the updated FBO dimensions and store the result
        struct mp_rect_f sz = {0, 0, img.w, img.h};
        gl_transform_rect(hook_off, &sz);
        int w = lroundf(fabs(sz.x1 - sz.x0));
        int h = lroundf(fabs(sz.y1 - sz.y0));

        struct ra_tex **tex = next_hook_tex(p);
        finish_pass_tex(p, tex, w, h);
        const char *store_name = hook->save_tex ? hook->save_tex : name;
        struct image saved_img = image_wrap(*tex, img.type, comps);

        // If the texture we're saving overwrites the "current" texture, also
        // update the tex parameter so that the future loop cycles will use the
        // updated values, and export the offset
        if (strcmp(store_name, name) == 0) {
            if (!trans && !gl_transform_eq(hook_off, identity_trans)) {
                MP_ERR(p, "Hook tried changing size of unscalable texture %s!\n",
                       name);
                return img;
            }

            img = saved_img;
            if (trans)
                gl_transform_trans(hook_off, trans);
        }

        saved_img_store(p, store_name, saved_img);
    }

    return img;
}
|
|
|
|
|
2016-04-19 18:45:40 +00:00
|
|
|
// This can be used at any time in the middle of rendering to specify an
// optional hook point, which if triggered will render out to a new FBO and
// load the result back into vec4 color. Offsets applied by the hooks are
// accumulated in tex_trans, and the FBO is dimensioned according
// to p->texture_w/h
static void pass_opt_hook_point(struct gl_video *p, const char *name,
                                struct gl_transform *tex_trans)
{
    if (!name)
        return;

    // Only pay the FBO round-trip cost if some hook actually references
    // this name, either as a hook target or as a bound texture.
    for (int i = 0; i < p->num_tex_hooks; i++) {
        struct tex_hook *hook = &p->tex_hooks[i];

        for (int h = 0; h < SHADER_MAX_HOOKS; h++) {
            if (hook->hook_tex[h] && strcmp(hook->hook_tex[h], name) == 0)
                goto found;
        }

        for (int b = 0; b < SHADER_MAX_BINDS; b++) {
            if (hook->bind_tex[b] && strcmp(hook->bind_tex[b], name) == 0)
                goto found;
        }
    }

    // Nothing uses this texture, don't bother storing it
    return;

found: ;
    // Render the current pass out to a texture, run the hooks on it, and
    // load the (possibly resized) result back as the current pass state.
    struct ra_tex **tex = next_hook_tex(p);
    finish_pass_tex(p, tex, p->texture_w, p->texture_h);
    struct image img = image_wrap(*tex, PLANE_RGB, p->components);
    img = pass_hook(p, name, img, tex_trans);
    copy_image(p, &(int){0}, img);
    p->texture_w = img.w;
    p->texture_h = img.h;
    p->components = img.components;
    pass_describe(p, "(remainder pass)");
}
|
2016-04-16 16:14:32 +00:00
|
|
|
|
2016-04-20 23:33:13 +00:00
|
|
|
// Append a user shader body to the current pass and upload the standard
// uniforms user shaders may reference (random, frame, input_size,
// target_size, tex_offset).
static void load_shader(struct gl_video *p, struct bstr body)
{
    gl_sc_hadd_bstr(p->sc, body);
    // gl_sc_uniform_dynamic() marks the next uniform as changing every
    // frame, so it is called before each of the two dynamic uniforms.
    gl_sc_uniform_dynamic(p->sc);
    gl_sc_uniform_f(p->sc, "random", (double)av_lfg_get(&p->lfg) / UINT32_MAX);
    gl_sc_uniform_dynamic(p->sc);
    gl_sc_uniform_i(p->sc, "frame", p->frames_uploaded);
    // Source size in texels, scaled by the accumulated texture offset matrix
    gl_sc_uniform_vec2(p->sc, "input_size",
                       (float[]){(p->src_rect.x1 - p->src_rect.x0) *
                                  p->texture_offset.m[0][0],
                                  (p->src_rect.y1 - p->src_rect.y0) *
                                  p->texture_offset.m[1][1]});
    gl_sc_uniform_vec2(p->sc, "target_size",
                       (float[]){p->dst_rect.x1 - p->dst_rect.x0,
                                 p->dst_rect.y1 - p->dst_rect.y0});
    // Offset of the source rect, transformed through the texture offset
    gl_sc_uniform_vec2(p->sc, "tex_offset",
                       (float[]){p->src_rect.x0 * p->texture_offset.m[0][0] +
                                 p->texture_offset.t[0],
                                 p->src_rect.y0 * p->texture_offset.m[1][1] +
                                 p->texture_offset.t[1]});
}
|
|
|
|
|
vo_opengl: refactor scaler configuration
This merges all of the scaler-related options into a single
configuration struct, and also cleans up the way they're passed through
the code. (For example, the scaler index is no longer threaded through
pass_sample, just the scaler configuration itself, and there's no longer
duplication of the params etc.)
In addition, this commit makes scale-down more principled, and turns it
into a scaler in its own right - so there's no longer an ugly separation
between scale and scale-down in the code.
Finally, the radius stuff has been made more proper - filters always
have a radius now (there's no more radius -1), and get a new .resizable
attribute instead for when it's tunable.
User-visible changes:
1. scale-down has been renamed dscale and now has its own set of config
options (dscale-param1, dscale-radius) etc., instead of reusing
scale-param1 (which was arguably a bug).
2. The default radius is no longer fixed at 3, but instead uses that
filter's preferred radius by default. (Scalers with a default radius
other than 3 include sinc, gaussian, box and triangle)
3. scale-radius etc. now goes down to 0.5, rather than 1.0. 0.5 is the
smallest radius that theoretically makes sense, and indeed it's used
by at least one filter (nearest).
Apart from that, it should just be internal changes only.
Note that this sets up for the refactor discussed in #1720, which would
be to merge scaler and window configurations (include parameters etc.)
into a single, simplified string. In the code, this would now basically
just mean getting rid of all the OPT_FLOATRANGE etc. lines related to
scalers and replacing them by a single function that parses a string and
updates the struct scaler_config as appropriate.
2015-03-26 00:55:32 +00:00
|
|
|
// Semantic equality for doubles: like ==, except that NaN compares equal
// to NaN (so two unset/NaN parameters don't count as a config change).
static bool double_seq(double a, double b)
{
    if (isnan(a) && isnan(b))
        return true;
    return a == b;
}
|
|
|
|
|
|
|
|
// Semantic equality of two scaler function configs. Names are compared by
// content (two NULL names count as equal; NULL vs. non-NULL does not), and
// the float parameters via double_seq() so NaN compares equal to itself.
static bool scaler_fun_eq(struct scaler_fun a, struct scaler_fun b)
{
    bool names_match;
    if (!a.name && !b.name) {
        names_match = true;
    } else if (a.name && b.name) {
        names_match = strcmp(a.name, b.name) == 0;
    } else {
        // Exactly one side has a name set
        return false;
    }

    return names_match &&
           double_seq(a.params[0], b.params[0]) &&
           double_seq(a.params[1], b.params[1]) &&
           a.blur == b.blur &&
           a.taper == b.taper;
}
|
|
|
|
|
|
|
|
// Semantic equality of two full scaler configurations. The antiring value
// is deliberately ignored, since it doesn't affect LUT generation.
static bool scaler_conf_eq(struct scaler_config a, struct scaler_config b)
{
    if (!scaler_fun_eq(a.kernel, b.kernel))
        return false;
    if (!scaler_fun_eq(a.window, b.window))
        return false;
    return a.radius == b.radius && a.clamp == b.clamp;
}
|
|
|
|
|
|
|
|
static void reinit_scaler(struct gl_video *p, struct scaler *scaler,
|
|
|
|
const struct scaler_config *conf,
|
|
|
|
double scale_factor,
|
|
|
|
int sizes[])
|
2013-03-01 20:19:20 +00:00
|
|
|
{
|
vo_opengl: refactor scaler configuration
This merges all of the scaler-related options into a single
configuration struct, and also cleans up the way they're passed through
the code. (For example, the scaler index is no longer threaded through
pass_sample, just the scaler configuration itself, and there's no longer
duplication of the params etc.)
In addition, this commit makes scale-down more principled, and turns it
into a scaler in its own right - so there's no longer an ugly separation
between scale and scale-down in the code.
Finally, the radius stuff has been made more proper - filters always
have a radius now (there's no more radius -1), and get a new .resizable
attribute instead for when it's tunable.
User-visible changes:
1. scale-down has been renamed dscale and now has its own set of config
options (dscale-param1, dscale-radius) etc., instead of reusing
scale-param1 (which was arguably a bug).
2. The default radius is no longer fixed at 3, but instead uses that
filter's preferred radius by default. (Scalers with a default radius
other than 3 include sinc, gaussian, box and triangle)
3. scale-radius etc. now goes down to 0.5, rather than 1.0. 0.5 is the
smallest radius that theoretically makes sense, and indeed it's used
by at least one filter (nearest).
Apart from that, it should just be internal changes only.
Note that this sets up for the refactor discussed in #1720, which would
be to merge scaler and window configurations (include parameters etc.)
into a single, simplified string. In the code, this would now basically
just mean getting rid of all the OPT_FLOATRANGE etc. lines related to
scalers and replacing them by a single function that parses a string and
updates the struct scaler_config as appropriate.
2015-03-26 00:55:32 +00:00
|
|
|
if (scaler_conf_eq(scaler->conf, *conf) &&
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
scaler->scale_factor == scale_factor &&
|
|
|
|
scaler->initialized)
|
2014-04-20 19:30:23 +00:00
|
|
|
return;
|
|
|
|
|
vo_opengl: refactor scaler configuration
This merges all of the scaler-related options into a single
configuration struct, and also cleans up the way they're passed through
the code. (For example, the scaler index is no longer threaded through
pass_sample, just the scaler configuration itself, and there's no longer
duplication of the params etc.)
In addition, this commit makes scale-down more principled, and turns it
into a scaler in its own right - so there's no longer an ugly separation
between scale and scale-down in the code.
Finally, the radius stuff has been made more proper - filters always
have a radius now (there's no more radius -1), and get a new .resizable
attribute instead for when it's tunable.
User-visible changes:
1. scale-down has been renamed dscale and now has its own set of config
options (dscale-param1, dscale-radius) etc., instead of reusing
scale-param1 (which was arguably a bug).
2. The default radius is no longer fixed at 3, but instead uses that
filter's preferred radius by default. (Scalers with a default radius
other than 3 include sinc, gaussian, box and triangle)
3. scale-radius etc. now goes down to 0.5, rather than 1.0. 0.5 is the
smallest radius that theoretically makes sense, and indeed it's used
by at least one filter (nearest).
Apart from that, it should just be internal changes only.
Note that this sets up for the refactor discussed in #1720, which would
be to merge scaler and window configurations (include parameters etc.)
into a single, simplified string. In the code, this would now basically
just mean getting rid of all the OPT_FLOATRANGE etc. lines related to
scalers and replacing them by a single function that parses a string and
updates the struct scaler_config as appropriate.
2015-03-26 00:55:32 +00:00
|
|
|
uninit_scaler(p, scaler);
|
2014-04-20 19:30:23 +00:00
|
|
|
|
vo_opengl: refactor scaler configuration
This merges all of the scaler-related options into a single
configuration struct, and also cleans up the way they're passed through
the code. (For example, the scaler index is no longer threaded through
pass_sample, just the scaler configuration itself, and there's no longer
duplication of the params etc.)
In addition, this commit makes scale-down more principled, and turns it
into a scaler in its own right - so there's no longer an ugly separation
between scale and scale-down in the code.
Finally, the radius stuff has been made more proper - filters always
have a radius now (there's no more radius -1), and get a new .resizable
attribute instead for when it's tunable.
User-visible changes:
1. scale-down has been renamed dscale and now has its own set of config
options (dscale-param1, dscale-radius) etc., instead of reusing
scale-param1 (which was arguably a bug).
2. The default radius is no longer fixed at 3, but instead uses that
filter's preferred radius by default. (Scalers with a default radius
other than 3 include sinc, gaussian, box and triangle)
3. scale-radius etc. now goes down to 0.5, rather than 1.0. 0.5 is the
smallest radius that theoretically makes sense, and indeed it's used
by at least one filter (nearest).
Apart from that, it should just be internal changes only.
Note that this sets up for the refactor discussed in #1720, which would
be to merge scaler and window configurations (include parameters etc.)
into a single, simplified string. In the code, this would now basically
just mean getting rid of all the OPT_FLOATRANGE etc. lines related to
scalers and replacing them by a single function that parses a string and
updates the struct scaler_config as appropriate.
2015-03-26 00:55:32 +00:00
|
|
|
scaler->conf = *conf;
|
2016-06-04 18:48:56 +00:00
|
|
|
bool is_tscale = scaler->index == SCALER_TSCALE;
|
|
|
|
scaler->conf.kernel.name = (char *)handle_scaler_opt(conf->kernel.name, is_tscale);
|
|
|
|
scaler->conf.window.name = (char *)handle_scaler_opt(conf->window.name, is_tscale);
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
scaler->scale_factor = scale_factor;
|
|
|
|
scaler->insufficient = false;
|
|
|
|
scaler->initialized = true;
|
2013-03-01 20:19:20 +00:00
|
|
|
|
vo_opengl: refactor scaler configuration
This merges all of the scaler-related options into a single
configuration struct, and also cleans up the way they're passed through
the code. (For example, the scaler index is no longer threaded through
pass_sample, just the scaler configuration itself, and there's no longer
duplication of the params etc.)
In addition, this commit makes scale-down more principled, and turns it
into a scaler in its own right - so there's no longer an ugly separation
between scale and scale-down in the code.
Finally, the radius stuff has been made more proper - filters always
have a radius now (there's no more radius -1), and get a new .resizable
attribute instead for when it's tunable.
User-visible changes:
1. scale-down has been renamed dscale and now has its own set of config
options (dscale-param1, dscale-radius) etc., instead of reusing
scale-param1 (which was arguably a bug).
2. The default radius is no longer fixed at 3, but instead uses that
filter's preferred radius by default. (Scalers with a default radius
other than 3 include sinc, gaussian, box and triangle)
3. scale-radius etc. now goes down to 0.5, rather than 1.0. 0.5 is the
smallest radius that theoretically makes sense, and indeed it's used
by at least one filter (nearest).
Apart from that, it should just be internal changes only.
Note that this sets up for the refactor discussed in #1720, which would
be to merge scaler and window configurations (include parameters etc.)
into a single, simplified string. In the code, this would now basically
just mean getting rid of all the OPT_FLOATRANGE etc. lines related to
scalers and replacing them by a single function that parses a string and
updates the struct scaler_config as appropriate.
2015-03-26 00:55:32 +00:00
|
|
|
const struct filter_kernel *t_kernel = mp_find_filter_kernel(conf->kernel.name);
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
performing the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
if (!t_kernel)
|
|
|
|
return;
|
2013-03-01 20:19:20 +00:00
|
|
|
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
performing the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
scaler->kernel_storage = *t_kernel;
|
|
|
|
scaler->kernel = &scaler->kernel_storage;
|
2013-03-01 20:19:20 +00:00
|
|
|
|
vo_opengl: refactor scaler configuration
This merges all of the scaler-related options into a single
configuration struct, and also cleans up the way they're passed through
the code. (For example, the scaler index is no longer threaded through
pass_sample, just the scaler configuration itself, and there's no longer
duplication of the params etc.)
In addition, this commit makes scale-down more principled, and turns it
into a scaler in its own right - so there's no longer an ugly separation
between scale and scale-down in the code.
Finally, the radius stuff has been made more proper - filters always
have a radius now (there's no more radius -1), and get a new .resizable
attribute instead for when it's tunable.
User-visible changes:
1. scale-down has been renamed dscale and now has its own set of config
options (dscale-param1, dscale-radius) etc., instead of reusing
scale-param1 (which was arguably a bug).
2. The default radius is no longer fixed at 3, but instead uses that
filter's preferred radius by default. (Scalers with a default radius
other than 3 include sinc, gaussian, box and triangle)
3. scale-radius etc. now goes down to 0.5, rather than 1.0. 0.5 is the
smallest radius that theoretically makes sense, and indeed it's used
by at least one filter (nearest).
Apart from that, it should just be internal changes only.
Note that this sets up for the refactor discussed in #1720, which would
be to merge scaler and window configurations (include parameters etc.)
into a single, simplified string. In the code, this would now basically
just mean getting rid of all the OPT_FLOATRANGE etc. lines related to
scalers and replacing them by a single function that parses a string and
updates the struct scaler_config as appropriate.
2015-03-26 00:55:32 +00:00
|
|
|
const char *win = conf->window.name;
|
vo_opengl: separate kernel and window
This makes the core much more elegant, reusable, reconfigurable and also
allows us to more easily add aliases for specific configurations.
Furthermore, this lets us apply a generic blur factor / window function
to arbitrary filters, so we can finally "mix and match" in order to
fine-tune windowing functions.
A few notes are in order:
1. The current system for configuring scalers is ugly and rapidly
getting unwieldy. I modified the man page to make it a bit more
bearable, but long-term we have to do something about it; especially
since..
2. There's currently no way to affect the blur factor or parameters of
the window functions themselves. For example, I can't actually
fine-tune the kaiser window's param1, since there's simply no way to
do so in the current API - even though filter_kernels.c supports it
just fine!
3. This removes some lesser used filters (especially those which are
purely window functions to begin with). If anybody asks, you can get
eg. the old behavior of scale=hanning by using
scale=box:scale-window=hanning:scale-radius=1 (and yes, the result is
just as terrible as that sounds - which is why nobody should have
been using them in the first place).
4. This changes the semantics of the "triangle" scaler slightly - it now
has an arbitrary radius. This can possibly produce weird results for
people who were previously using scale-down=triangle, especially if
in combination with scale-radius (for the usual upscaling). The
correct fix for this is to use scale-down=bilinear_slow instead,
which is an alias for triangle at radius 1.
In regards to the last point, in future I want to make it so that
filters have a filter-specific "preferred radius" (for the ones that
are arbitrarily tunable), once the configuration system for filters has
been redesigned (in particular in a way that will let us separate scale
and scale-down cleanly). That way, "triangle" can simply have the
preferred radius of 1 by default, while still being tunable. (Rather
than the default radius being hard-coded to 3 always)
2015-03-25 03:40:28 +00:00
|
|
|
if (!win || !win[0])
|
vo_opengl: refactor scaler configuration
This merges all of the scaler-related options into a single
configuration struct, and also cleans up the way they're passed through
the code. (For example, the scaler index is no longer threaded through
pass_sample, just the scaler configuration itself, and there's no longer
duplication of the params etc.)
In addition, this commit makes scale-down more principled, and turns it
into a scaler in its own right - so there's no longer an ugly separation
between scale and scale-down in the code.
Finally, the radius stuff has been made more proper - filters always
have a radius now (there's no more radius -1), and get a new .resizable
attribute instead for when it's tunable.
User-visible changes:
1. scale-down has been renamed dscale and now has its own set of config
options (dscale-param1, dscale-radius) etc., instead of reusing
scale-param1 (which was arguably a bug).
2. The default radius is no longer fixed at 3, but instead uses that
filter's preferred radius by default. (Scalers with a default radius
other than 3 include sinc, gaussian, box and triangle)
3. scale-radius etc. now goes down to 0.5, rather than 1.0. 0.5 is the
smallest radius that theoretically makes sense, and indeed it's used
by at least one filter (nearest).
Apart from that, it should just be internal changes only.
Note that this sets up for the refactor discussed in #1720, which would
be to merge scaler and window configurations (include parameters etc.)
into a single, simplified string. In the code, this would now basically
just mean getting rid of all the OPT_FLOATRANGE etc. lines related to
scalers and replacing them by a single function that parses a string and
updates the struct scaler_config as appropriate.
2015-03-26 00:55:32 +00:00
|
|
|
win = t_kernel->window; // fall back to the scaler's default window
|
vo_opengl: separate kernel and window
This makes the core much more elegant, reusable, reconfigurable and also
allows us to more easily add aliases for specific configurations.
Furthermore, this lets us apply a generic blur factor / window function
to arbitrary filters, so we can finally "mix and match" in order to
fine-tune windowing functions.
A few notes are in order:
1. The current system for configuring scalers is ugly and rapidly
getting unwieldy. I modified the man page to make it a bit more
bearable, but long-term we have to do something about it; especially
since..
2. There's currently no way to affect the blur factor or parameters of
the window functions themselves. For example, I can't actually
fine-tune the kaiser window's param1, since there's simply no way to
do so in the current API - even though filter_kernels.c supports it
just fine!
3. This removes some lesser used filters (especially those which are
purely window functions to begin with). If anybody asks, you can get
eg. the old behavior of scale=hanning by using
scale=box:scale-window=hanning:scale-radius=1 (and yes, the result is
just as terrible as that sounds - which is why nobody should have
been using them in the first place).
4. This changes the semantics of the "triangle" scaler slightly - it now
has an arbitrary radius. This can possibly produce weird results for
people who were previously using scale-down=triangle, especially if
in combination with scale-radius (for the usual upscaling). The
correct fix for this is to use scale-down=bilinear_slow instead,
which is an alias for triangle at radius 1.
In regards to the last point, in future I want to make it so that
filters have a filter-specific "preferred radius" (for the ones that
are arbitrarily tunable), once the configuration system for filters has
been redesigned (in particular in a way that will let us separate scale
and scale-down cleanly). That way, "triangle" can simply have the
preferred radius of 1 by default, while still being tunable. (Rather
than the default radius being hard-coded to 3 always)
2015-03-25 03:40:28 +00:00
|
|
|
const struct filter_window *t_window = mp_find_filter_window(win);
|
|
|
|
if (t_window)
|
|
|
|
scaler->kernel->w = *t_window;
|
|
|
|
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
performing the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
for (int n = 0; n < 2; n++) {
|
vo_opengl: refactor scaler configuration
This merges all of the scaler-related options into a single
configuration struct, and also cleans up the way they're passed through
the code. (For example, the scaler index is no longer threaded through
pass_sample, just the scaler configuration itself, and there's no longer
duplication of the params etc.)
In addition, this commit makes scale-down more principled, and turns it
into a scaler in its own right - so there's no longer an ugly separation
between scale and scale-down in the code.
Finally, the radius stuff has been made more proper - filters always
have a radius now (there's no more radius -1), and get a new .resizable
attribute instead for when it's tunable.
User-visible changes:
1. scale-down has been renamed dscale and now has its own set of config
options (dscale-param1, dscale-radius) etc., instead of reusing
scale-param1 (which was arguably a bug).
2. The default radius is no longer fixed at 3, but instead uses that
filter's preferred radius by default. (Scalers with a default radius
other than 3 include sinc, gaussian, box and triangle)
3. scale-radius etc. now goes down to 0.5, rather than 1.0. 0.5 is the
smallest radius that theoretically makes sense, and indeed it's used
by at least one filter (nearest).
Apart from that, it should just be internal changes only.
Note that this sets up for the refactor discussed in #1720, which would
be to merge scaler and window configurations (include parameters etc.)
into a single, simplified string. In the code, this would now basically
just mean getting rid of all the OPT_FLOATRANGE etc. lines related to
scalers and replacing them by a single function that parses a string and
updates the struct scaler_config as appropriate.
2015-03-26 00:55:32 +00:00
|
|
|
if (!isnan(conf->kernel.params[n]))
|
|
|
|
scaler->kernel->f.params[n] = conf->kernel.params[n];
|
|
|
|
if (!isnan(conf->window.params[n]))
|
|
|
|
scaler->kernel->w.params[n] = conf->window.params[n];
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
performing the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
}
|
2014-04-20 19:37:18 +00:00
|
|
|
|
vo_opengl: refactor scaler configuration
This merges all of the scaler-related options into a single
configuration struct, and also cleans up the way they're passed through
the code. (For example, the scaler index is no longer threaded through
pass_sample, just the scaler configuration itself, and there's no longer
duplication of the params etc.)
In addition, this commit makes scale-down more principled, and turns it
into a scaler in its own right - so there's no longer an ugly separation
between scale and scale-down in the code.
Finally, the radius stuff has been made more proper - filters always
have a radius now (there's no more radius -1), and get a new .resizable
attribute instead for when it's tunable.
User-visible changes:
1. scale-down has been renamed dscale and now has its own set of config
options (dscale-param1, dscale-radius) etc., instead of reusing
scale-param1 (which was arguably a bug).
2. The default radius is no longer fixed at 3, but instead uses that
filter's preferred radius by default. (Scalers with a default radius
other than 3 include sinc, gaussian, box and triangle)
3. scale-radius etc. now goes down to 0.5, rather than 1.0. 0.5 is the
smallest radius that theoretically makes sense, and indeed it's used
by at least one filter (nearest).
Apart from that, it should just be internal changes only.
Note that this sets up for the refactor discussed in #1720, which would
be to merge scaler and window configurations (include parameters etc.)
into a single, simplified string. In the code, this would now basically
just mean getting rid of all the OPT_FLOATRANGE etc. lines related to
scalers and replacing them by a single function that parses a string and
updates the struct scaler_config as appropriate.
2015-03-26 00:55:32 +00:00
|
|
|
if (conf->kernel.blur > 0.0)
|
|
|
|
scaler->kernel->f.blur = conf->kernel.blur;
|
|
|
|
if (conf->window.blur > 0.0)
|
|
|
|
scaler->kernel->w.blur = conf->window.blur;
|
2014-04-20 19:30:23 +00:00
|
|
|
|
2016-10-26 14:32:57 +00:00
|
|
|
if (conf->kernel.taper > 0.0)
|
|
|
|
scaler->kernel->f.taper = conf->kernel.taper;
|
|
|
|
if (conf->window.taper > 0.0)
|
|
|
|
scaler->kernel->w.taper = conf->window.taper;
|
|
|
|
|
vo_opengl: refactor scaler configuration
This merges all of the scaler-related options into a single
configuration struct, and also cleans up the way they're passed through
the code. (For example, the scaler index is no longer threaded through
pass_sample, just the scaler configuration itself, and there's no longer
duplication of the params etc.)
In addition, this commit makes scale-down more principled, and turns it
into a scaler in its own right - so there's no longer an ugly separation
between scale and scale-down in the code.
Finally, the radius stuff has been made more proper - filters always
have a radius now (there's no more radius -1), and get a new .resizable
attribute instead for when it's tunable.
User-visible changes:
1. scale-down has been renamed dscale and now has its own set of config
options (dscale-param1, dscale-radius) etc., instead of reusing
scale-param1 (which was arguably a bug).
2. The default radius is no longer fixed at 3, but instead uses that
filter's preferred radius by default. (Scalers with a default radius
other than 3 include sinc, gaussian, box and triangle)
3. scale-radius etc. now goes down to 0.5, rather than 1.0. 0.5 is the
smallest radius that theoretically makes sense, and indeed it's used
by at least one filter (nearest).
Apart from that, it should just be internal changes only.
Note that this sets up for the refactor discussed in #1720, which would
be to merge scaler and window configurations (include parameters etc.)
into a single, simplified string. In the code, this would now basically
just mean getting rid of all the OPT_FLOATRANGE etc. lines related to
scalers and replacing them by a single function that parses a string and
updates the struct scaler_config as appropriate.
2015-03-26 00:55:32 +00:00
|
|
|
if (scaler->kernel->f.resizable && conf->radius > 0.0)
|
|
|
|
scaler->kernel->f.radius = conf->radius;
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
|
2015-08-20 19:45:58 +00:00
|
|
|
scaler->kernel->clamp = conf->clamp;
|
2017-07-03 09:23:48 +00:00
|
|
|
scaler->kernel->value_cutoff = conf->cutoff;
|
2015-08-20 19:45:58 +00:00
|
|
|
|
2015-03-13 18:30:31 +00:00
|
|
|
scaler->insufficient = !mp_init_filter(scaler->kernel, sizes, scale_factor);
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
|
|
|
|
int size = scaler->kernel->size;
|
2017-08-27 07:15:50 +00:00
|
|
|
int num_components = size > 2 ? 4 : size;
|
|
|
|
const struct ra_format *fmt = ra_find_float16_format(p->ra, num_components);
|
2017-07-29 18:15:59 +00:00
|
|
|
assert(fmt);
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
performing the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
|
2017-08-27 07:15:50 +00:00
|
|
|
int width = (size + num_components - 1) / num_components; // round up
|
|
|
|
int stride = width * num_components;
|
|
|
|
assert(size <= stride);
|
|
|
|
|
2015-12-05 19:14:23 +00:00
|
|
|
scaler->lut_size = 1 << p->opts.scaler_lut_size;
|
2015-12-05 18:54:25 +00:00
|
|
|
|
2017-08-27 07:15:50 +00:00
|
|
|
float *weights = talloc_array(NULL, float, scaler->lut_size * stride);
|
|
|
|
mp_compute_lut(scaler->kernel, scaler->lut_size, stride, weights);
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
|
2017-07-29 18:15:59 +00:00
|
|
|
bool use_1d = scaler->kernel->polar && (p->ra->caps & RA_CAP_TEX_1D);
|
|
|
|
|
|
|
|
struct ra_tex_params lut_params = {
|
|
|
|
.dimensions = use_1d ? 1 : 2,
|
|
|
|
.w = use_1d ? scaler->lut_size : width,
|
|
|
|
.h = use_1d ? 1 : scaler->lut_size,
|
|
|
|
.d = 1,
|
|
|
|
.format = fmt,
|
|
|
|
.render_src = true,
|
|
|
|
.src_linear = true,
|
|
|
|
.initial_data = weights,
|
|
|
|
};
|
|
|
|
scaler->lut = ra_tex_create(p->ra, &lut_params);
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
performing the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
|
|
|
|
talloc_free(weights);
|
|
|
|
|
|
|
|
debug_check_gl(p, "after initializing scaler");
|
2013-03-01 20:19:20 +00:00
|
|
|
}
|
|
|
|
|
2015-09-05 12:03:00 +00:00
|
|
|
// Special helper for sampling from two separated stages
|
2017-09-20 08:45:33 +00:00
|
|
|
static void pass_sample_separated(struct gl_video *p, struct image src,
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
struct scaler *scaler, int w, int h)
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
performing the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
{
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
// Separate the transformation into x and y components, per pass
|
|
|
|
struct gl_transform t_x = {
|
|
|
|
.m = {{src.transform.m[0][0], 0.0}, {src.transform.m[1][0], 1.0}},
|
|
|
|
.t = {src.transform.t[0], 0.0},
|
|
|
|
};
|
|
|
|
struct gl_transform t_y = {
|
|
|
|
.m = {{1.0, src.transform.m[0][1]}, {0.0, src.transform.m[1][1]}},
|
|
|
|
.t = {0.0, src.transform.t[1]},
|
|
|
|
};
|
|
|
|
|
|
|
|
// First pass (scale only in the y dir)
|
|
|
|
src.transform = t_y;
|
|
|
|
sampler_prelude(p->sc, pass_bind(p, src));
|
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number
of changes and improvements:
1. mpv users can now introspect the vo_opengl passes, which is something
that has been requested multiple times.
2. performance data is now measured per-pass, which helps both
development and debugging.
3. since adding more passes is cheap, we can now report information for
more passes (e.g. the blit pass, and the osd pass). Note: we also
switch to nanosecond scale, to be able to measure these passes
better.
4. `--user-shaders` authors can now describe their own passes, helping
users both identify which user shaders are active at any given time
as well as helping shader authors identify performance issues.
5. the timing data per pass is now exported as a full list of samples,
so projects like Argon-/mpv-stats can immediately read out all of the
samples and render a graph without having to manually poll this
option constantly.
Due to gl_timer's design being complicated (directly reading performance
data would block, so we delay the actual read-back until the next _start
command), it's vital not to conflate different passes that might be
doing different things from one frame to another. To accomplish this,
the actual timers are stored as part of the gl_shader_cache's sc_entry,
which makes them unique for that exact shader.
Starting and stopping the time measurement is easy to unify with the
gl_sc architecture, because the existing API already relies on a
"generate, render, reset" flow, so we can just put timer_start and
timer_stop in sc_generate and sc_reset, respectively.
The ugliest thing about this code is that due to the need to keep pass
information relatively stable in between frames, we need to distinguish
between "new" and "redrawn" frames, which bloats the code somewhat and
also feels hacky and vo_opengl-specific. (But then again, this entire
thing is vo_opengl-specific)
2017-06-29 15:00:06 +00:00
|
|
|
GLSLF("// first pass\n");
|
2015-09-05 12:03:00 +00:00
|
|
|
pass_sample_separated_gen(p->sc, scaler, 0, 1);
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
GLSLF("color *= %f;\n", src.multiplier);
|
2017-09-20 08:45:33 +00:00
|
|
|
finish_pass_tex(p, &scaler->sep_fbo, src.w, h);
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
|
|
|
|
// Second pass (scale only in the x dir)
|
2017-09-20 08:45:33 +00:00
|
|
|
src = image_wrap(scaler->sep_fbo, src.type, src.components);
|
2016-04-16 16:14:32 +00:00
|
|
|
src.transform = t_x;
|
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number
of changes and improvements:
1. mpv users can now introspect the vo_opengl passes, which is something
that has been requested multiple times.
2. performance data is now measured per-pass, which helps both
development and debugging.
3. since adding more passes is cheap, we can now report information for
more passes (e.g. the blit pass, and the osd pass). Note: we also
switch to nanosecond scale, to be able to measure these passes
better.
4. `--user-shaders` authors can now describe their own passes, helping
users both identify which user shaders are active at any given time
as well as helping shader authors identify performance issues.
5. the timing data per pass is now exported as a full list of samples,
so projects like Argon-/mpv-stats can immediately read out all of the
samples and render a graph without having to manually poll this
option constantly.
Due to gl_timer's design being complicated (directly reading performance
data would block, so we delay the actual read-back until the next _start
command), it's vital not to conflate different passes that might be
doing different things from one frame to another. To accomplish this,
the actual timers are stored as part of the gl_shader_cache's sc_entry,
which makes them unique for that exact shader.
Starting and stopping the time measurement is easy to unify with the
gl_sc architecture, because the existing API already relies on a
"generate, render, reset" flow, so we can just put timer_start and
timer_stop in sc_generate and sc_reset, respectively.
The ugliest thing about this code is that due to the need to keep pass
information relatively stable in between frames, we need to distinguish
between "new" and "redrawn" frames, which bloats the code somewhat and
also feels hacky and vo_opengl-specific. (But then again, this entire
thing is vo_opengl-specific)
2017-06-29 15:00:06 +00:00
|
|
|
pass_describe(p, "%s second pass", scaler->conf.kernel.name);
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
sampler_prelude(p->sc, pass_bind(p, src));
|
2015-09-05 12:03:00 +00:00
|
|
|
pass_sample_separated_gen(p->sc, scaler, 1, 0);
|
2015-03-15 05:27:11 +00:00
|
|
|
}
|
|
|
|
|
2017-07-25 23:42:19 +00:00
|
|
|
// Picks either the compute shader version or the regular sampler version
|
|
|
|
// depending on hardware support
|
|
|
|
static void pass_dispatch_sample_polar(struct gl_video *p, struct scaler *scaler,
|
2017-09-20 08:45:33 +00:00
|
|
|
struct image img, int w, int h)
|
2017-07-25 23:42:19 +00:00
|
|
|
{
|
2017-09-17 03:37:24 +00:00
|
|
|
uint64_t reqs = RA_CAP_COMPUTE;
|
2017-08-05 12:20:14 +00:00
|
|
|
if ((p->ra->caps & reqs) != reqs)
|
2017-07-25 23:42:19 +00:00
|
|
|
goto fallback;
|
|
|
|
|
|
|
|
int bound = ceil(scaler->kernel->radius_cutoff);
|
|
|
|
int offset = bound - 1; // padding top/left
|
|
|
|
int padding = offset + bound; // total padding
|
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
float ratiox = (float)w / img.w,
|
|
|
|
ratioy = (float)h / img.h;
|
2017-07-25 23:42:19 +00:00
|
|
|
|
|
|
|
// For performance we want to load at least as many pixels
|
|
|
|
// horizontally as there are threads in a warp (32 for nvidia), as
|
|
|
|
// well as enough to take advantage of shmem parallelism
|
|
|
|
const int warp_size = 32, threads = 256;
|
|
|
|
int bw = warp_size;
|
|
|
|
int bh = threads / bw;
|
|
|
|
|
|
|
|
// We need to sample everything from base_min to base_max, so make sure
|
|
|
|
// we have enough room in shmem
|
|
|
|
int iw = (int)ceil(bw / ratiox) + padding + 1,
|
|
|
|
ih = (int)ceil(bh / ratioy) + padding + 1;
|
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
int shmem_req = iw * ih * img.components * sizeof(float);
|
2017-08-07 17:14:18 +00:00
|
|
|
if (shmem_req > p->ra->max_shmem)
|
2017-07-25 23:42:19 +00:00
|
|
|
goto fallback;
|
|
|
|
|
2017-07-29 18:41:50 +00:00
|
|
|
pass_is_compute(p, bw, bh);
|
2017-09-20 08:45:33 +00:00
|
|
|
pass_compute_polar(p->sc, scaler, img.components, bw, bh, iw, ih);
|
2017-07-25 23:42:19 +00:00
|
|
|
return;
|
|
|
|
|
|
|
|
fallback:
|
|
|
|
// Fall back to regular polar shader when compute shaders are unsupported
|
|
|
|
// or the kernel is too big for shmem
|
2017-10-02 12:10:52 +00:00
|
|
|
pass_sample_polar(p->sc, scaler, img.components,
|
|
|
|
p->ra->caps & RA_CAP_GATHER);
|
2017-07-25 23:42:19 +00:00
|
|
|
}
|
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
// Sample from image, with the src rectangle given by it.
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
// The dst rectangle is implicit by what the caller will do next, but w and h
|
|
|
|
// must still be what is going to be used (to dimension FBOs correctly).
|
2016-02-23 15:18:17 +00:00
|
|
|
// This will write the scaled contents to the vec4 "color".
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
performing the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
// The scaler unit is initialized by this function; in order to avoid cache
|
|
|
|
// thrashing, the scaler unit should usually use the same parameters.
|
2017-09-20 08:45:33 +00:00
|
|
|
static void pass_sample(struct gl_video *p, struct image img,
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
struct scaler *scaler, const struct scaler_config *conf,
|
|
|
|
double scale_factor, int w, int h)
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
performing the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
{
|
vo_opengl: refactor scaler configuration
This merges all of the scaler-related options into a single
configuration struct, and also cleans up the way they're passed through
the code. (For example, the scaler index is no longer threaded through
pass_sample, just the scaler configuration itself, and there's no longer
duplication of the params etc.)
In addition, this commit makes scale-down more principled, and turns it
into a scaler in its own right - so there's no longer an ugly separation
between scale and scale-down in the code.
Finally, the radius stuff has been made more proper - filters always
have a radius now (there's no more radius -1), and get a new .resizable
attribute instead for when it's tunable.
User-visible changes:
1. scale-down has been renamed dscale and now has its own set of config
options (dscale-param1, dscale-radius) etc., instead of reusing
scale-param1 (which was arguably a bug).
2. The default radius is no longer fixed at 3, but instead uses that
filter's preferred radius by default. (Scalers with a default radius
other than 3 include sinc, gaussian, box and triangle)
3. scale-radius etc. now goes down to 0.5, rather than 1.0. 0.5 is the
smallest radius that theoretically makes sense, and indeed it's used
by at least one filter (nearest).
Apart from that, it should just be internal changes only.
Note that this sets up for the refactor discussed in #1720, which would
be to merge scaler and window configurations (include parameters etc.)
into a single, simplified string. In the code, this would now basically
just mean getting rid of all the OPT_FLOATRANGE etc. lines related to
scalers and replacing them by a single function that parses a string and
updates the struct scaler_config as appropriate.
2015-03-26 00:55:32 +00:00
|
|
|
reinit_scaler(p, scaler, conf, scale_factor, filter_sizes);
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seem
to work consistently anyway (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
|
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number
of changes and improvements:
1. mpv users can now introspect the vo_opengl passes, which is something
that has been requested multiple times.
2. performance data is now measured per-pass, which helps both
development and debugging.
3. since adding more passes is cheap, we can now report information for
more passes (e.g. the blit pass, and the osd pass). Note: we also
switch to nanosecond scale, to be able to measure these passes
better.
4. `--user-shaders` authors can now describe their own passes, helping
users both identify which user shaders are active at any given time
as well as helping shader authors identify performance issues.
5. the timing data per pass is now exported as a full list of samples,
so projects like Argon-/mpv-stats can immediately read out all of the
samples and render a graph without having to manually poll this
option constantly.
Due to gl_timer's design being complicated (directly reading performance
data would block, so we delay the actual read-back until the next _start
command), it's vital not to conflate different passes that might be
doing different things from one frame to another. To accomplish this,
the actual timers are stored as part of the gl_shader_cache's sc_entry,
which makes them unique for that exact shader.
Starting and stopping the time measurement is easy to unify with the
gl_sc architecture, because the existing API already relies on a
"generate, render, reset" flow, so we can just put timer_start and
timer_stop in sc_generate and sc_reset, respectively.
The ugliest thing about this code is that due to the need to keep pass
information relatively stable in between frames, we need to distinguish
between "new" and "redrawn" frames, which bloats the code somewhat and
also feels hacky and vo_opengl-specific. (But then again, this entire
thing is vo_opengl-specific)
2017-06-29 15:00:06 +00:00
|
|
|
// Describe scaler
|
|
|
|
const char *scaler_opt[] = {
|
|
|
|
[SCALER_SCALE] = "scale",
|
|
|
|
[SCALER_DSCALE] = "dscale",
|
|
|
|
[SCALER_CSCALE] = "cscale",
|
|
|
|
[SCALER_TSCALE] = "tscale",
|
|
|
|
};
|
|
|
|
|
|
|
|
pass_describe(p, "%s=%s (%s)", scaler_opt[scaler->index],
|
2017-09-20 08:45:33 +00:00
|
|
|
scaler->conf.kernel.name, plane_names[img.type]);
|
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number
of changes and improvements:
1. mpv users can now introspect the vo_opengl passes, which is something
that has been requested multiple times.
2. performance data is now measured per-pass, which helps both
development and debugging.
3. since adding more passes is cheap, we can now report information for
more passes (e.g. the blit pass, and the osd pass). Note: we also
switch to nanosecond scale, to be able to measure these passes
better.
4. `--user-shaders` authors can now describe their own passes, helping
users both identify which user shaders are active at any given time
as well as helping shader authors identify performance issues.
5. the timing data per pass is now exported as a full list of samples,
so projects like Argon-/mpv-stats can immediately read out all of the
samples and render a graph without having to manually poll this
option constantly.
Due to gl_timer's design being complicated (directly reading performance
data would block, so we delay the actual read-back until the next _start
command), it's vital not to conflate different passes that might be
doing different things from one frame to another. To accomplish this,
the actual timers are stored as part of the gl_shader_cache's sc_entry,
which makes them unique for that exact shader.
Starting and stopping the time measurement is easy to unify with the
gl_sc architecture, because the existing API already relies on a
"generate, render, reset" flow, so we can just put timer_start and
timer_stop in sc_generate and sc_reset, respectively.
The ugliest thing about this code is that due to the need to keep pass
information relatively stable in between frames, we need to distinguish
between "new" and "redrawn" frames, which bloats the code somewhat and
also feels hacky and vo_opengl-specific. (But then again, this entire
thing is vo_opengl-specific)
2017-06-29 15:00:06 +00:00
|
|
|
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
bool is_separated = scaler->kernel && !scaler->kernel->polar;
|
|
|
|
|
|
|
|
// Set up the transformation+prelude and bind the texture, for everything
|
|
|
|
// other than separated scaling (which does this in the subfunction)
|
|
|
|
if (!is_separated)
|
2017-09-20 08:45:33 +00:00
|
|
|
sampler_prelude(p->sc, pass_bind(p, img));
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seem
to work consistently anyway (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
performing the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
// Dispatch the scaler. They're all wildly different.
|
vo_opengl: refactor scaler configuration
This merges all of the scaler-related options into a single
configuration struct, and also cleans up the way they're passed through
the code. (For example, the scaler index is no longer threaded through
pass_sample, just the scaler configuration itself, and there's no longer
duplication of the params etc.)
In addition, this commit makes scale-down more principled, and turns it
into a scaler in its own right - so there's no longer an ugly separation
between scale and scale-down in the code.
Finally, the radius stuff has been made more proper - filters always
have a radius now (there's no more radius -1), and get a new .resizable
attribute instead for when it's tunable.
User-visible changes:
1. scale-down has been renamed dscale and now has its own set of config
options (dscale-param1, dscale-radius) etc., instead of reusing
scale-param1 (which was arguably a bug).
2. The default radius is no longer fixed at 3, but instead uses that
filter's preferred radius by default. (Scalers with a default radius
other than 3 include sinc, gaussian, box and triangle)
3. scale-radius etc. now goes down to 0.5, rather than 1.0. 0.5 is the
smallest radius that theoretically makes sense, and indeed it's used
by at least one filter (nearest).
Apart from that, it should just be internal changes only.
Note that this sets up for the refactor discussed in #1720, which would
be to merge scaler and window configurations (include parameters etc.)
into a single, simplified string. In the code, this would now basically
just mean getting rid of all the OPT_FLOATRANGE etc. lines related to
scalers and replacing them by a single function that parses a string and
updates the struct scaler_config as appropriate.
2015-03-26 00:55:32 +00:00
|
|
|
const char *name = scaler->conf.kernel.name;
|
|
|
|
if (strcmp(name, "bilinear") == 0) {
|
2016-02-23 15:18:17 +00:00
|
|
|
GLSL(color = texture(tex, pos);)
|
vo_opengl: refactor scaler configuration
This merges all of the scaler-related options into a single
configuration struct, and also cleans up the way they're passed through
the code. (For example, the scaler index is no longer threaded through
pass_sample, just the scaler configuration itself, and there's no longer
duplication of the params etc.)
In addition, this commit makes scale-down more principled, and turns it
into a scaler in its own right - so there's no longer an ugly separation
between scale and scale-down in the code.
Finally, the radius stuff has been made more proper - filters always
have a radius now (there's no more radius -1), and get a new .resizable
attribute instead for when it's tunable.
User-visible changes:
1. scale-down has been renamed dscale and now has its own set of config
options (dscale-param1, dscale-radius) etc., instead of reusing
scale-param1 (which was arguably a bug).
2. The default radius is no longer fixed at 3, but instead uses that
filter's preferred radius by default. (Scalers with a default radius
other than 3 include sinc, gaussian, box and triangle)
3. scale-radius etc. now goes down to 0.5, rather than 1.0. 0.5 is the
smallest radius that theoretically makes sense, and indeed it's used
by at least one filter (nearest).
Apart from that, it should just be internal changes only.
Note that this sets up for the refactor discussed in #1720, which would
be to merge scaler and window configurations (include parameters etc.)
into a single, simplified string. In the code, this would now basically
just mean getting rid of all the OPT_FLOATRANGE etc. lines related to
scalers and replacing them by a single function that parses a string and
updates the struct scaler_config as appropriate.
2015-03-26 00:55:32 +00:00
|
|
|
} else if (strcmp(name, "bicubic_fast") == 0) {
|
2015-09-05 12:03:00 +00:00
|
|
|
pass_sample_bicubic_fast(p->sc);
|
vo_opengl: refactor scaler configuration
This merges all of the scaler-related options into a single
configuration struct, and also cleans up the way they're passed through
the code. (For example, the scaler index is no longer threaded through
pass_sample, just the scaler configuration itself, and there's no longer
duplication of the params etc.)
In addition, this commit makes scale-down more principled, and turns it
into a scaler in its own right - so there's no longer an ugly separation
between scale and scale-down in the code.
Finally, the radius stuff has been made more proper - filters always
have a radius now (there's no more radius -1), and get a new .resizable
attribute instead for when it's tunable.
User-visible changes:
1. scale-down has been renamed dscale and now has its own set of config
options (dscale-param1, dscale-radius) etc., instead of reusing
scale-param1 (which was arguably a bug).
2. The default radius is no longer fixed at 3, but instead uses that
filter's preferred radius by default. (Scalers with a default radius
other than 3 include sinc, gaussian, box and triangle)
3. scale-radius etc. now goes down to 0.5, rather than 1.0. 0.5 is the
smallest radius that theoretically makes sense, and indeed it's used
by at least one filter (nearest).
Apart from that, it should just be internal changes only.
Note that this sets up for the refactor discussed in #1720, which would
be to merge scaler and window configurations (include parameters etc.)
into a single, simplified string. In the code, this would now basically
just mean getting rid of all the OPT_FLOATRANGE etc. lines related to
scalers and replacing them by a single function that parses a string and
updates the struct scaler_config as appropriate.
2015-03-26 00:55:32 +00:00
|
|
|
} else if (strcmp(name, "oversample") == 0) {
|
2015-09-05 12:03:00 +00:00
|
|
|
pass_sample_oversample(p->sc, scaler, w, h);
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
} else if (scaler->kernel && scaler->kernel->polar) {
|
2017-09-20 08:45:33 +00:00
|
|
|
pass_dispatch_sample_polar(p, scaler, img, w, h);
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
} else if (scaler->kernel) {
|
2017-09-20 08:45:33 +00:00
|
|
|
pass_sample_separated(p, img, scaler, w, h);
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
} else {
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
// Should never happen
|
|
|
|
abort();
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
}
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
// Apply any required multipliers. Separated scaling already does this in
|
|
|
|
// its first stage
|
|
|
|
if (!is_separated)
|
2017-09-20 08:45:33 +00:00
|
|
|
GLSLF("color *= %f;\n", img.multiplier);
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
// Micro-optimization: Avoid scaling unneeded channels
|
2017-09-20 08:45:33 +00:00
|
|
|
skip_unused(p, img.components);
|
2015-01-29 14:50:21 +00:00
|
|
|
}
|
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
// Returns true if two images are semantically equivalent (same metadata)
|
|
|
|
static bool image_equiv(struct image a, struct image b)
|
2016-04-16 16:14:32 +00:00
|
|
|
{
|
|
|
|
return a.type == b.type &&
|
|
|
|
a.components == b.components &&
|
|
|
|
a.multiplier == b.multiplier &&
|
2017-08-04 13:47:50 +00:00
|
|
|
a.tex->params.format == b.tex->params.format &&
|
|
|
|
a.tex->params.w == b.tex->params.w &&
|
|
|
|
a.tex->params.h == b.tex->params.h &&
|
2016-04-16 16:14:32 +00:00
|
|
|
a.w == b.w &&
|
|
|
|
a.h == b.h &&
|
2017-02-17 14:46:11 +00:00
|
|
|
gl_transform_eq(a.transform, b.transform);
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
}
|
2015-10-26 22:43:48 +00:00
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
static void deband_hook(struct gl_video *p, struct image img,
|
2016-04-16 16:14:32 +00:00
|
|
|
struct gl_transform *trans, void *priv)
|
|
|
|
{
|
2017-09-20 08:45:33 +00:00
|
|
|
pass_describe(p, "debanding (%s)", plane_names[img.type]);
|
2017-09-03 19:51:48 +00:00
|
|
|
pass_sample_deband(p->sc, p->opts.deband_opts, &p->lfg,
|
|
|
|
p->image_params.color.gamma);
|
2016-04-16 16:14:32 +00:00
|
|
|
}
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
static void unsharp_hook(struct gl_video *p, struct image img,
|
2016-04-19 18:45:40 +00:00
|
|
|
struct gl_transform *trans, void *priv)
|
|
|
|
{
|
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number
of changes and improvements:
1. mpv users can now introspect the vo_opengl passes, which is something
that has been requested multiple times.
2. performance data is now measured per-pass, which helps both
development and debugging.
3. since adding more passes is cheap, we can now report information for
more passes (e.g. the blit pass, and the osd pass). Note: we also
switch to nanosecond scale, to be able to measure these passes
better.
4. `--user-shaders` authors can now describe their own passes, helping
users both identify which user shaders are active at any given time
as well as helping shader authors identify performance issues.
5. the timing data per pass is now exported as a full list of samples,
so projects like Argon-/mpv-stats can immediately read out all of the
samples and render a graph without having to manually poll this
option constantly.
Due to gl_timer's design being complicated (directly reading performance
data would block, so we delay the actual read-back until the next _start
command), it's vital not to conflate different passes that might be
doing different things from one frame to another. To accomplish this,
the actual timers are stored as part of the gl_shader_cache's sc_entry,
which makes them unique for that exact shader.
Starting and stopping the time measurement is easy to unify with the
gl_sc architecture, because the existing API already relies on a
"generate, render, reset" flow, so we can just put timer_start and
timer_stop in sc_generate and sc_reset, respectively.
The ugliest thing about this code is that due to the need to keep pass
information relatively stable in between frames, we need to distinguish
between "new" and "redrawn" frames, which bloats the code somewhat and
also feels hacky and vo_opengl-specific. (But then again, this entire
thing is vo_opengl-specific)
2017-06-29 15:00:06 +00:00
|
|
|
pass_describe(p, "unsharp masking");
|
2016-04-19 18:45:40 +00:00
|
|
|
pass_sample_unsharp(p->sc, p->opts.unsharp);
|
|
|
|
}
|
|
|
|
|
2016-07-03 17:23:03 +00:00
|
|
|
struct szexp_ctx {
|
|
|
|
struct gl_video *p;
|
2017-09-20 08:45:33 +00:00
|
|
|
struct image img;
|
2016-07-03 17:23:03 +00:00
|
|
|
};
|
2016-05-12 01:34:47 +00:00
|
|
|
|
2016-07-03 17:23:03 +00:00
|
|
|
static bool szexp_lookup(void *priv, struct bstr var, float size[2])
|
|
|
|
{
|
|
|
|
struct szexp_ctx *ctx = priv;
|
|
|
|
struct gl_video *p = ctx->p;
|
|
|
|
|
2016-07-03 12:06:41 +00:00
|
|
|
if (bstr_equals0(var, "NATIVE_CROPPED")) {
|
|
|
|
size[0] = (p->src_rect.x1 - p->src_rect.x0) * p->texture_offset.m[0][0];
|
|
|
|
size[1] = (p->src_rect.y1 - p->src_rect.y0) * p->texture_offset.m[1][1];
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2016-07-03 17:23:03 +00:00
|
|
|
// The size of OUTPUT is determined. It could be useful for certain
|
|
|
|
// user shaders to skip passes.
|
|
|
|
if (bstr_equals0(var, "OUTPUT")) {
|
|
|
|
size[0] = p->dst_rect.x1 - p->dst_rect.x0;
|
|
|
|
size[1] = p->dst_rect.y1 - p->dst_rect.y0;
|
|
|
|
return true;
|
|
|
|
}
|
2016-05-12 01:34:47 +00:00
|
|
|
|
2016-07-03 17:23:03 +00:00
|
|
|
// HOOKED is a special case
|
|
|
|
if (bstr_equals0(var, "HOOKED")) {
|
2017-09-20 08:45:33 +00:00
|
|
|
size[0] = ctx->img.w;
|
|
|
|
size[1] = ctx->img.h;
|
2016-07-03 17:23:03 +00:00
|
|
|
return true;
|
2016-05-12 01:34:47 +00:00
|
|
|
}
|
|
|
|
|
2017-09-27 21:38:54 +00:00
|
|
|
for (int o = 0; o < p->num_saved_imgs; o++) {
|
|
|
|
if (bstr_equals0(var, p->saved_imgs[o].name)) {
|
|
|
|
size[0] = p->saved_imgs[o].img.w;
|
|
|
|
size[1] = p->saved_imgs[o].img.h;
|
2016-07-03 17:23:03 +00:00
|
|
|
return true;
|
|
|
|
}
|
2016-05-12 01:34:47 +00:00
|
|
|
}
|
|
|
|
|
2016-07-03 17:23:03 +00:00
|
|
|
return false;
|
2016-06-08 13:05:28 +00:00
|
|
|
}
|
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
static bool user_hook_cond(struct gl_video *p, struct image img, void *priv)
|
2016-06-08 13:05:28 +00:00
|
|
|
{
|
2017-07-10 23:59:21 +00:00
|
|
|
struct gl_user_shader_hook *shader = priv;
|
2016-06-08 13:05:28 +00:00
|
|
|
assert(shader);
|
|
|
|
|
|
|
|
float res = false;
|
2017-09-20 08:45:33 +00:00
|
|
|
struct szexp_ctx ctx = {p, img};
|
|
|
|
eval_szexpr(p->log, &ctx, szexp_lookup, shader->cond, &res);
|
2016-06-08 13:05:28 +00:00
|
|
|
return res;
|
2016-05-12 01:34:47 +00:00
|
|
|
}
|
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
static void user_hook(struct gl_video *p, struct image img,
|
2016-04-20 23:33:13 +00:00
|
|
|
struct gl_transform *trans, void *priv)
|
|
|
|
{
|
2017-07-10 23:59:21 +00:00
|
|
|
struct gl_user_shader_hook *shader = priv;
|
2016-04-20 23:33:13 +00:00
|
|
|
assert(shader);
|
2017-07-29 18:41:50 +00:00
|
|
|
load_shader(p, shader->pass_body);
|
2016-04-20 23:33:13 +00:00
|
|
|
|
2017-07-10 20:52:39 +00:00
|
|
|
pass_describe(p, "user shader: %.*s (%s)", BSTR_P(shader->pass_desc),
|
2017-09-20 08:45:33 +00:00
|
|
|
plane_names[img.type]);
|
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number
of changes and improvements:
1. mpv users can now introspect the vo_opengl passes, which is something
that has been requested multiple times.
2. performance data is now measured per-pass, which helps both
development and debugging.
3. since adding more passes is cheap, we can now report information for
more passes (e.g. the blit pass, and the osd pass). Note: we also
switch to nanosecond scale, to be able to measure these passes
better.
4. `--user-shaders` authors can now describe their own passes, helping
users both identify which user shaders are active at any given time
as well as helping shader authors identify performance issues.
5. the timing data per pass is now exported as a full list of samples,
so projects like Argon-/mpv-stats can immediately read out all of the
samples and render a graph without having to manually poll this
option constantly.
Due to gl_timer's design being complicated (directly reading performance
data would block, so we delay the actual read-back until the next _start
command), it's vital not to conflate different passes that might be
doing different things from one frame to another. To accomplish this,
the actual timers are stored as part of the gl_shader_cache's sc_entry,
which makes them unique for that exact shader.
Starting and stopping the time measurement is easy to unify with the
gl_sc architecture, because the existing API already relies on a
"generate, render, reset" flow, so we can just put timer_start and
timer_stop in sc_generate and sc_reset, respectively.
The ugliest thing about this code is that due to the need to keep pass
information relatively stable in between frames, we need to distinguish
between "new" and "redrawn" frames, which bloats the code somewhat and
also feels hacky and vo_opengl-specific. (But then again, this entire
thing is vo_opengl-specific)
2017-06-29 15:00:06 +00:00
|
|
|
|
2017-07-29 18:41:50 +00:00
|
|
|
if (shader->compute.active) {
|
|
|
|
p->pass_compute = shader->compute;
|
|
|
|
GLSLF("hook();\n");
|
|
|
|
} else {
|
|
|
|
GLSLF("color = hook();\n");
|
|
|
|
}
|
2016-04-20 23:33:13 +00:00
|
|
|
|
2016-06-08 13:05:28 +00:00
|
|
|
// Make sure we at least create a legal FBO on failure, since it's better
|
|
|
|
// to do this and display an error message than just crash OpenGL
|
|
|
|
float w = 1.0, h = 1.0;
|
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
eval_szexpr(p->log, &(struct szexp_ctx){p, img}, szexp_lookup, shader->width, &w);
|
|
|
|
eval_szexpr(p->log, &(struct szexp_ctx){p, img}, szexp_lookup, shader->height, &h);
|
2016-05-12 01:34:47 +00:00
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
*trans = (struct gl_transform){{{w / img.w, 0}, {0, h / img.h}}};
|
2016-05-12 01:34:47 +00:00
|
|
|
gl_transform_trans(shader->offset, trans);
|
2016-04-20 23:33:13 +00:00
|
|
|
}
|
|
|
|
|
2017-07-10 23:59:21 +00:00
|
|
|
static bool add_user_hook(void *priv, struct gl_user_shader_hook hook)
|
|
|
|
{
|
|
|
|
struct gl_video *p = priv;
|
|
|
|
struct gl_user_shader_hook *copy = talloc_ptrtype(p, copy);
|
|
|
|
*copy = hook;
|
|
|
|
|
|
|
|
struct tex_hook texhook = {
|
|
|
|
.save_tex = bstrdup0(copy, hook.save_tex),
|
|
|
|
.components = hook.components,
|
|
|
|
.hook = user_hook,
|
|
|
|
.cond = user_hook_cond,
|
|
|
|
.priv = copy,
|
|
|
|
};
|
|
|
|
|
|
|
|
for (int h = 0; h < SHADER_MAX_HOOKS; h++)
|
|
|
|
texhook.hook_tex[h] = bstrdup0(copy, hook.hook_tex[h]);
|
|
|
|
for (int h = 0; h < SHADER_MAX_BINDS; h++)
|
|
|
|
texhook.bind_tex[h] = bstrdup0(copy, hook.bind_tex[h]);
|
|
|
|
|
2017-09-27 21:38:54 +00:00
|
|
|
MP_TARRAY_APPEND(p, p->tex_hooks, p->num_tex_hooks, texhook);
|
|
|
|
return true;
|
2017-07-10 23:59:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static bool add_user_tex(void *priv, struct gl_user_shader_tex tex)
|
|
|
|
{
|
|
|
|
struct gl_video *p = priv;
|
|
|
|
|
2017-08-03 14:08:18 +00:00
|
|
|
tex.tex = ra_tex_create(p->ra, &tex.params);
|
|
|
|
TA_FREEP(&tex.params.initial_data);
|
2017-07-10 23:59:21 +00:00
|
|
|
|
2017-09-27 21:38:54 +00:00
|
|
|
if (!tex.tex)
|
|
|
|
return false;
|
2017-07-10 23:59:21 +00:00
|
|
|
|
2017-09-27 21:38:54 +00:00
|
|
|
MP_TARRAY_APPEND(p, p->user_textures, p->num_user_textures, tex);
|
|
|
|
return true;
|
2017-07-10 23:59:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void load_user_shaders(struct gl_video *p, char **shaders)
|
2016-04-16 16:14:32 +00:00
|
|
|
{
|
2016-04-20 23:33:13 +00:00
|
|
|
if (!shaders)
|
|
|
|
return;
|
|
|
|
|
|
|
|
for (int n = 0; shaders[n] != NULL; n++) {
|
|
|
|
struct bstr file = load_cached_file(p, shaders[n]);
|
2017-08-03 14:08:18 +00:00
|
|
|
parse_user_shader(p->log, p->ra, file, p, add_user_hook, add_user_tex);
|
2016-04-20 23:33:13 +00:00
|
|
|
}
|
|
|
|
}
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
|
2016-04-20 23:33:13 +00:00
|
|
|
static void gl_video_setup_hooks(struct gl_video *p)
|
|
|
|
{
|
2016-05-18 15:47:10 +00:00
|
|
|
gl_video_reset_hooks(p);
|
|
|
|
|
2016-04-16 16:14:32 +00:00
|
|
|
if (p->opts.deband) {
|
2017-09-27 21:38:54 +00:00
|
|
|
MP_TARRAY_APPEND(p, p->tex_hooks, p->num_tex_hooks, (struct tex_hook) {
|
2017-07-10 20:52:39 +00:00
|
|
|
.hook_tex = {"LUMA", "CHROMA", "RGB", "XYZ"},
|
|
|
|
.bind_tex = {"HOOKED"},
|
|
|
|
.hook = deband_hook,
|
|
|
|
});
|
2016-04-16 16:14:32 +00:00
|
|
|
}
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
|
2016-04-19 18:45:40 +00:00
|
|
|
if (p->opts.unsharp != 0.0) {
|
2017-09-27 21:38:54 +00:00
|
|
|
MP_TARRAY_APPEND(p, p->tex_hooks, p->num_tex_hooks, (struct tex_hook) {
|
2017-07-10 20:52:39 +00:00
|
|
|
.hook_tex = {"MAIN"},
|
2016-04-19 18:45:40 +00:00
|
|
|
.bind_tex = {"HOOKED"},
|
|
|
|
.hook = unsharp_hook,
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2017-07-10 23:59:21 +00:00
|
|
|
load_user_shaders(p, p->opts.user_shaders);
|
2016-04-16 16:14:32 +00:00
|
|
|
}
|
2016-01-26 19:47:32 +00:00
|
|
|
|
2016-04-16 16:14:32 +00:00
|
|
|
// sample from video textures, set "color" variable to yuv value
|
|
|
|
static void pass_read_video(struct gl_video *p)
|
|
|
|
{
|
2017-09-20 08:45:33 +00:00
|
|
|
struct image img[4];
|
2016-04-16 16:14:32 +00:00
|
|
|
struct gl_transform offsets[4];
|
2017-09-20 08:45:33 +00:00
|
|
|
pass_get_images(p, &p->image, img, offsets);
|
2016-04-16 16:14:32 +00:00
|
|
|
|
|
|
|
// To keep the code as simple as possibly, we currently run all shader
|
|
|
|
// stages even if they would be unnecessary (e.g. no hooks for a texture).
|
2017-09-20 08:45:33 +00:00
|
|
|
// In the future, deferred image should optimize this away.
|
2016-04-16 16:14:32 +00:00
|
|
|
|
|
|
|
// Merge semantically identical textures. This loop is done from back
|
|
|
|
// to front so that merged textures end up in the right order while
|
|
|
|
// simultaneously allowing us to skip unnecessary merges
|
|
|
|
for (int n = 3; n >= 0; n--) {
|
2017-09-20 08:45:33 +00:00
|
|
|
if (img[n].type == PLANE_NONE)
|
2016-04-16 16:14:32 +00:00
|
|
|
continue;
|
2016-01-26 19:47:32 +00:00
|
|
|
|
2016-04-16 16:14:32 +00:00
|
|
|
int first = n;
|
|
|
|
int num = 0;
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
|
2016-04-16 16:14:32 +00:00
|
|
|
for (int i = 0; i < n; i++) {
|
2017-09-20 08:45:33 +00:00
|
|
|
if (image_equiv(img[n], img[i]) &&
|
2016-04-16 16:14:32 +00:00
|
|
|
gl_transform_eq(offsets[n], offsets[i]))
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
{
|
2016-04-16 16:14:32 +00:00
|
|
|
GLSLF("// merging plane %d ...\n", i);
|
2017-09-20 08:45:33 +00:00
|
|
|
copy_image(p, &num, img[i]);
|
2016-04-16 16:14:32 +00:00
|
|
|
first = MPMIN(first, i);
|
2017-09-20 08:45:33 +00:00
|
|
|
img[i] = (struct image){0};
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
}
|
2015-09-05 15:39:27 +00:00
|
|
|
}
|
2015-03-27 12:27:40 +00:00
|
|
|
|
2016-04-16 16:14:32 +00:00
|
|
|
if (num > 0) {
|
|
|
|
GLSLF("// merging plane %d ... into %d\n", n, first);
|
2017-09-20 08:45:33 +00:00
|
|
|
copy_image(p, &num, img[n]);
|
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number
of changes and improvements:
1. mpv users can now introspect the vo_opengl passes, which is something
that has been requested multiple times.
2. performance data is now measured per-pass, which helps both
development and debugging.
3. since adding more passes is cheap, we can now report information for
more passes (e.g. the blit pass, and the osd pass). Note: we also
switch to nanosecond scale, to be able to measure these passes
better.
4. `--user-shaders` authors can now describe their own passes, helping
users both identify which user shaders are active at any given time
as well as helping shader authors identify performance issues.
5. the timing data per pass is now exported as a full list of samples,
so projects like Argon-/mpv-stats can immediately read out all of the
samples and render a graph without having to manually poll this
option constantly.
Due to gl_timer's design being complicated (directly reading performance
data would block, so we delay the actual read-back until the next _start
command), it's vital not to conflate different passes that might be
doing different things from one frame to another. To accomplish this,
the actual timers are stored as part of the gl_shader_cache's sc_entry,
which makes them unique for that exact shader.
Starting and stopping the time measurement is easy to unify with the
gl_sc architecture, because the existing API already relies on a
"generate, render, reset" flow, so we can just put timer_start and
timer_stop in sc_generate and sc_reset, respectively.
The ugliest thing about this code is that due to the need to keep pass
information relatively stable in between frames, we need to distinguish
between "new" and "redrawn" frames, which bloats the code somewhat and
also feels hacky and vo_opengl-specific. (But then again, this entire
thing is vo_opengl-specific)
2017-06-29 15:00:06 +00:00
|
|
|
pass_describe(p, "merging planes");
|
2017-09-20 08:45:33 +00:00
|
|
|
finish_pass_tex(p, &p->merge_tex[n], img[n].w, img[n].h);
|
|
|
|
img[first] = image_wrap(p->merge_tex[n], img[n].type, num);
|
|
|
|
img[n] = (struct image){0};
|
2015-09-05 15:39:27 +00:00
|
|
|
}
|
2016-04-16 16:14:32 +00:00
|
|
|
}
|
2015-04-12 01:34:38 +00:00
|
|
|
|
2016-04-16 16:14:32 +00:00
|
|
|
// If any textures are still in integer format by this point, we need
|
|
|
|
// to introduce an explicit conversion pass to avoid breaking hooks/scaling
|
|
|
|
for (int n = 0; n < 4; n++) {
|
2017-09-20 08:45:33 +00:00
|
|
|
if (img[n].tex && img[n].tex->params.format->ctype == RA_CTYPE_UINT) {
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
GLSLF("// use_integer fix for plane %d\n", n);
|
2017-09-20 08:45:33 +00:00
|
|
|
copy_image(p, &(int){0}, img[n]);
|
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number
of changes and improvements:
1. mpv users can now introspect the vo_opengl passes, which is something
that has been requested multiple times.
2. performance data is now measured per-pass, which helps both
development and debugging.
3. since adding more passes is cheap, we can now report information for
more passes (e.g. the blit pass, and the osd pass). Note: we also
switch to nanosecond scale, to be able to measure these passes
better.
4. `--user-shaders` authors can now describe their own passes, helping
users both identify which user shaders are active at any given time
as well as helping shader authors identify performance issues.
5. the timing data per pass is now exported as a full list of samples,
so projects like Argon-/mpv-stats can immediately read out all of the
samples and render a graph without having to manually poll this
option constantly.
Due to gl_timer's design being complicated (directly reading performance
data would block, so we delay the actual read-back until the next _start
command), it's vital not to conflate different passes that might be
doing different things from one frame to another. To accomplish this,
the actual timers are stored as part of the gl_shader_cache's sc_entry,
which makes them unique for that exact shader.
Starting and stopping the time measurement is easy to unify with the
gl_sc architecture, because the existing API already relies on a
"generate, render, reset" flow, so we can just put timer_start and
timer_stop in sc_generate and sc_reset, respectively.
The ugliest thing about this code is that due to the need to keep pass
information relatively stable in between frames, we need to distinguish
between "new" and "redrawn" frames, which bloats the code somewhat and
also feels hacky and vo_opengl-specific. (But then again, this entire
thing is vo_opengl-specific)
2017-06-29 15:00:06 +00:00
|
|
|
pass_describe(p, "use_integer fix");
|
2017-09-20 08:45:33 +00:00
|
|
|
finish_pass_tex(p, &p->integer_tex[n], img[n].w, img[n].h);
|
|
|
|
img[n] = image_wrap(p->integer_tex[n], img[n].type,
|
|
|
|
img[n].components);
|
2015-03-27 12:27:40 +00:00
|
|
|
}
|
2016-04-16 16:14:32 +00:00
|
|
|
}
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
|
2016-04-16 16:14:32 +00:00
|
|
|
// Dispatch the hooks for all of these textures, saving and perhaps
|
|
|
|
// modifying them in the process
|
|
|
|
for (int n = 0; n < 4; n++) {
|
|
|
|
const char *name;
|
2017-09-20 08:45:33 +00:00
|
|
|
switch (img[n].type) {
|
2016-04-16 16:14:32 +00:00
|
|
|
case PLANE_RGB: name = "RGB"; break;
|
|
|
|
case PLANE_LUMA: name = "LUMA"; break;
|
|
|
|
case PLANE_CHROMA: name = "CHROMA"; break;
|
|
|
|
case PLANE_ALPHA: name = "ALPHA"; break;
|
|
|
|
case PLANE_XYZ: name = "XYZ"; break;
|
|
|
|
default: continue;
|
|
|
|
}
|
2015-03-27 12:27:40 +00:00
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
img[n] = pass_hook(p, name, img[n], &offsets[n]);
|
2016-04-16 16:14:32 +00:00
|
|
|
}
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
|
2016-04-16 16:14:32 +00:00
|
|
|
// At this point all planes are finalized but they may not be at the
|
|
|
|
// required size yet. Furthermore, they may have texture offsets that
|
|
|
|
// require realignment. For lack of something better to do, we assume
|
|
|
|
// the rgb/luma texture is the "reference" and scale everything else
|
|
|
|
// to match.
|
|
|
|
for (int n = 0; n < 4; n++) {
|
2017-09-20 08:45:33 +00:00
|
|
|
switch (img[n].type) {
|
2016-04-16 16:14:32 +00:00
|
|
|
case PLANE_RGB:
|
|
|
|
case PLANE_XYZ:
|
|
|
|
case PLANE_LUMA: break;
|
|
|
|
default: continue;
|
2015-10-26 22:43:48 +00:00
|
|
|
}
|
2015-03-27 12:27:40 +00:00
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
p->texture_w = img[n].w;
|
|
|
|
p->texture_h = img[n].h;
|
2016-04-16 16:14:32 +00:00
|
|
|
p->texture_offset = offsets[n];
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Compute the reference rect
|
|
|
|
struct mp_rect_f src = {0.0, 0.0, p->image_params.w, p->image_params.h};
|
|
|
|
struct mp_rect_f ref = src;
|
|
|
|
gl_transform_rect(p->texture_offset, &ref);
|
|
|
|
|
|
|
|
// Explicitly scale all of the textures that don't match
|
|
|
|
for (int n = 0; n < 4; n++) {
|
2017-09-20 08:45:33 +00:00
|
|
|
if (img[n].type == PLANE_NONE)
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
continue;
|
2015-10-26 22:43:48 +00:00
|
|
|
|
2016-04-16 16:14:32 +00:00
|
|
|
// If the planes are aligned identically, we will end up with the
|
|
|
|
// exact same source rectangle.
|
|
|
|
struct mp_rect_f rect = src;
|
|
|
|
gl_transform_rect(offsets[n], &rect);
|
|
|
|
if (mp_rect_f_seq(ref, rect))
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
continue;
|
2016-04-16 16:14:32 +00:00
|
|
|
|
|
|
|
// If the rectangles differ, then our planes have a different
|
|
|
|
// alignment and/or size. First of all, we have to compute the
|
|
|
|
// corrections required to meet the target rectangle
|
|
|
|
struct gl_transform fix = {
|
|
|
|
.m = {{(ref.x1 - ref.x0) / (rect.x1 - rect.x0), 0.0},
|
|
|
|
{0.0, (ref.y1 - ref.y0) / (rect.y1 - rect.y0)}},
|
|
|
|
.t = {ref.x0, ref.y0},
|
|
|
|
};
|
|
|
|
|
|
|
|
// Since the scale in texture space is different from the scale in
|
|
|
|
// absolute terms, we have to scale the coefficients down to be
|
|
|
|
// relative to the texture's physical dimensions and local offset
|
|
|
|
struct gl_transform scale = {
|
2017-09-20 08:45:33 +00:00
|
|
|
.m = {{(float)img[n].w / p->texture_w, 0.0},
|
|
|
|
{0.0, (float)img[n].h / p->texture_h}},
|
2016-04-16 16:14:32 +00:00
|
|
|
.t = {-rect.x0, -rect.y0},
|
|
|
|
};
|
2016-12-28 01:46:55 +00:00
|
|
|
if (p->image_params.rotate % 180 == 90)
|
|
|
|
MPSWAP(double, scale.m[0][0], scale.m[1][1]);
|
|
|
|
|
2016-04-16 16:14:32 +00:00
|
|
|
gl_transform_trans(scale, &fix);
|
|
|
|
|
|
|
|
// Since the texture transform is a function of the texture coordinates
|
|
|
|
// to texture space, rather than the other way around, we have to
|
|
|
|
// actually apply the *inverse* of this. Fortunately, calculating
|
|
|
|
// the inverse is relatively easy here.
|
|
|
|
fix.m[0][0] = 1.0 / fix.m[0][0];
|
|
|
|
fix.m[1][1] = 1.0 / fix.m[1][1];
|
|
|
|
fix.t[0] = fix.m[0][0] * -fix.t[0];
|
|
|
|
fix.t[1] = fix.m[1][1] * -fix.t[1];
|
2017-09-20 08:45:33 +00:00
|
|
|
gl_transform_trans(fix, &img[n].transform);
|
2016-04-16 16:14:32 +00:00
|
|
|
|
|
|
|
int scaler_id = -1;
|
|
|
|
const char *name = NULL;
|
2017-09-20 08:45:33 +00:00
|
|
|
switch (img[n].type) {
|
2016-04-16 16:14:32 +00:00
|
|
|
case PLANE_RGB:
|
|
|
|
case PLANE_LUMA:
|
|
|
|
case PLANE_XYZ:
|
|
|
|
scaler_id = SCALER_SCALE;
|
|
|
|
// these aren't worth hooking, fringe hypothetical cases only
|
|
|
|
break;
|
|
|
|
case PLANE_CHROMA:
|
|
|
|
scaler_id = SCALER_CSCALE;
|
|
|
|
name = "CHROMA_SCALED";
|
|
|
|
break;
|
|
|
|
case PLANE_ALPHA:
|
|
|
|
// alpha always uses bilinear
|
|
|
|
name = "ALPHA_SCALED";
|
2015-10-26 22:43:48 +00:00
|
|
|
}
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
|
2016-04-16 16:14:32 +00:00
|
|
|
if (scaler_id < 0)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
const struct scaler_config *conf = &p->opts.scaler[scaler_id];
|
|
|
|
struct scaler *scaler = &p->scaler[scaler_id];
|
|
|
|
|
|
|
|
// bilinear scaling is a free no-op thanks to GPU sampling
|
|
|
|
if (strcmp(conf->kernel.name, "bilinear") != 0) {
|
|
|
|
GLSLF("// upscaling plane %d\n", n);
|
2017-09-20 08:45:33 +00:00
|
|
|
pass_sample(p, img[n], scaler, conf, 1.0, p->texture_w, p->texture_h);
|
|
|
|
finish_pass_tex(p, &p->scale_tex[n], p->texture_w, p->texture_h);
|
|
|
|
img[n] = image_wrap(p->scale_tex[n], img[n].type, img[n].components);
|
2016-04-16 16:14:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Run any post-scaling hooks
|
2017-09-20 08:45:33 +00:00
|
|
|
img[n] = pass_hook(p, name, img[n], NULL);
|
2015-03-27 12:27:40 +00:00
|
|
|
}
|
2015-10-26 22:43:48 +00:00
|
|
|
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
// All planes are of the same size and properly aligned at this point
|
2017-09-16 01:28:38 +00:00
|
|
|
pass_describe(p, "combining planes");
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
int coord = 0;
|
|
|
|
for (int i = 0; i < 4; i++) {
|
2017-09-20 08:45:33 +00:00
|
|
|
if (img[i].type != PLANE_NONE)
|
|
|
|
copy_image(p, &coord, img[i]);
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
}
|
2016-03-05 11:38:51 +00:00
|
|
|
p->components = coord;
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
}
|
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
// Utility function that simply binds a texture and reads from it, without any
|
2017-07-04 23:31:31 +00:00
|
|
|
// transformations.
|
2017-09-20 08:45:33 +00:00
|
|
|
static void pass_read_tex(struct gl_video *p, struct ra_tex *tex)
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
{
|
2017-09-20 08:45:33 +00:00
|
|
|
struct image img = image_wrap(tex, PLANE_RGB, p->components);
|
|
|
|
copy_image(p, &(int){0}, img);
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// yuv conversion, and any other conversions before main up/down-scaling
|
|
|
|
static void pass_convert_yuv(struct gl_video *p)
|
|
|
|
{
|
|
|
|
struct gl_shader_cache *sc = p->sc;
|
vo_opengl: greatly increase smoothmotion performance
Instead of rendering and upscaling each video frame on every vsync, this
version of the algorithm only draws them once and caches the result,
so the only operation that has to run on every vsync is a cheap linear
interpolation, plus CMS/dithering.
On my machine, this is a huge speedup for 24 Hz content (on a 60 Hz
monitor), up to 120% faster. (The speedup is not quite 250% because of
the overhead that the larger FBOs and CMS provides)
In terms of the implementation, this commit basically swaps
interpolation and upscaling - upscaling is moved to inter_program, and
interpolation is moved to the final_program.
Furthermore, the main bulk of the frame rendering logic (upscaling etc.)
was moved to a separate function, which is called from
gl_video_interpolate_frame only if it's actually necessary, and
skipped otherwise.
2015-02-20 21:12:02 +00:00
|
|
|
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seem
to work consistently anyway (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some cases, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
struct mp_csp_params cparams = MP_CSP_PARAMS_DEFAULTS;
|
2017-06-30 15:13:58 +00:00
|
|
|
cparams.gray = p->is_gray;
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seem
to work consistently anyway (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some cases, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
mp_csp_set_image_params(&cparams, &p->image_params);
|
video: redo video equalizer option handling
I really wouldn't care much about this, but some parts of the core code
are under HAVE_GPL, so there's some need to get rid of it. Simply turn
the video equalizer from its current fine-grained handling with vf/vo
fallbacks into global options. This makes updating them much simpler.
This removes any possibility of applying video equalizers in filters,
which affects vf_scale, and the previously removed vf_eq. Not a big
loss, since the preferred VOs have this builtin.
Remove video equalizer handling from vo_direct3d, vo_sdl, vo_vaapi, and
vo_xv. I'm not going to waste my time on these legacy VOs.
vo.eq_opts_cache exists _only_ to send a VOCTRL_SET_EQUALIZER, which
exists _only_ to trigger a redraw. This seems silly, but for now I feel
like this is less of a pain. The rest of the equalizer using code is
self-updating.
See commit 96b906a51d5 for how some video equalizer code was GPL only.
Some command line option names and ranges can probably be traced back to
a GPL only committer, but we don't consider these copyrightable.
2017-08-22 15:01:35 +00:00
|
|
|
mp_csp_equalizer_state_get(p->video_eq, &cparams);
|
2015-03-23 01:42:19 +00:00
|
|
|
p->user_gamma = 1.0 / (cparams.gamma * p->opts.gamma);
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seem
to work consistently anyway (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some cases, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
|
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number
of changes and improvements:
1. mpv users can now introspect the vo_opengl passes, which is something
that has been requested multiple times.
2. performance data is now measured per-pass, which helps both
development and debugging.
3. since adding more passes is cheap, we can now report information for
more passes (e.g. the blit pass, and the osd pass). Note: we also
switch to nanosecond scale, to be able to measure these passes
better.
4. `--user-shaders` authors can now describe their own passes, helping
users both identify which user shaders are active at any given time
as well as helping shader authors identify performance issues.
5. the timing data per pass is now exported as a full list of samples,
so projects like Argon-/mpv-stats can immediately read out all of the
samples and render a graph without having to manually poll this
option constantly.
Due to gl_timer's design being complicated (directly reading performance
data would block, so we delay the actual read-back until the next _start
command), it's vital not to conflate different passes that might be
doing different things from one frame to another. To accomplish this,
the actual timers are stored as part of the gl_shader_cache's sc_entry,
which makes them unique for that exact shader.
Starting and stopping the time measurement is easy to unify with the
gl_sc architecture, because the existing API already relies on a
"generate, render, reset" flow, so we can just put timer_start and
timer_stop in sc_generate and sc_reset, respectively.
The ugliest thing about this code is that due to the need to keep pass
information relatively stable in between frames, we need to distinguish
between "new" and "redrawn" frames, which bloats the code somewhat and
also feels hacky and vo_opengl-specific. (But then again, this entire
thing is vo_opengl-specific)
2017-06-29 15:00:06 +00:00
|
|
|
pass_describe(p, "color conversion");
|
vo_opengl: greatly increase smoothmotion performance
Instead of rendering and upscaling each video frame on every vsync, this
version of the algorithm only draws them once and caches the result,
so the only operation that has to run on every vsync is a cheap linear
interpolation, plus CMS/dithering.
On my machine, this is a huge speedup for 24 Hz content (on a 60 Hz
monitor), up to 120% faster. (The speedup is not quite 250% because of
the overhead that the larger FBOs and CMS provides)
In terms of the implementation, this commit basically swaps
interpolation and upscaling - upscaling is moved to inter_program, and
interpolation is moved to the final_program.
Furthermore, the main bulk of the frame rendering logic (upscaling etc.)
was moved to a separate function, which is called from
gl_video_interpolate_frame only if it's actually necessary, and
skipped otherwise.
2015-02-20 21:12:02 +00:00
|
|
|
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
performing the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
if (p->color_swizzle[0])
|
|
|
|
GLSLF("color = color.%s;\n", p->color_swizzle);
|
|
|
|
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seem
to work consistently anyway (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some cases, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
// Pre-colormatrix input gamma correction
|
2016-06-29 07:16:13 +00:00
|
|
|
if (cparams.color.space == MP_CSP_XYZ)
|
2015-12-09 16:10:38 +00:00
|
|
|
GLSL(color.rgb = pow(color.rgb, vec3(2.6));) // linear light
|
vo_opengl: greatly increase smoothmotion performance
Instead of rendering and upscaling each video frame on every vsync, this
version of the algorithm only draws them once and caches the result,
so the only operation that has to run on every vsync is a cheap linear
interpolation, plus CMS/dithering.
On my machine, this is a huge speedup for 24 Hz content (on a 60 Hz
monitor), up to 120% faster. (The speedup is not quite 250% because of
the overhead that the larger FBOs and CMS provides)
In terms of the implementation, this commit basically swaps
interpolation and upscaling - upscaling is moved to inter_program, and
interpolation is moved to the final_program.
Furthermore, the main bulk of the frame rendering logic (upscaling etc.)
was moved to a separate function, which is called from
gl_video_interpolate_frame only if it's actually necessary, and
skipped otherwise.
2015-02-20 21:12:02 +00:00
|
|
|
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
// We always explicitly normalize the range in pass_read_video
|
|
|
|
cparams.input_bits = cparams.texture_bits = 0;
|
2015-03-27 12:27:40 +00:00
|
|
|
|
2015-12-07 22:45:41 +00:00
|
|
|
// Conversion to RGB. For RGB itself, this still applies e.g. brightness
|
|
|
|
// and contrast controls, or expansion of e.g. LSB-packed 10 bit data.
|
|
|
|
struct mp_cmat m = {{{0}}};
|
2015-12-08 23:22:12 +00:00
|
|
|
mp_get_csp_matrix(&cparams, &m);
|
2015-12-07 22:45:41 +00:00
|
|
|
gl_sc_uniform_mat3(sc, "colormatrix", true, &m.m[0][0]);
|
|
|
|
gl_sc_uniform_vec3(sc, "colormatrix_c", m.c);
|
|
|
|
|
|
|
|
GLSL(color.rgb = mat3(colormatrix) * color.rgb + colormatrix_c;)
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seem
to work consistently anyway (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some cases, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
|
2016-06-29 07:16:13 +00:00
|
|
|
if (p->image_params.color.space == MP_CSP_BT_2020_C) {
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
// Conversion for C'rcY'cC'bc via the BT.2020 CL system:
|
|
|
|
// C'bc = (B'-Y'c) / 1.9404 | C'bc <= 0
|
|
|
|
// = (B'-Y'c) / 1.5816 | C'bc > 0
|
|
|
|
//
|
|
|
|
// C'rc = (R'-Y'c) / 1.7184 | C'rc <= 0
|
|
|
|
// = (R'-Y'c) / 0.9936 | C'rc > 0
|
|
|
|
//
|
|
|
|
// as per the BT.2020 specification, table 4. This is a non-linear
|
|
|
|
// transformation because (constant) luminance receives non-equal
|
|
|
|
// contributions from the three different channels.
|
|
|
|
GLSLF("// constant luminance conversion\n");
|
|
|
|
GLSL(color.br = color.br * mix(vec2(1.5816, 0.9936),
|
|
|
|
vec2(1.9404, 1.7184),
|
|
|
|
lessThanEqual(color.br, vec2(0)))
|
|
|
|
+ color.gg;)
|
|
|
|
// Expand channels to camera-linear light. This shader currently just
|
|
|
|
// assumes everything uses the BT.2020 12-bit gamma function, since the
|
|
|
|
// difference between 10 and 12-bit is negligible for anything other
|
|
|
|
// than 12-bit content.
|
2017-07-17 03:23:55 +00:00
|
|
|
GLSL(color.rgb = mix(color.rgb * vec3(1.0/4.5),
|
|
|
|
pow((color.rgb + vec3(0.0993))*vec3(1.0/1.0993),
|
|
|
|
vec3(1.0/0.45)),
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
lessThanEqual(vec3(0.08145), color.rgb));)
|
|
|
|
// Calculate the green channel from the expanded RYcB
|
|
|
|
// The BT.2020 specification says Yc = 0.2627*R + 0.6780*G + 0.0593*B
|
2017-07-17 03:23:55 +00:00
|
|
|
GLSL(color.g = (color.g - 0.2627*color.r - 0.0593*color.b)*1.0/0.6780;)
|
2015-03-14 02:04:23 +00:00
|
|
|
// Recompress to receive the R'G'B' result, same as other systems
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
GLSL(color.rgb = mix(color.rgb * vec3(4.5),
|
|
|
|
vec3(1.0993) * pow(color.rgb, vec3(0.45)) - vec3(0.0993),
|
|
|
|
lessThanEqual(vec3(0.0181), color.rgb));)
|
|
|
|
}
|
|
|
|
|
2016-03-05 11:38:51 +00:00
|
|
|
p->components = 3;
|
2016-05-15 22:14:02 +00:00
|
|
|
if (!p->has_alpha || p->opts.alpha_mode == ALPHA_NO) {
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
GLSL(color.a = 1.0;)
|
2016-03-05 11:38:51 +00:00
|
|
|
} else { // alpha present in image
|
|
|
|
p->components = 4;
|
2016-03-29 19:56:38 +00:00
|
|
|
GLSL(color = vec4(color.rgb * color.a, color.a);)
|
2015-03-13 20:29:04 +00:00
|
|
|
}
|
vo_opengl: greatly increase smoothmotion performance
Instead of rendering and upscaling each video frame on every vsync, this
version of the algorithm only draws them once and caches the result,
so the only operation that has to run on every vsync is a cheap linear
interpolation, plus CMS/dithering.
On my machine, this is a huge speedup for 24 Hz content (on a 60 Hz
monitor), up to 120% faster. (The speedup is not quite 250% because of
the overhead that the larger FBOs and CMS provides)
In terms of the implementation, this commit basically swaps
interpolation and upscaling - upscaling is moved to inter_program, and
interpolation is moved to the final_program.
Furthermore, the main bulk of the frame rendering logic (upscaling etc.)
was moved to a separate function, which is called from
gl_video_interpolate_frame only if it's actually necessary, and
skipped otherwise.
2015-02-20 21:12:02 +00:00
|
|
|
}
|
|
|
|
|
2016-04-08 20:21:31 +00:00
|
|
|
static void get_scale_factors(struct gl_video *p, bool transpose_rot, double xy[2])
|
2014-11-23 19:06:05 +00:00
|
|
|
{
|
2016-03-28 14:30:48 +00:00
|
|
|
double target_w = p->src_rect.x1 - p->src_rect.x0;
|
|
|
|
double target_h = p->src_rect.y1 - p->src_rect.y0;
|
2016-04-08 20:21:31 +00:00
|
|
|
if (transpose_rot && p->image_params.rotate % 180 == 90)
|
2016-03-28 14:30:48 +00:00
|
|
|
MPSWAP(double, target_w, target_h);
|
|
|
|
xy[0] = (p->dst_rect.x1 - p->dst_rect.x0) / target_w;
|
|
|
|
xy[1] = (p->dst_rect.y1 - p->dst_rect.y0) / target_h;
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
}
|
|
|
|
|
2016-04-08 20:21:31 +00:00
|
|
|
// Cropping.
|
2016-03-28 14:30:48 +00:00
|
|
|
static void compute_src_transform(struct gl_video *p, struct gl_transform *tr)
|
2015-09-07 19:02:49 +00:00
|
|
|
{
|
2015-10-23 17:52:03 +00:00
|
|
|
float sx = (p->src_rect.x1 - p->src_rect.x0) / (float)p->texture_w,
|
|
|
|
sy = (p->src_rect.y1 - p->src_rect.y0) / (float)p->texture_h,
|
2015-09-07 19:02:49 +00:00
|
|
|
ox = p->src_rect.x0,
|
|
|
|
oy = p->src_rect.y0;
|
2016-03-28 14:30:48 +00:00
|
|
|
struct gl_transform transform = {{{sx, 0}, {0, sy}}, {ox, oy}};
|
|
|
|
|
2015-10-26 22:43:48 +00:00
|
|
|
gl_transform_trans(p->texture_offset, &transform);
|
|
|
|
|
2015-09-07 19:02:49 +00:00
|
|
|
*tr = transform;
|
|
|
|
}
|
|
|
|
|
2015-03-15 21:52:34 +00:00
|
|
|
// Takes care of the main scaling and pre/post-conversions
|
|
|
|
static void pass_scale_main(struct gl_video *p)
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
{
|
|
|
|
// Figure out the main scaler.
|
|
|
|
double xy[2];
|
2016-04-08 20:21:31 +00:00
|
|
|
get_scale_factors(p, true, xy);
|
2015-10-26 22:43:48 +00:00
|
|
|
|
|
|
|
// actual scale factor should be divided by the scale factor of prescaling.
|
|
|
|
xy[0] /= p->texture_offset.m[0][0];
|
|
|
|
xy[1] /= p->texture_offset.m[1][1];
|
|
|
|
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
bool downscaling = xy[0] < 1.0 || xy[1] < 1.0;
|
|
|
|
bool upscaling = !downscaling && (xy[0] > 1.0 || xy[1] > 1.0);
|
|
|
|
double scale_factor = 1.0;
|
|
|
|
|
2016-03-05 08:42:57 +00:00
|
|
|
struct scaler *scaler = &p->scaler[SCALER_SCALE];
|
|
|
|
struct scaler_config scaler_conf = p->opts.scaler[SCALER_SCALE];
|
2015-10-26 22:43:48 +00:00
|
|
|
if (p->opts.scaler_resizes_only && !downscaling && !upscaling) {
|
vo_opengl: refactor scaler configuration
This merges all of the scaler-related options into a single
configuration struct, and also cleans up the way they're passed through
the code. (For example, the scaler index is no longer threaded through
pass_sample, just the scaler configuration itself, and there's no longer
duplication of the params etc.)
In addition, this commit makes scale-down more principled, and turns it
into a scaler in its own right - so there's no longer an ugly separation
between scale and scale-down in the code.
Finally, the radius stuff has been made more proper - filters always
have a radius now (there's no more radius -1), and get a new .resizable
attribute instead for when it's tunable.
User-visible changes:
1. scale-down has been renamed dscale and now has its own set of config
options (dscale-param1, dscale-radius) etc., instead of reusing
scale-param1 (which was arguably a bug).
2. The default radius is no longer fixed at 3, but instead uses that
filter's preferred radius by default. (Scalers with a default radius
other than 3 include sinc, gaussian, box and triangle)
3. scale-radius etc. now goes down to 0.5, rather than 1.0. 0.5 is the
smallest radius that theoretically makes sense, and indeed it's used
by at least one filter (nearest).
Apart from that, it should just be internal changes only.
Note that this sets up for the refactor discussed in #1720, which would
be to merge scaler and window configurations (include parameters etc.)
into a single, simplified string. In the code, this would now basically
just mean getting rid of all the OPT_FLOATRANGE etc. lines related to
scalers and replacing them by a single function that parses a string and
updates the struct scaler_config as appropriate.
2015-03-26 00:55:32 +00:00
|
|
|
scaler_conf.kernel.name = "bilinear";
|
2016-04-17 11:07:14 +00:00
|
|
|
// For scaler-resizes-only, we round the texture offset to
|
|
|
|
// the nearest round value in order to prevent ugly blurriness
|
|
|
|
// (in exchange for slightly shifting the image by up to half a
|
|
|
|
// subpixel)
|
|
|
|
p->texture_offset.t[0] = roundf(p->texture_offset.t[0]);
|
|
|
|
p->texture_offset.t[1] = roundf(p->texture_offset.t[1]);
|
2015-10-26 22:43:48 +00:00
|
|
|
}
|
2016-03-05 08:42:57 +00:00
|
|
|
if (downscaling && p->opts.scaler[SCALER_DSCALE].kernel.name) {
|
|
|
|
scaler_conf = p->opts.scaler[SCALER_DSCALE];
|
|
|
|
scaler = &p->scaler[SCALER_DSCALE];
|
vo_opengl: refactor scaler configuration
This merges all of the scaler-related options into a single
configuration struct, and also cleans up the way they're passed through
the code. (For example, the scaler index is no longer threaded through
pass_sample, just the scaler configuration itself, and there's no longer
duplication of the params etc.)
In addition, this commit makes scale-down more principled, and turns it
into a scaler in its own right - so there's no longer an ugly separation
between scale and scale-down in the code.
Finally, the radius stuff has been made more proper - filters always
have a radius now (there's no more radius -1), and get a new .resizable
attribute instead for when it's tunable.
User-visible changes:
1. scale-down has been renamed dscale and now has its own set of config
options (dscale-param1, dscale-radius) etc., instead of reusing
scale-param1 (which was arguably a bug).
2. The default radius is no longer fixed at 3, but instead uses that
filter's preferred radius by default. (Scalers with a default radius
other than 3 include sinc, gaussian, box and triangle)
3. scale-radius etc. now goes down to 0.5, rather than 1.0. 0.5 is the
smallest radius that theoretically makes sense, and indeed it's used
by at least one filter (nearest).
Apart from that, it should just be internal changes only.
Note that this sets up for the refactor discussed in #1720, which would
be to merge scaler and window configurations (include parameters etc.)
into a single, simplified string. In the code, this would now basically
just mean getting rid of all the OPT_FLOATRANGE etc. lines related to
scalers and replacing them by a single function that parses a string and
updates the struct scaler_config as appropriate.
2015-03-26 00:55:32 +00:00
|
|
|
}
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
|
2015-11-07 16:49:14 +00:00
|
|
|
// When requesting correct-downscaling and the clip is anamorphic, and
|
|
|
|
// because only a single scale factor is used for both axes, enable it only
|
2015-08-10 00:57:53 +00:00
|
|
|
// when both axes are downscaled, and use the milder of the factors to not
|
|
|
|
// end up with too much blur on one axis (even if we end up with sub-optimal
|
2015-11-07 16:49:14 +00:00
|
|
|
// scale factor on the other axis). This is better than not respecting
|
|
|
|
// correct scaling at all for anamorphic clips.
|
2015-08-10 00:57:53 +00:00
|
|
|
double f = MPMAX(xy[0], xy[1]);
|
2015-11-07 16:49:14 +00:00
|
|
|
if (p->opts.correct_downscaling && f < 1.0)
|
2015-08-10 00:57:53 +00:00
|
|
|
scale_factor = 1.0 / f;
|
vo_opengl: greatly increase smoothmotion performance
Instead of rendering and upscaling each video frame on every vsync, this
version of the algorithm only draws them once and caches the result,
so the only operation that has to run on every vsync is a cheap linear
interpolation, plus CMS/dithering.
On my machine, this is a huge speedup for 24 Hz content (on a 60 Hz
monitor), up to 120% faster. (The speedup is not quite 250% because of
the overhead that the larger FBOs and CMS provides)
In terms of the implementation, this commit basically swaps
interpolation and upscaling - upscaling is moved to inter_program, and
interpolation is moved to the final_program.
Furthermore, the main bulk of the frame rendering logic (upscaling etc.)
was moved to a separete function, which is called from
gl_video_interpolate_frame only if it's actually necessarily, and
skipped otherwise.
2015-02-20 21:12:02 +00:00
|
|
|
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
// Pre-conversion, like linear light/sigmoidization
|
|
|
|
GLSLF("// scaler pre-conversion\n");
|
2017-07-24 21:22:30 +00:00
|
|
|
bool use_linear = p->opts.linear_scaling || p->opts.sigmoid_upscaling;
|
|
|
|
|
|
|
|
// Linear light downscaling results in nasty artifacts for HDR curves due
|
|
|
|
// to the potentially extreme brightness differences severely compounding
|
|
|
|
// any ringing. So just scale in gamma light instead.
|
|
|
|
if (mp_trc_is_hdr(p->image_params.color.gamma) && downscaling)
|
|
|
|
use_linear = false;
|
|
|
|
|
|
|
|
if (use_linear) {
|
|
|
|
p->use_linear = true;
|
2016-06-29 07:16:13 +00:00
|
|
|
pass_linearize(p->sc, p->image_params.color.gamma);
|
2016-04-19 18:45:40 +00:00
|
|
|
pass_opt_hook_point(p, "LINEAR", NULL);
|
|
|
|
}
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
|
2017-07-24 21:22:30 +00:00
|
|
|
bool use_sigmoid = use_linear && p->opts.sigmoid_upscaling && upscaling;
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
float sig_center, sig_slope, sig_offset, sig_scale;
|
|
|
|
if (use_sigmoid) {
|
|
|
|
// Coefficients for the sigmoidal transform are taken from the
|
|
|
|
// formula here: http://www.imagemagick.org/Usage/color_mods/#sigmoidal
|
|
|
|
sig_center = p->opts.sigmoid_center;
|
|
|
|
sig_slope = p->opts.sigmoid_slope;
|
|
|
|
// This function needs to go through (0,0) and (1,1) so we compute the
|
|
|
|
// values at 1 and 0, and then scale/shift them, respectively.
|
|
|
|
sig_offset = 1.0/(1+expf(sig_slope * sig_center));
|
|
|
|
sig_scale = 1.0/(1+expf(sig_slope * (sig_center-1))) - sig_offset;
|
2017-07-17 03:23:55 +00:00
|
|
|
GLSLF("color.rgb = %f - log(1.0/(color.rgb * %f + %f) - 1.0) * 1.0/%f;\n",
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
sig_center, sig_scale, sig_offset, sig_slope);
|
2016-04-19 18:45:40 +00:00
|
|
|
pass_opt_hook_point(p, "SIGMOID", NULL);
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
}
|
|
|
|
|
2016-04-19 18:45:40 +00:00
|
|
|
pass_opt_hook_point(p, "PREKERNEL", NULL);
|
|
|
|
|
2016-03-28 14:30:48 +00:00
|
|
|
int vp_w = p->dst_rect.x1 - p->dst_rect.x0;
|
|
|
|
int vp_h = p->dst_rect.y1 - p->dst_rect.y0;
|
2015-09-07 19:02:49 +00:00
|
|
|
struct gl_transform transform;
|
2016-03-28 14:30:48 +00:00
|
|
|
compute_src_transform(p, &transform);
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seem
to work consistently anyway (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some cases, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
performing the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
GLSLF("// main scaling\n");
|
2017-09-20 08:45:33 +00:00
|
|
|
finish_pass_tex(p, &p->indirect_tex, p->texture_w, p->texture_h);
|
|
|
|
struct image src = image_wrap(p->indirect_tex, PLANE_RGB, p->components);
|
2016-04-16 16:14:32 +00:00
|
|
|
gl_transform_trans(transform, &src.transform);
|
2016-03-05 11:38:51 +00:00
|
|
|
pass_sample(p, src, scaler, &scaler_conf, scale_factor, vp_w, vp_h);
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seem
to work consistently anyway (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some cases, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
|
2015-10-23 17:52:03 +00:00
|
|
|
// Changes the texture size to display size after main scaler.
|
|
|
|
p->texture_w = vp_w;
|
|
|
|
p->texture_h = vp_h;
|
|
|
|
|
2016-04-19 18:45:40 +00:00
|
|
|
pass_opt_hook_point(p, "POSTKERNEL", NULL);
|
|
|
|
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seem
to work consistently anyway (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some cases, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
GLSLF("// scaler post-conversion\n");
|
|
|
|
if (use_sigmoid) {
|
|
|
|
// Inverse of the transformation above
|
2017-07-17 03:23:55 +00:00
|
|
|
GLSLF("color.rgb = (1.0/(1.0 + exp(%f * (%f - color.rgb))) - %f) * 1.0/%f;\n",
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seem
to work consistently anyway (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some cases, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
sig_slope, sig_center, sig_offset, sig_scale);
|
|
|
|
}
|
2015-03-15 21:52:34 +00:00
|
|
|
}
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seem
to work consistently anyway (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some cases, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
|
2016-06-29 07:28:17 +00:00
|
|
|
// Adapts the colors to the right output color space. (Final pass during
// rendering)
// If OSD is true, ignore any changes that may have been made to the video
// by previous passes (i.e. linear scaling)
static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool osd)
{
    struct ra *ra = p->ra;

    // Figure out the target color space from the options, or auto-guess if
    // none were set
    struct mp_colorspace dst = {
        .gamma = p->opts.target_trc,
        .primaries = p->opts.target_prim,
        .light = MP_CSP_LIGHT_DISPLAY,
        // Option is in absolute nits; internal convention is multiples of
        // the reference white level.
        .sig_peak = p->opts.target_peak / MP_REF_WHITE,
    };

    if (p->use_lut_3d) {
        // The 3DLUT is always generated against the video's original source
        // space, *not* the reference space. (To avoid having to regenerate
        // the 3DLUT for the OSD on every frame)
        enum mp_csp_prim prim_orig = p->image_params.color.primaries;
        enum mp_csp_trc trc_orig = p->image_params.color.gamma;

        // One exception: HDR is not implemented by LittleCMS for technical
        // limitation reasons, so we use a gamma 2.2 input curve here instead.
        // We could pick any value we want here, the difference is just coding
        // efficiency.
        if (mp_trc_is_hdr(trc_orig))
            trc_orig = MP_CSP_TRC_GAMMA22;

        // Only force the target space to the 3DLUT's input space if the LUT
        // could actually be generated/loaded for it.
        if (gl_video_get_lut3d(p, prim_orig, trc_orig)) {
            dst.primaries = prim_orig;
            dst.gamma = trc_orig;
            assert(dst.primaries && dst.gamma);
        }
    }

    if (dst.primaries == MP_CSP_PRIM_AUTO) {
        // The vast majority of people are on sRGB or BT.709 displays, so pick
        // this as the default output color space.
        dst.primaries = MP_CSP_PRIM_BT_709;

        if (src.primaries == MP_CSP_PRIM_BT_601_525 ||
            src.primaries == MP_CSP_PRIM_BT_601_625)
        {
            // Since we auto-pick BT.601 and BT.709 based on the dimensions,
            // combined with the fact that they're very similar to begin with,
            // and to avoid confusing the average user, just don't adapt BT.601
            // content automatically at all.
            dst.primaries = src.primaries;
        }
    }

    if (dst.gamma == MP_CSP_TRC_AUTO) {
        // Most people seem to complain when the image is darker or brighter
        // than what they're "used to", so just avoid changing the gamma
        // altogether by default. The only exceptions to this rule apply to
        // very unusual TRCs, which even hardcore technoluddites would probably
        // not enjoy viewing unaltered.
        dst.gamma = src.gamma;

        // Avoid outputting linear light or HDR content "by default". For these
        // just pick gamma 2.2 as a default, since it's a good estimate for
        // the response of typical displays
        if (dst.gamma == MP_CSP_TRC_LINEAR || mp_trc_is_hdr(dst.gamma))
            dst.gamma = MP_CSP_TRC_GAMMA22;
    }

    // If there's no specific signal peak known for the output display, infer
    // it from the chosen transfer function
    if (!dst.sig_peak)
        dst.sig_peak = mp_trc_nom_peak(dst.gamma);

    // Peak detection is only meaningful for HDR sources, and can be disabled
    // by setting compute_hdr_peak < 0.
    bool detect_peak = p->opts.compute_hdr_peak >= 0 && mp_trc_is_hdr(src.gamma);
    if (detect_peak && !p->hdr_peak_ssbo) {
        // Lazily create the SSBO the peak-detection compute shader
        // accumulates into; this mirrors the GLSL block declared below.
        // NOTE(review): C-side names frame_sum/total_sum differ from the GLSL
        // names frame_avg/total_avg; only the sizes matter for the layout.
        struct {
            uint32_t counter;
            uint32_t frame_idx;
            uint32_t frame_num;
            uint32_t frame_max[PEAK_DETECT_FRAMES+1];
            uint32_t frame_sum[PEAK_DETECT_FRAMES+1];
            uint32_t total_max;
            uint32_t total_sum;
        } peak_ssbo = {0};

        struct ra_buf_params params = {
            .type = RA_BUF_TYPE_SHADER_STORAGE,
            .size = sizeof(peak_ssbo),
            .initial_data = &peak_ssbo,
        };

        p->hdr_peak_ssbo = ra_buf_create(ra, &params);
        if (!p->hdr_peak_ssbo) {
            MP_WARN(p, "Failed to create HDR peak detection SSBO, disabling.\n");
            detect_peak = false;
            // Remember the failure so we don't retry every frame.
            p->opts.compute_hdr_peak = -1;
        }
    }

    if (detect_peak) {
        pass_describe(p, "detect HDR peak");
        pass_is_compute(p, 8, 8); // 8x8 is good for performance
        gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo,
            "uint counter;"
            "uint frame_idx;"
            "uint frame_num;"
            "uint frame_max[%d];"
            "uint frame_avg[%d];"
            "uint total_max;"
            "uint total_avg;",
            PEAK_DETECT_FRAMES + 1,
            PEAK_DETECT_FRAMES + 1
        );
    }

    // Adapt from src to dst as necessary
    pass_color_map(p->sc, src, dst, p->opts.tone_mapping,
                   p->opts.tone_mapping_param, p->opts.tone_mapping_desat,
                   detect_peak, p->opts.gamut_warning, p->use_linear && !osd);

    if (p->use_lut_3d) {
        gl_sc_uniform_texture(p->sc, "lut_3d", p->lut_3d_texture);
        GLSL(vec3 cpos;)
        // Remap each channel into the LUT's texel-center coordinate space.
        for (int i = 0; i < 3; i++)
            GLSLF("cpos[%d] = LUT_POS(color[%d], %d.0);\n", i, i, p->lut_3d_size[i]);
        GLSL(color.rgb = tex3D(lut_3d, cpos).rgb;)
    }
}
|
2014-11-23 19:06:05 +00:00
|
|
|
|
2017-08-07 17:14:18 +00:00
|
|
|
// Informs the renderer of the output framebuffer's per-component bit depth,
// which pass_dither() uses to size the dither pattern. A value <= 0 means
// unknown; pass_dither() then assumes 8 bits per component.
void gl_video_set_fb_depth(struct gl_video *p, int fb_depth)
{
    p->fb_depth = fb_depth;
}
|
|
|
|
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
performing the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
static void pass_dither(struct gl_video *p)
|
|
|
|
{
|
|
|
|
// Assume 8 bits per component if unknown.
|
2017-08-07 17:14:18 +00:00
|
|
|
int dst_depth = p->fb_depth > 0 ? p->fb_depth : 8;
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
performing the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
if (p->opts.dither_depth > 0)
|
|
|
|
dst_depth = p->opts.dither_depth;
|
|
|
|
|
2016-05-15 22:14:02 +00:00
|
|
|
if (p->opts.dither_depth < 0 || p->opts.dither_algo == DITHER_NONE)
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
performing the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
return;
|
|
|
|
|
|
|
|
if (!p->dither_texture) {
|
|
|
|
MP_VERBOSE(p, "Dither to %d.\n", dst_depth);
|
|
|
|
|
2017-06-19 20:46:04 +00:00
|
|
|
int tex_size = 0;
|
|
|
|
void *tex_data = NULL;
|
2017-07-29 18:14:48 +00:00
|
|
|
const struct ra_format *fmt = NULL;
|
2017-07-29 18:12:43 +00:00
|
|
|
void *temp = NULL;
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
|
2016-05-15 22:14:02 +00:00
|
|
|
if (p->opts.dither_algo == DITHER_FRUIT) {
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
int sizeb = p->opts.dither_size;
|
|
|
|
int size = 1 << sizeb;
|
|
|
|
|
|
|
|
if (p->last_dither_matrix_size != size) {
|
|
|
|
p->last_dither_matrix = talloc_realloc(p, p->last_dither_matrix,
|
|
|
|
float, size * size);
|
|
|
|
mp_make_fruit_dither_matrix(p->last_dither_matrix, sizeb);
|
|
|
|
p->last_dither_matrix_size = size;
|
|
|
|
}
|
|
|
|
|
2015-12-08 22:22:08 +00:00
|
|
|
// Prefer R16 texture since they provide higher precision.
|
2017-07-29 18:14:48 +00:00
|
|
|
fmt = ra_find_unorm_format(p->ra, 2, 1);
|
2017-07-29 18:12:43 +00:00
|
|
|
if (!fmt)
|
2017-07-29 18:14:48 +00:00
|
|
|
fmt = ra_find_float16_format(p->ra, 1);
|
2016-05-12 18:08:49 +00:00
|
|
|
if (fmt) {
|
2017-06-17 11:54:21 +00:00
|
|
|
tex_size = size;
|
|
|
|
tex_data = p->last_dither_matrix;
|
2017-07-29 18:14:48 +00:00
|
|
|
if (fmt->ctype == RA_CTYPE_UNORM) {
|
2017-07-29 18:12:43 +00:00
|
|
|
uint16_t *t = temp = talloc_array(NULL, uint16_t, size * size);
|
|
|
|
for (int n = 0; n < size * size; n++)
|
|
|
|
t[n] = p->last_dither_matrix[n] * UINT16_MAX;
|
|
|
|
tex_data = t;
|
|
|
|
}
|
2017-06-17 11:54:21 +00:00
|
|
|
} else {
|
|
|
|
MP_VERBOSE(p, "GL too old. Falling back to ordered dither.\n");
|
|
|
|
p->opts.dither_algo = DITHER_ORDERED;
|
2015-12-08 22:22:08 +00:00
|
|
|
}
|
2017-06-17 11:54:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (p->opts.dither_algo == DITHER_ORDERED) {
|
2017-07-29 18:12:43 +00:00
|
|
|
temp = talloc_array(NULL, char, 8 * 8);
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
mp_make_ordered_dither_matrix(temp, 8);
|
|
|
|
|
2017-07-29 18:14:48 +00:00
|
|
|
fmt = ra_find_unorm_format(p->ra, 1, 1);
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
tex_size = 8;
|
|
|
|
tex_data = temp;
|
|
|
|
}
|
|
|
|
|
2017-07-29 18:14:48 +00:00
|
|
|
struct ra_tex_params params = {
|
|
|
|
.dimensions = 2,
|
|
|
|
.w = tex_size,
|
|
|
|
.h = tex_size,
|
|
|
|
.d = 1,
|
|
|
|
.format = fmt,
|
|
|
|
.render_src = true,
|
|
|
|
.src_repeat = true,
|
|
|
|
.initial_data = tex_data,
|
|
|
|
};
|
|
|
|
p->dither_texture = ra_tex_create(p->ra, ¶ms);
|
2015-02-19 13:03:18 +00:00
|
|
|
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
debug_check_gl(p, "dither setup");
|
2017-07-29 18:12:43 +00:00
|
|
|
|
|
|
|
talloc_free(temp);
|
vo_opengl: greatly increase smoothmotion performance
Instead of rendering and upscaling each video frame on every vsync, this
version of the algorithm only draws them once and caches the result,
so the only operation that has to run on every vsync is a cheap linear
interpolation, plus CMS/dithering.
On my machine, this is a huge speedup for 24 Hz content (on a 60 Hz
monitor), up to 120% faster. (The speedup is not quite 250% because of
the overhead that the larger FBOs and CMS provides)
In terms of the implementation, this commit basically swaps
interpolation and upscaling - upscaling is moved to inter_program, and
interpolation is moved to the final_program.
Furthermore, the main bulk of the frame rendering logic (upscaling etc.)
was moved to a separete function, which is called from
gl_video_interpolate_frame only if it's actually necessarily, and
skipped otherwise.
2015-02-20 21:12:02 +00:00
|
|
|
}
|
|
|
|
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
GLSLF("// dithering\n");
|
|
|
|
|
|
|
|
// This defines how many bits are considered significant for output on
|
|
|
|
// screen. The superfluous bits will be used for rounding according to the
|
|
|
|
// dither matrix. The precision of the source implicitly decides how many
|
|
|
|
// dither patterns can be visible.
|
2015-03-16 19:22:09 +00:00
|
|
|
int dither_quantization = (1 << dst_depth) - 1;
|
2017-07-29 18:14:48 +00:00
|
|
|
int dither_size = p->dither_texture->params.w;
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
|
2017-07-29 18:14:48 +00:00
|
|
|
gl_sc_uniform_texture(p->sc, "dither", p->dither_texture);
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
|
2017-07-29 18:14:48 +00:00
|
|
|
GLSLF("vec2 dither_pos = gl_FragCoord.xy * 1.0/%d.0;\n", dither_size);
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
|
|
|
|
if (p->opts.temporal_dither) {
|
2015-07-20 17:09:22 +00:00
|
|
|
int phase = (p->frames_rendered / p->opts.temporal_dither_period) % 8u;
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
float r = phase * (M_PI / 2); // rotate
|
|
|
|
float m = phase < 4 ? 1 : -1; // mirror
|
|
|
|
|
|
|
|
float matrix[2][2] = {{cos(r), -sin(r) },
|
|
|
|
{sin(r) * m, cos(r) * m}};
|
2017-09-17 08:55:43 +00:00
|
|
|
gl_sc_uniform_dynamic(p->sc);
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
gl_sc_uniform_mat2(p->sc, "dither_trafo", true, &matrix[0][0]);
|
|
|
|
|
|
|
|
GLSL(dither_pos = dither_trafo * dither_pos;)
|
2015-02-19 13:03:18 +00:00
|
|
|
}
|
|
|
|
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
GLSL(float dither_value = texture(dither, dither_pos).r;)
|
2017-07-17 03:23:55 +00:00
|
|
|
GLSLF("color = floor(color * %d.0 + dither_value + 0.5 / %d.0) * 1.0/%d.0;\n",
|
2017-07-29 18:14:48 +00:00
|
|
|
dither_quantization, dither_size * dither_size, dither_quantization);
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
}
|
|
|
|
|
2015-03-29 04:34:34 +00:00
|
|
|
// Draws the OSD, in scene-referred colors. If cms is true, subtitles are
// instead adapted to the display's gamut.
|
2015-03-23 01:42:19 +00:00
|
|
|
static void pass_draw_osd(struct gl_video *p, int draw_flags, double pts,
                          struct mp_osd_res rect, struct ra_fbo fbo, bool cms)
{
    // "Subtitles only" and "OSD only" at the same time exclude each other,
    // so there is nothing to render at all.
    if ((draw_flags & OSD_DRAW_SUB_ONLY) && (draw_flags & OSD_DRAW_OSD_ONLY))
        return;

    // Rasterize/update the OSD parts for the given output rect, timestamp,
    // and stereo mode before drawing them.
    mpgl_osd_generate(p->osd, rect, pts, p->image_params.stereo_out, draw_flags);

    // Time the whole OSD drawing loop as one pass.
    timer_pool_start(p->osd_timer);
    for (int n = 0; n < MAX_OSD_PARTS; n++) {
        // (This returns false if this part is empty with nothing to draw.)
        if (!mpgl_osd_draw_prepare(p->osd, n, p->sc))
            continue;
        // When subtitles need to be color managed, assume they're in sRGB
        // (for lack of anything saner to do)
        if (cms) {
            static const struct mp_colorspace csp_srgb = {
                .primaries = MP_CSP_PRIM_BT_709,
                .gamma = MP_CSP_TRC_SRGB,
                .light = MP_CSP_LIGHT_DISPLAY,
            };

            // Adapt from sRGB to the configured display colorspace.
            pass_colormanage(p, csp_srgb, true);
        }
        mpgl_osd_draw_finish(p->osd, n, p->sc, fbo);
    }

    // Record the measured time under a descriptive pass name (for stats).
    timer_pool_stop(p->osd_timer);
    pass_describe(p, "drawing osd");
    pass_record(p, timer_pool_measure(p->osd_timer));
}
|
|
|
|
|
2017-06-30 15:13:58 +00:00
|
|
|
// Scale factor that maps a (possibly chroma-subsampled) plane size back
// onto the full-size grid: size / chroma_upsize(size, pixel).
static float chroma_realign(int size, int pixel)
{
    float upsized = (float)chroma_upsize(size, pixel);
    return size / upsized;
}
|
|
|
|
|
vo_opengl: restore single pass optimization as separate code path
The single path optimization, rendering the video in one shader pass and
without FBO indirections, was removed soem commits ago. It didn't have a
place in this code, and caused considerable complexity and maintenance
issues.
On the other hand, it still has some worth, such as for use with
extremely crappy hardware (GLES only or OpenGL 2.1 without FBO
extension). Ideally, these use cases would be handled by a separate VO
(say, vo_gles). While cleaner, this would still cause code duplication
and other complexity.
The third option is making the single-pass optimization a completely
separate code path, with most vo_opengl features disabled. While this
does duplicate some functionality (such as "unpacking" the video data
from textures), it's also relatively unintrusive, and the high quality
code path doesn't need to take it into account at all. On another
positive node, this "dumb-mode" could be forced in other cases where
OpenGL 2.1 is not enough, and where we don't want to care about versions
this old.
2015-09-07 19:09:06 +00:00
|
|
|
// Minimal rendering code path, for GLES or OpenGL 2.1 without proper FBOs.
static void pass_render_frame_dumb(struct gl_video *p)
{
    // Gather the per-plane source images and their positional offsets.
    struct image img[4];
    struct gl_transform off[4];
    pass_get_images(p, &p->image, img, off);

    // Base transform for cropping/scaling/rotation of the source.
    struct gl_transform transform;
    compute_src_transform(p, &transform);

    int index = 0;
    for (int i = 0; i < p->plane_count; i++) {
        // Chroma planes are subsampled by chroma_w/chroma_h; other plane
        // types use a 1:1 factor.
        int cw = img[i].type == PLANE_CHROMA ? p->ra_format.chroma_w : 1;
        int ch = img[i].type == PLANE_CHROMA ? p->ra_format.chroma_h : 1;
        // A 90/270 degree rotation swaps horizontal and vertical
        // subsampling factors.
        if (p->image_params.rotate % 180 == 90)
            MPSWAP(int, cw, ch);

        // Rescale the base transform to the plane's (subsampled) grid.
        struct gl_transform t = transform;
        t.m[0][0] *= chroma_realign(p->texture_w, cw);
        t.m[1][1] *= chroma_realign(p->texture_h, ch);

        t.t[0] /= cw;
        t.t[1] /= ch;

        // Apply the per-plane chroma offset from pass_get_images().
        t.t[0] += off[i].t[0];
        t.t[1] += off[i].t[1];

        // Compose with the image's own transform, then store the result.
        gl_transform_trans(img[i].transform, &t);
        img[i].transform = t;

        copy_image(p, &index, img[i]);
    }

    // Convert the sampled planes from YUV to RGB in the same pass.
    pass_convert_yuv(p);
}
|
|
|
|
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
// The main rendering function, takes care of everything up to and including
// upscaling. p->image is rendered.
// flags: bit set of RENDER_FRAME_* flags
// Returns false if the frame could not be uploaded (rendering aborted).
static bool pass_render_frame(struct gl_video *p, struct mp_image *mpi,
                              uint64_t id, int flags)
{
    // initialize the texture parameters and temporary variables
    p->texture_w = p->image_params.w;
    p->texture_h = p->image_params.h;
    p->texture_offset = identity_trans;
    p->components = 0;
    p->num_saved_imgs = 0;
    p->idx_hook_textures = 0;
    p->use_linear = false;

    // try uploading the frame
    if (!pass_upload_image(p, mpi, id))
        return false;

    // 90/270 degree rotation swaps the effective width/height.
    if (p->image_params.rotate % 180 == 90)
        MPSWAP(int, p->texture_w, p->texture_h);

    // Dumb mode skips the full multi-pass pipeline below; the upload above
    // is all that is needed here (presumably the dumb-mode render path
    // takes over from here — see pass_render_frame_dumb).
    if (p->dumb_mode)
        return true;

    pass_read_video(p);
    pass_opt_hook_point(p, "NATIVE", &p->texture_offset);
    pass_convert_yuv(p);
    pass_opt_hook_point(p, "MAINPRESUB", &p->texture_offset);

    // For subtitles
    double vpts = p->image.mpi->pts;
    if (vpts == MP_NOPTS_VALUE)
        vpts = p->osd_pts;

    // "video" blend mode: blend subtitles at native video resolution,
    // before any scaling takes place.
    if (p->osd && p->opts.blend_subs == BLEND_SUBS_VIDEO &&
        (flags & RENDER_FRAME_SUBS))
    {
        double scale[2];
        get_scale_factors(p, false, scale);
        struct mp_osd_res rect = {
            .w = p->texture_w, .h = p->texture_h,
            .display_par = scale[1] / scale[0], // counter compensate scaling
        };
        finish_pass_tex(p, &p->blend_subs_tex, rect.w, rect.h);
        struct ra_fbo fbo = { p->blend_subs_tex };
        pass_draw_osd(p, OSD_DRAW_SUB_ONLY, vpts, rect, fbo, false);
        pass_read_tex(p, p->blend_subs_tex);
        pass_describe(p, "blend subs video");
    }
    pass_opt_hook_point(p, "MAIN", &p->texture_offset);

    pass_scale_main(p);

    int vp_w = p->dst_rect.x1 - p->dst_rect.x0,
        vp_h = p->dst_rect.y1 - p->dst_rect.y0;
    // "yes" blend mode: blend subtitles after scaling, at display resolution.
    if (p->osd && p->opts.blend_subs == BLEND_SUBS_YES &&
        (flags & RENDER_FRAME_SUBS))
    {
        // Recreate the real video size from the src/dst rects
        struct mp_osd_res rect = {
            .w = vp_w, .h = vp_h,
            .ml = -p->src_rect.x0, .mr = p->src_rect.x1 - p->image_params.w,
            .mt = -p->src_rect.y0, .mb = p->src_rect.y1 - p->image_params.h,
            .display_par = 1.0,
        };
        // Adjust margins for scale
        double scale[2];
        get_scale_factors(p, true, scale);
        rect.ml *= scale[0]; rect.mr *= scale[0];
        rect.mt *= scale[1]; rect.mb *= scale[1];
        // We should always blend subtitles in non-linear light
        if (p->use_linear) {
            pass_delinearize(p->sc, p->image_params.color.gamma);
            p->use_linear = false;
        }
        finish_pass_tex(p, &p->blend_subs_tex, p->texture_w, p->texture_h);
        struct ra_fbo fbo = { p->blend_subs_tex };
        pass_draw_osd(p, OSD_DRAW_SUB_ONLY, vpts, rect, fbo, false);
        pass_read_tex(p, p->blend_subs_tex);
        pass_describe(p, "blend subs");
    }

    pass_opt_hook_point(p, "SCALED", NULL);

    return true;
}
|
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
// Final output stage: apply user gamma, color management, alpha handling and
// dithering, then render the accumulated shader pass into the given fbo.
static void pass_draw_to_screen(struct gl_video *p, struct ra_fbo fbo)
{
    // In dumb mode, the whole frame is rendered in this single pass.
    if (p->dumb_mode)
        pass_render_frame_dumb(p);

    // Adjust the overall gamma before drawing to screen
    if (p->user_gamma != 1) {
        gl_sc_uniform_f(p->sc, "user_gamma", p->user_gamma);
        GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);)
        GLSL(color.rgb = pow(color.rgb, vec3(user_gamma));)
    }

    pass_colormanage(p, p->image_params.color, false);

    // Since finish_pass_fbo doesn't work with compute shaders, and neither
    // does the checkerboard/dither code, we may need an indirection via
    // p->screen_tex here.
    if (p->pass_compute.active) {
        int o_w = p->dst_rect.x1 - p->dst_rect.x0,
            o_h = p->dst_rect.y1 - p->dst_rect.y0;
        finish_pass_tex(p, &p->screen_tex, o_w, o_h);
        struct image tmp = image_wrap(p->screen_tex, PLANE_RGB, p->components);
        copy_image(p, &(int){0}, tmp);
    }

    if (p->has_alpha){
        if (p->opts.alpha_mode == ALPHA_BLEND_TILES) {
            // Draw checkerboard pattern to indicate transparency
            GLSLF("// transparency checkerboard\n");
            GLSL(bvec2 tile = lessThan(fract(gl_FragCoord.xy * 1.0/32.0), vec2(0.5));)
            GLSL(vec3 background = vec3(tile.x == tile.y ? 0.93 : 0.87);)
            GLSL(color.rgb += background.rgb * (1.0 - color.a);)
            GLSL(color.a = 1.0;)
        } else if (p->opts.alpha_mode == ALPHA_BLEND) {
            // Blend into background color (usually black)
            struct m_color c = p->opts.background;
            GLSLF("vec4 background = vec4(%f, %f, %f, %f);\n",
                  c.r / 255.0, c.g / 255.0, c.b / 255.0, c.a / 255.0);
            GLSL(color.rgb += background.rgb * (1.0 - color.a);)
            GLSL(color.a = background.a;)
        }
    }

    pass_opt_hook_point(p, "OUTPUT", NULL);

    pass_dither(p);
    pass_describe(p, "output to screen");
    finish_pass_fbo(p, fbo, false, &p->dst_rect);
}
|
|
|
|
|
2018-02-08 00:55:31 +00:00
|
|
|
// flags: bit set of RENDER_FRAME_* flags
// Render a frame into surf->tex (for the interpolation queue), recording the
// frame id and pts on the surface. Returns false if rendering the frame
// failed (e.g. the image could not be uploaded).
static bool update_surface(struct gl_video *p, struct mp_image *mpi,
                           uint64_t id, struct surface *surf, int flags)
{
    int vp_w = p->dst_rect.x1 - p->dst_rect.x0,
        vp_h = p->dst_rect.y1 - p->dst_rect.y0;

    pass_info_reset(p, false);
    if (!pass_render_frame(p, mpi, id, flags))
        return false;

    // Frame blending should always be done in linear light to preserve the
    // overall brightness, otherwise this will result in flashing dark frames
    // because mixing in compressed light artificially darkens the results
    if (!p->use_linear) {
        p->use_linear = true;
        pass_linearize(p->sc, p->image_params.color.gamma);
    }

    finish_pass_tex(p, &surf->tex, vp_w, vp_h);
    surf->id = id;
    surf->pts = mpi->pts;
    return true;
}
|
|
|
|
|
|
|
|
// Draws an interpolate frame to fbo, based on the frame timing in t
|
2018-02-08 00:55:31 +00:00
|
|
|
// flags: bit set of RENDER_FRAME_* flags
|
2017-07-24 21:22:30 +00:00
|
|
|
static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t,
|
2018-02-07 19:18:36 +00:00
|
|
|
struct ra_fbo fbo, int flags)
|
2017-07-24 21:22:30 +00:00
|
|
|
{
|
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number
of changes and improvements:
1. mpv users can now introspect the vo_opengl passes, which is something
that has been requested multiple times.
2. performance data is now measured per-pass, which helps both
development and debugging.
3. since adding more passes is cheap, we can now report information for
more passes (e.g. the blit pass, and the osd pass). Note: we also
switch to nanosecond scale, to be able to measure these passes
better.
4. `--user-shaders` authors can now describe their own passes, helping
users both identify which user shaders are active at any given time
as well as helping shader authors identify performance issues.
5. the timing data per pass is now exported as a full list of samples,
so projects like Argon-/mpv-stats can immediately read out all of the
samples and render a graph without having to manually poll this
option constantly.
Due to gl_timer's design being complicated (directly reading performance
data would block, so we delay the actual read-back until the next _start
command), it's vital not to conflate different passes that might be
doing different things from one frame to another. To accomplish this,
the actual timers are stored as part of the gl_shader_cache's sc_entry,
which makes them unique for that exact shader.
Starting and stopping the time measurement is easy to unify with the
gl_sc architecture, because the existing API already relies on a
"generate, render, reset" flow, so we can just put timer_start and
timer_stop in sc_generate and sc_reset, respectively.
The ugliest thing about this code is that due to the need to keep pass
information relatively stable in between frames, we need to distinguish
between "new" and "redrawn" frames, which bloats the code somewhat and
also feels hacky and vo_opengl-specific. (But then again, this entire
thing is vo_opengl-specific)
2017-06-29 15:00:06 +00:00
|
|
|
bool is_new = false;
|
|
|
|
|
2015-06-30 23:25:30 +00:00
|
|
|
// Reset the queue completely if this is a still image, to avoid any
|
|
|
|
// interpolation artifacts from surrounding frames when unpausing or
|
|
|
|
// framestepping
|
|
|
|
if (t->still)
|
|
|
|
gl_video_reset_surfaces(p);
|
|
|
|
|
2016-06-25 18:07:38 +00:00
|
|
|
// First of all, figure out if we have a frame available at all, and draw
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seem
to work consistently anyway (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some cases, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
// it manually + reset the queue if not
|
2016-11-01 12:06:48 +00:00
|
|
|
if (p->surfaces[p->surface_now].id == 0) {
|
2017-09-20 08:45:33 +00:00
|
|
|
struct surface *now = &p->surfaces[p->surface_now];
|
2018-02-08 00:55:31 +00:00
|
|
|
if (!update_surface(p, t->current, t->frame_id, now, flags))
|
2016-05-19 10:18:48 +00:00
|
|
|
return;
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
p->surface_idx = p->surface_now;
|
2017-07-24 21:22:30 +00:00
|
|
|
is_new = true;
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
}
|
|
|
|
|
2015-06-26 08:59:57 +00:00
|
|
|
// Find the right frame for this instant
|
2016-11-01 12:06:48 +00:00
|
|
|
if (t->current) {
|
2017-09-20 08:45:33 +00:00
|
|
|
int next = surface_wrap(p->surface_now + 1);
|
2016-11-01 12:06:48 +00:00
|
|
|
while (p->surfaces[next].id &&
|
|
|
|
p->surfaces[next].id > p->surfaces[p->surface_now].id &&
|
|
|
|
p->surfaces[p->surface_now].id < t->frame_id)
|
2015-06-26 08:59:57 +00:00
|
|
|
{
|
|
|
|
p->surface_now = next;
|
2017-09-20 08:45:33 +00:00
|
|
|
next = surface_wrap(next + 1);
|
2015-06-26 08:59:57 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-03-13 18:30:31 +00:00
|
|
|
// Figure out the queue size. For illustration, a filter radius of 2 would
|
|
|
|
// look like this: _ A [B] C D _
|
2015-11-28 14:45:35 +00:00
|
|
|
// A is surface_bse, B is surface_now, C is surface_now+1 and D is
|
2015-03-13 18:30:31 +00:00
|
|
|
// surface_end.
|
2016-03-05 08:42:57 +00:00
|
|
|
struct scaler *tscale = &p->scaler[SCALER_TSCALE];
|
|
|
|
reinit_scaler(p, tscale, &p->opts.scaler[SCALER_TSCALE], 1, tscale_sizes);
|
2015-07-11 11:55:45 +00:00
|
|
|
bool oversample = strcmp(tscale->conf.kernel.name, "oversample") == 0;
|
2016-07-19 18:12:33 +00:00
|
|
|
bool linear = strcmp(tscale->conf.kernel.name, "linear") == 0;
|
2015-03-15 06:11:51 +00:00
|
|
|
int size;
|
2015-03-13 18:30:31 +00:00
|
|
|
|
2016-07-19 18:12:33 +00:00
|
|
|
if (oversample || linear) {
|
2015-07-11 11:55:45 +00:00
|
|
|
size = 2;
|
|
|
|
} else {
|
|
|
|
assert(tscale->kernel && !tscale->kernel->polar);
|
|
|
|
size = ceil(tscale->kernel->size);
|
|
|
|
}
|
2015-06-26 08:59:57 +00:00
|
|
|
|
|
|
|
int radius = size/2;
|
2015-03-13 18:30:31 +00:00
|
|
|
int surface_now = p->surface_now;
|
2017-09-20 08:45:33 +00:00
|
|
|
int surface_bse = surface_wrap(surface_now - (radius-1));
|
|
|
|
int surface_end = surface_wrap(surface_now + radius);
|
|
|
|
assert(surface_wrap(surface_bse + size-1) == surface_end);
|
2015-03-13 18:30:31 +00:00
|
|
|
|
2015-06-26 08:59:57 +00:00
|
|
|
// Render new frames while there's room in the queue. Note that technically,
|
|
|
|
// this should be done before the step where we find the right frame, but
|
|
|
|
// it only barely matters at the very beginning of playback, and this way
|
|
|
|
// makes the code much more linear.
|
2017-09-20 08:45:33 +00:00
|
|
|
int surface_dst = surface_wrap(p->surface_idx + 1);
|
2015-07-01 17:24:28 +00:00
|
|
|
for (int i = 0; i < t->num_frames; i++) {
|
2015-06-26 08:59:57 +00:00
|
|
|
// Avoid overwriting data we might still need
|
|
|
|
if (surface_dst == surface_bse - 1)
|
|
|
|
break;
|
|
|
|
|
2015-07-01 17:24:28 +00:00
|
|
|
struct mp_image *f = t->frames[i];
|
2016-11-01 12:06:48 +00:00
|
|
|
uint64_t f_id = t->frame_id + i;
|
|
|
|
if (!mp_image_params_equal(&f->params, &p->real_image_params))
|
2015-06-26 08:59:57 +00:00
|
|
|
continue;
|
|
|
|
|
2016-11-01 12:06:48 +00:00
|
|
|
if (f_id > p->surfaces[p->surface_idx].id) {
|
2017-09-20 08:45:33 +00:00
|
|
|
struct surface *dst = &p->surfaces[surface_dst];
|
2018-02-08 00:55:31 +00:00
|
|
|
if (!update_surface(p, f, f_id, dst, flags))
|
2016-05-19 10:18:48 +00:00
|
|
|
return;
|
2015-06-26 08:59:57 +00:00
|
|
|
p->surface_idx = surface_dst;
|
2017-09-20 08:45:33 +00:00
|
|
|
surface_dst = surface_wrap(surface_dst + 1);
|
2017-07-24 21:22:30 +00:00
|
|
|
is_new = true;
|
2015-06-26 08:59:57 +00:00
|
|
|
}
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
}
|
|
|
|
|
2015-03-13 18:30:31 +00:00
|
|
|
// Figure out whether the queue is "valid". A queue is invalid if the
|
|
|
|
// frames' PTS is not monotonically increasing. Anything else is invalid,
|
|
|
|
// so avoid blending incorrect data and just draw the latest frame as-is.
|
|
|
|
// Possible causes for failure of this condition include seeks, pausing,
|
|
|
|
// end of playback or start of playback.
|
|
|
|
bool valid = true;
|
2015-03-15 22:25:01 +00:00
|
|
|
for (int i = surface_bse, ii; valid && i != surface_end; i = ii) {
|
2017-09-20 08:45:33 +00:00
|
|
|
ii = surface_wrap(i + 1);
|
2016-11-01 12:06:48 +00:00
|
|
|
if (p->surfaces[i].id == 0 || p->surfaces[ii].id == 0) {
|
2015-03-13 18:30:31 +00:00
|
|
|
valid = false;
|
2016-11-01 12:06:48 +00:00
|
|
|
} else if (p->surfaces[ii].id < p->surfaces[i].id) {
|
2015-03-15 22:25:01 +00:00
|
|
|
valid = false;
|
|
|
|
MP_DBG(p, "interpolation queue underrun\n");
|
2015-03-13 18:30:31 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-03-25 21:40:10 +00:00
|
|
|
// Update OSD PTS to synchronize subtitles with the displayed frame
|
2015-06-26 08:59:57 +00:00
|
|
|
p->osd_pts = p->surfaces[surface_now].pts;
|
2015-03-25 21:40:10 +00:00
|
|
|
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
// Finally, draw the right mix of frames to the screen.
|
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number
of changes and improvements:
1. mpv users can now introspect the vo_opengl passes, which is something
that has been requested multiple times.
2. performance data is now measured per-pass, which helps both
development and debugging.
3. since adding more passes is cheap, we can now report information for
more passes (e.g. the blit pass, and the osd pass). Note: we also
switch to nanosecond scale, to be able to measure these passes
better.
4. `--user-shaders` authors can now describe their own passes, helping
users both identify which user shaders are active at any given time
as well as helping shader authors identify performance issues.
5. the timing data per pass is now exported as a full list of samples,
so projects like Argon-/mpv-stats can immediately read out all of the
samples and render a graph without having to manually poll this
option constantly.
Due to gl_timer's design being complicated (directly reading performance
data would block, so we delay the actual read-back until the next _start
command), it's vital not to conflate different passes that might be
doing different things from one frame to another. To accomplish this,
the actual timers are stored as part of the gl_shader_cache's sc_entry,
which makes them unique for that exact shader.
Starting and stopping the time measurement is easy to unify with the
gl_sc architecture, because the existing API already relies on a
"generate, render, reset" flow, so we can just put timer_start and
timer_stop in sc_generate and sc_reset, respectively.
The ugliest thing about this code is that due to the need to keep pass
information relatively stable in between frames, we need to distinguish
between "new" and "redrawn" frames, which bloats the code somewhat and
also feels hacky and vo_opengl-specific. (But then again, this entire
thing is vo_opengl-specific)
2017-06-29 15:00:06 +00:00
|
|
|
if (!is_new)
|
|
|
|
pass_info_reset(p, true);
|
|
|
|
pass_describe(p, "interpolation");
|
2015-06-30 23:25:30 +00:00
|
|
|
if (!valid || t->still) {
|
2015-03-13 18:30:31 +00:00
|
|
|
// surface_now is guaranteed to be valid, so we can safely use it.
|
2017-09-20 08:45:33 +00:00
|
|
|
pass_read_tex(p, p->surfaces[surface_now].tex);
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
p->is_interpolated = false;
|
|
|
|
} else {
|
2015-11-28 14:45:35 +00:00
|
|
|
double mix = t->vsync_offset / t->ideal_frame_duration;
|
2015-06-26 08:59:57 +00:00
|
|
|
// The scaler code always wants the fcoord to be between 0 and 1,
|
|
|
|
// so we try to adjust by using the previous set of N frames instead
|
|
|
|
// (which requires some extra checking to make sure it's valid)
|
|
|
|
if (mix < 0.0) {
|
2017-09-20 08:45:33 +00:00
|
|
|
int prev = surface_wrap(surface_bse - 1);
|
2016-11-01 12:06:48 +00:00
|
|
|
if (p->surfaces[prev].id != 0 &&
|
|
|
|
p->surfaces[prev].id < p->surfaces[surface_bse].id)
|
2015-06-26 08:59:57 +00:00
|
|
|
{
|
|
|
|
mix += 1.0;
|
|
|
|
surface_bse = prev;
|
|
|
|
} else {
|
|
|
|
mix = 0.0; // at least don't blow up, this should only
|
|
|
|
// ever happen at the start of playback
|
|
|
|
}
|
2015-03-15 06:11:51 +00:00
|
|
|
}
|
2015-06-26 08:59:57 +00:00
|
|
|
|
2015-07-11 11:55:45 +00:00
|
|
|
if (oversample) {
|
2016-07-19 18:12:33 +00:00
|
|
|
// Oversample uses the frame area as mix ratio, not the the vsync
|
|
|
|
// position itself
|
2015-11-28 14:45:35 +00:00
|
|
|
double vsync_dist = t->vsync_interval / t->ideal_frame_duration,
|
2015-07-11 11:55:45 +00:00
|
|
|
threshold = tscale->conf.kernel.params[0];
|
|
|
|
threshold = isnan(threshold) ? 0.0 : threshold;
|
|
|
|
mix = (1 - mix) / vsync_dist;
|
|
|
|
mix = mix <= 0 + threshold ? 0 : mix;
|
|
|
|
mix = mix >= 1 - threshold ? 1 : mix;
|
|
|
|
mix = 1 - mix;
|
2016-07-19 18:12:33 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Blend the frames together
|
|
|
|
if (oversample || linear) {
|
2017-09-17 08:55:43 +00:00
|
|
|
gl_sc_uniform_dynamic(p->sc);
|
2015-07-11 11:55:45 +00:00
|
|
|
gl_sc_uniform_f(p->sc, "inter_coeff", mix);
|
2016-02-23 15:18:17 +00:00
|
|
|
GLSL(color = mix(texture(texture0, texcoord0),
|
|
|
|
texture(texture1, texcoord1),
|
|
|
|
inter_coeff);)
|
2015-07-11 11:55:45 +00:00
|
|
|
} else {
|
2017-09-17 08:55:43 +00:00
|
|
|
gl_sc_uniform_dynamic(p->sc);
|
2015-07-11 11:55:45 +00:00
|
|
|
gl_sc_uniform_f(p->sc, "fcoord", mix);
|
2015-09-05 12:03:00 +00:00
|
|
|
pass_sample_separated_gen(p->sc, tscale, 0, 0);
|
2015-07-11 11:55:45 +00:00
|
|
|
}
|
2015-06-26 08:59:57 +00:00
|
|
|
|
|
|
|
// Load all the required frames
|
2015-03-13 18:30:31 +00:00
|
|
|
for (int i = 0; i < size; i++) {
|
2017-09-20 08:45:33 +00:00
|
|
|
struct image img =
|
|
|
|
image_wrap(p->surfaces[surface_wrap(surface_bse+i)].tex,
|
|
|
|
PLANE_RGB, p->components);
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
// Since the code in pass_sample_separated currently assumes
|
|
|
|
// the textures are bound in-order and starting at 0, we just
|
|
|
|
// assert to make sure this is the case (which it should always be)
|
|
|
|
int id = pass_bind(p, img);
|
|
|
|
assert(id == i);
|
2015-03-13 18:30:31 +00:00
|
|
|
}
|
2015-06-26 08:59:57 +00:00
|
|
|
|
2017-09-28 09:53:57 +00:00
|
|
|
MP_TRACE(p, "inter frame dur: %f vsync: %f, mix: %f\n",
|
|
|
|
t->ideal_frame_duration, t->vsync_interval, mix);
|
2015-03-13 18:30:31 +00:00
|
|
|
p->is_interpolated = true;
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
}
|
|
|
|
pass_draw_to_screen(p, fbo);
|
2015-07-02 11:17:20 +00:00
|
|
|
|
|
|
|
p->frames_drawn += 1;
|
2014-11-23 19:06:05 +00:00
|
|
|
}
|
|
|
|
|
2017-08-07 17:14:18 +00:00
|
|
|
void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame,
|
2018-02-08 00:55:31 +00:00
|
|
|
struct ra_fbo fbo, int flags)
|
2015-03-23 15:28:33 +00:00
|
|
|
{
|
2017-10-17 07:07:35 +00:00
|
|
|
gl_video_update_options(p);
|
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
struct mp_rect target_rc = {0, 0, fbo.tex->params.w, fbo.tex->params.h};
|
2016-05-19 10:18:48 +00:00
|
|
|
|
2017-08-04 13:47:50 +00:00
|
|
|
p->broken_frame = false;
|
2015-03-23 15:28:33 +00:00
|
|
|
|
2016-09-08 13:54:08 +00:00
|
|
|
bool has_frame = !!frame->current;
|
2015-07-01 17:24:28 +00:00
|
|
|
|
2017-08-07 17:14:18 +00:00
|
|
|
if (!has_frame || !mp_rect_equals(&p->dst_rect, &target_rc)) {
|
2017-08-11 11:02:13 +00:00
|
|
|
struct m_color c = p->clear_color;
|
2017-08-04 13:47:50 +00:00
|
|
|
float color[4] = {c.r / 255.0, c.g / 255.0, c.b / 255.0, c.a / 255.0};
|
2017-09-20 08:45:33 +00:00
|
|
|
p->ra->fns->clear(p->ra, fbo.tex, color, &target_rc);
|
2015-03-23 15:28:33 +00:00
|
|
|
}
|
|
|
|
|
vo_gpu: make it possible to load multiple hwdec interop drivers
Make the VO<->decoder interface capable of supporting multiple hwdec
APIs at once. The main gain is that this simplifies autoprobing a lot.
Before this change, it could happen that the VO loaded the "wrong" hwdec
API, and the decoder was stuck with the choice (breaking hw decoding).
With the change applied, the VO simply loads all available APIs, so
autoprobing trickery is left entirely to the decoder.
In the past, we were quite careful about not accidentally loading the
wrong interop drivers. This was in part to make sure autoprobing works,
but also because libva had this obnoxious bug of dumping garbage to
stderr when using the API. libva was fixed, so this is not a problem
anymore.
The --opengl-hwdec-interop option is changed in various ways (again...),
and renamed to --gpu-hwdec-interop. It does not have much use anymore,
other than debugging. It's notable that the order in the hwdec interop
array ra_hwdec_drivers[] still matters if multiple drivers support the
same image formats, so the option can explicitly force one, if that
should ever be necessary, or more likely, for debugging. One example are
the ra_hwdec_d3d11egl and ra_hwdec_d3d11eglrgb drivers, which both
support d3d11 input.
vo_gpu now always loads the interop lazily by default, but when it does,
it loads them all. vo_opengl_cb now always loads them when the GL
context handle is initialized. I don't expect that this causes any
problems.
It's now possible to do things like changing between vdpau and nvdec
decoding at runtime.
This is also preparation for cleaning up vd_lavc.c hwdec autoprobing.
It's another reason why hwdec_devices_request_all() does not take a
hwdec type anymore.
2017-12-01 04:05:00 +00:00
|
|
|
if (p->hwdec_overlay) {
|
2016-09-12 13:08:38 +00:00
|
|
|
if (has_frame) {
|
vo_gpu: make it possible to load multiple hwdec interop drivers
Make the VO<->decoder interface capable of supporting multiple hwdec
APIs at once. The main gain is that this simplifies autoprobing a lot.
Before this change, it could happen that the VO loaded the "wrong" hwdec
API, and the decoder was stuck with the choice (breaking hw decoding).
With the change applied, the VO simply loads all available APIs, so
autoprobing trickery is left entirely to the decoder.
In the past, we were quite careful about not accidentally loading the
wrong interop drivers. This was in part to make sure autoprobing works,
but also because libva had this obnoxious bug of dumping garbage to
stderr when using the API. libva was fixed, so this is not a problem
anymore.
The --opengl-hwdec-interop option is changed in various ways (again...),
and renamed to --gpu-hwdec-interop. It does not have much use anymore,
other than debugging. It's notable that the order in the hwdec interop
array ra_hwdec_drivers[] still matters if multiple drivers support the
same image formats, so the option can explicitly force one, if that
should ever be necessary, or more likely, for debugging. One example are
the ra_hwdec_d3d11egl and ra_hwdec_d3d11eglrgb drivers, which both
support d3d11 input.
vo_gpu now always loads the interop lazily by default, but when it does,
it loads them all. vo_opengl_cb now always loads them when the GL
context handle is initialized. I don't expect that this causes any
problems.
It's now possible to do things like changing between vdpau and nvdec
decoding at runtime.
This is also preparation for cleaning up vd_lavc.c hwdec autoprobing.
It's another reason why hwdec_devices_request_all() does not take a
hwdec type anymore.
2017-12-01 04:05:00 +00:00
|
|
|
float *color = p->hwdec_overlay->overlay_colorkey;
|
2017-09-20 08:45:33 +00:00
|
|
|
p->ra->fns->clear(p->ra, fbo.tex, color, &p->dst_rect);
|
2016-09-12 13:08:38 +00:00
|
|
|
}
|
|
|
|
|
vo_gpu: make it possible to load multiple hwdec interop drivers
Make the VO<->decoder interface capable of supporting multiple hwdec
APIs at once. The main gain is that this simplifies autoprobing a lot.
Before this change, it could happen that the VO loaded the "wrong" hwdec
API, and the decoder was stuck with the choice (breaking hw decoding).
With the change applied, the VO simply loads all available APIs, so
autoprobing trickery is left entirely to the decoder.
In the past, we were quite careful about not accidentally loading the
wrong interop drivers. This was in part to make sure autoprobing works,
but also because libva had this obnoxious bug of dumping garbage to
stderr when using the API. libva was fixed, so this is not a problem
anymore.
The --opengl-hwdec-interop option is changed in various ways (again...),
and renamed to --gpu-hwdec-interop. It does not have much use anymore,
other than debugging. It's notable that the order in the hwdec interop
array ra_hwdec_drivers[] still matters if multiple drivers support the
same image formats, so the option can explicitly force one, if that
should ever be necessary, or more likely, for debugging. One example are
the ra_hwdec_d3d11egl and ra_hwdec_d3d11eglrgb drivers, which both
support d3d11 input.
vo_gpu now always loads the interop lazily by default, but when it does,
it loads them all. vo_opengl_cb now always loads them when the GL
context handle is initialized. I don't expect that this causes any
problems.
It's now possible to do things like changing between vdpau and nvdec
decoding at runtime.
This is also preparation for cleaning up vd_lavc.c hwdec autoprobing.
It's another reason why hwdec_devices_request_all() does not take a
hwdec type anymore.
2017-12-01 04:05:00 +00:00
|
|
|
p->hwdec_overlay->driver->overlay_frame(p->hwdec_overlay, frame->current,
|
|
|
|
&p->src_rect, &p->dst_rect,
|
|
|
|
frame->frame_id != p->image.id);
|
2016-09-12 13:08:38 +00:00
|
|
|
|
|
|
|
if (frame->current)
|
|
|
|
p->osd_pts = frame->current->pts;
|
|
|
|
|
|
|
|
// Disable GL rendering
|
|
|
|
has_frame = false;
|
|
|
|
}
|
|
|
|
|
2015-07-01 17:24:28 +00:00
|
|
|
if (has_frame) {
|
2016-01-27 20:07:17 +00:00
|
|
|
bool interpolate = p->opts.interpolation && frame->display_synced &&
|
|
|
|
(p->frames_drawn || !frame->still);
|
|
|
|
if (interpolate) {
|
|
|
|
double ratio = frame->ideal_frame_duration / frame->vsync_interval;
|
|
|
|
if (fabs(ratio - 1.0) < p->opts.interpolation_threshold)
|
|
|
|
interpolate = false;
|
|
|
|
}
|
2016-01-25 20:46:01 +00:00
|
|
|
|
2016-01-27 20:07:17 +00:00
|
|
|
if (interpolate) {
|
2018-02-08 00:55:31 +00:00
|
|
|
gl_video_interpolate_frame(p, frame, fbo, flags);
|
2015-07-01 17:24:28 +00:00
|
|
|
} else {
|
2016-11-01 12:06:48 +00:00
|
|
|
bool is_new = frame->frame_id != p->image.id;
|
|
|
|
|
|
|
|
// Redrawing a frame might update subtitles.
|
2016-11-07 21:49:24 +00:00
|
|
|
if (frame->still && p->opts.blend_subs)
|
|
|
|
is_new = true;
|
2016-11-01 12:06:48 +00:00
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
if (is_new || !p->output_tex_valid) {
|
|
|
|
p->output_tex_valid = false;
|
2015-11-15 17:30:54 +00:00
|
|
|
|
2017-08-06 00:51:11 +00:00
|
|
|
pass_info_reset(p, !is_new);
|
2018-02-08 00:55:31 +00:00
|
|
|
if (!pass_render_frame(p, frame->current, frame->frame_id, flags))
|
2016-05-19 10:18:48 +00:00
|
|
|
goto done;
|
2015-09-05 10:02:02 +00:00
|
|
|
|
2016-06-28 17:41:40 +00:00
|
|
|
// For the non-interpolation case, we draw to a single "cache"
|
2017-09-20 08:45:33 +00:00
|
|
|
// texture to speed up subsequent re-draws (if any exist)
|
|
|
|
struct ra_fbo dest_fbo = fbo;
|
2015-11-15 17:30:54 +00:00
|
|
|
if (frame->num_vsyncs > 1 && frame->display_synced &&
|
2018-02-08 00:38:24 +00:00
|
|
|
!p->dumb_mode && (p->ra->caps & RA_CAP_BLIT) &&
|
|
|
|
fbo.tex->params.blit_dst)
|
2015-10-30 11:53:43 +00:00
|
|
|
{
|
2017-09-29 12:41:52 +00:00
|
|
|
// Attempt to use the same format as the destination FBO
|
|
|
|
// if possible. Some RAs use a wrapped dummy format here,
|
|
|
|
// so fall back to the fbo_format in that case.
|
|
|
|
const struct ra_format *fmt = fbo.tex->params.format;
|
|
|
|
if (fmt->dummy_format)
|
|
|
|
fmt = p->fbo_format;
|
2017-09-23 07:54:42 +00:00
|
|
|
bool r = ra_tex_resize(p->ra, p->log, &p->output_tex,
|
|
|
|
fbo.tex->params.w, fbo.tex->params.h,
|
2017-09-29 12:41:52 +00:00
|
|
|
fmt);
|
2017-09-23 07:54:42 +00:00
|
|
|
if (r) {
|
|
|
|
dest_fbo = (struct ra_fbo) { p->output_tex };
|
|
|
|
p->output_tex_valid = true;
|
|
|
|
}
|
2015-09-05 10:02:02 +00:00
|
|
|
}
|
2015-11-15 17:30:54 +00:00
|
|
|
pass_draw_to_screen(p, dest_fbo);
|
2015-09-05 10:02:02 +00:00
|
|
|
}
|
|
|
|
|
2017-09-20 08:45:33 +00:00
|
|
|
// "output tex valid" and "output tex needed" are equivalent
|
2018-02-08 00:38:24 +00:00
|
|
|
if (p->output_tex_valid && fbo.tex->params.blit_dst) {
|
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number
of changes and improvements:
1. mpv users can now introspect the vo_opengl passes, which is something
that has been requested multiple times.
2. performance data is now measured per-pass, which helps both
development and debugging.
3. since adding more passes is cheap, we can now report information for
more passes (e.g. the blit pass, and the osd pass). Note: we also
switch to nanosecond scale, to be able to measure these passes
better.
4. `--user-shaders` authors can now describe their own passes, helping
users both identify which user shaders are active at any given time
as well as helping shader authors identify performance issues.
5. the timing data per pass is now exported as a full list of samples,
so projects like Argon-/mpv-stats can immediately read out all of the
samples and render a graph without having to manually poll this
option constantly.
Due to gl_timer's design being complicated (directly reading performance
data would block, so we delay the actual read-back until the next _start
command), it's vital not to conflate different passes that might be
doing different things from one frame to another. To accomplish this,
the actual timers are stored as part of the gl_shader_cache's sc_entry,
which makes them unique for that exact shader.
Starting and stopping the time measurement is easy to unify with the
gl_sc architecture, because the existing API already relies on a
"generate, render, reset" flow, so we can just put timer_start and
timer_stop in sc_generate and sc_reset, respectively.
The ugliest thing about this code is that due to the need to keep pass
information relatively stable in between frames, we need to distinguish
between "new" and "redrawn" frames, which bloats the code somewhat and
also feels hacky and vo_opengl-specific. (But then again, this entire
thing is vo_opengl-specific)
2017-06-29 15:00:06 +00:00
|
|
|
pass_info_reset(p, true);
|
|
|
|
pass_describe(p, "redraw cached frame");
|
2017-08-07 14:44:15 +00:00
|
|
|
struct mp_rect src = p->dst_rect;
|
|
|
|
struct mp_rect dst = src;
|
2017-09-20 08:45:33 +00:00
|
|
|
if (fbo.flip) {
|
|
|
|
dst.y0 = fbo.tex->params.h - src.y0;
|
|
|
|
dst.y1 = fbo.tex->params.h - src.y1;
|
2015-11-15 17:30:54 +00:00
|
|
|
}
|
2017-08-05 16:20:45 +00:00
|
|
|
timer_pool_start(p->blit_timer);
|
2017-09-20 08:45:33 +00:00
|
|
|
p->ra->fns->blit(p->ra, fbo.tex, p->output_tex, &dst, &src);
|
2017-08-05 16:20:45 +00:00
|
|
|
timer_pool_stop(p->blit_timer);
|
|
|
|
pass_record(p, timer_pool_measure(p->blit_timer));
|
2015-09-05 10:02:02 +00:00
|
|
|
}
|
2015-07-01 17:24:28 +00:00
|
|
|
}
|
2015-03-23 15:28:33 +00:00
|
|
|
}
|
|
|
|
|
2016-05-19 10:18:48 +00:00
|
|
|
done:
|
|
|
|
|
2015-06-26 08:59:57 +00:00
|
|
|
debug_check_gl(p, "after video rendering");
|
|
|
|
|
2018-02-08 00:55:31 +00:00
|
|
|
if (p->osd && (flags & (RENDER_FRAME_SUBS | RENDER_FRAME_OSD))) {
|
2017-07-01 02:27:09 +00:00
|
|
|
// If we haven't actually drawn anything so far, then we technically
|
|
|
|
// need to consider this the start of a new pass. Let's call it a
|
|
|
|
// redraw just because, since it's basically a blank frame anyway
|
|
|
|
if (!has_frame)
|
|
|
|
pass_info_reset(p, true);
|
|
|
|
|
2018-02-08 00:55:31 +00:00
|
|
|
int osd_flags = p->opts.blend_subs ? OSD_DRAW_OSD_ONLY : 0;
|
|
|
|
if (!(flags & RENDER_FRAME_SUBS))
|
|
|
|
osd_flags |= OSD_DRAW_OSD_ONLY;
|
|
|
|
if (!(flags & RENDER_FRAME_OSD))
|
|
|
|
osd_flags |= OSD_DRAW_SUB_ONLY;
|
|
|
|
|
|
|
|
pass_draw_osd(p, osd_flags, p->osd_pts, p->osd_rect, fbo, true);
|
2015-03-23 01:42:19 +00:00
|
|
|
debug_check_gl(p, "after OSD rendering");
|
|
|
|
}
|
2016-05-19 10:18:48 +00:00
|
|
|
|
2018-03-02 11:15:03 +00:00
|
|
|
p->broken_frame |= gl_sc_error_state(p->sc);
|
|
|
|
if (p->broken_frame) {
|
2016-05-19 10:18:48 +00:00
|
|
|
// Make the screen solid blue to make it visually clear that an
|
|
|
|
// error has occurred
|
2017-08-04 13:47:50 +00:00
|
|
|
float color[4] = {0.0, 0.05, 0.5, 1.0};
|
2017-09-20 08:45:33 +00:00
|
|
|
p->ra->fns->clear(p->ra, fbo.tex, color, &target_rc);
|
2016-05-19 10:18:48 +00:00
|
|
|
}
|
|
|
|
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
performing the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
p->frames_rendered++;
|
2017-07-03 14:59:38 +00:00
|
|
|
pass_report_performance(p);
|
2013-03-01 20:19:20 +00:00
|
|
|
}
|
|
|
|
|
2018-02-07 19:18:36 +00:00
|
|
|
void gl_video_screenshot(struct gl_video *p, struct vo_frame *frame,
|
|
|
|
struct voctrl_screenshot *args)
|
|
|
|
{
|
|
|
|
bool ok = false;
|
|
|
|
struct mp_image *res = NULL;
|
|
|
|
|
|
|
|
if (!p->ra->fns->tex_download)
|
|
|
|
return;
|
|
|
|
|
|
|
|
struct mp_rect old_src = p->src_rect;
|
|
|
|
struct mp_rect old_dst = p->dst_rect;
|
|
|
|
struct mp_osd_res old_osd = p->osd_rect;
|
|
|
|
|
|
|
|
if (!args->scaled) {
|
|
|
|
int w = p->real_image_params.w;
|
|
|
|
int h = p->real_image_params.h;
|
|
|
|
if (w < 1 || h < 1)
|
|
|
|
return;
|
|
|
|
|
2018-03-02 11:13:21 +00:00
|
|
|
if (p->image_params.rotate % 180 == 90)
|
|
|
|
MPSWAP(int, w, h);
|
|
|
|
|
2018-02-07 19:18:36 +00:00
|
|
|
struct mp_rect rc = {0, 0, w, h};
|
|
|
|
struct mp_osd_res osd = {.w = w, .h = h, .display_par = 1.0};
|
|
|
|
gl_video_resize(p, &rc, &rc, &osd);
|
|
|
|
}
|
|
|
|
|
|
|
|
gl_video_reset_surfaces(p);
|
|
|
|
|
|
|
|
struct ra_tex_params params = {
|
|
|
|
.dimensions = 2,
|
|
|
|
.downloadable = true,
|
|
|
|
.w = p->osd_rect.w,
|
|
|
|
.h = p->osd_rect.h,
|
|
|
|
.render_dst = true,
|
|
|
|
};
|
|
|
|
|
|
|
|
params.format = ra_find_unorm_format(p->ra, 1, 4);
|
|
|
|
int mpfmt = IMGFMT_RGB0;
|
|
|
|
if (args->high_bit_depth && p->ra_format.component_bits > 8) {
|
|
|
|
const struct ra_format *fmt = ra_find_unorm_format(p->ra, 2, 4);
|
|
|
|
if (fmt && fmt->renderable) {
|
|
|
|
params.format = fmt;
|
|
|
|
mpfmt = IMGFMT_RGBA64;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!params.format || !params.format->renderable)
|
|
|
|
goto done;
|
|
|
|
struct ra_tex *target = ra_tex_create(p->ra, ¶ms);
|
|
|
|
if (!target)
|
|
|
|
goto done;
|
|
|
|
|
|
|
|
int flags = 0;
|
|
|
|
if (args->subs)
|
|
|
|
flags |= RENDER_FRAME_SUBS;
|
|
|
|
if (args->osd)
|
|
|
|
flags |= RENDER_FRAME_OSD;
|
|
|
|
gl_video_render_frame(p, frame, (struct ra_fbo){target}, flags);
|
|
|
|
|
|
|
|
res = mp_image_alloc(mpfmt, params.w, params.h);
|
|
|
|
if (!res)
|
|
|
|
goto done;
|
|
|
|
|
|
|
|
struct ra_tex_download_params download_params = {
|
|
|
|
.tex = target,
|
|
|
|
.dst = res->planes[0],
|
|
|
|
.stride = res->stride[0],
|
|
|
|
};
|
|
|
|
if (!p->ra->fns->tex_download(p->ra, &download_params))
|
|
|
|
goto done;
|
|
|
|
|
2018-03-02 11:15:03 +00:00
|
|
|
if (p->broken_frame)
|
|
|
|
goto done;
|
|
|
|
|
2018-02-07 19:18:36 +00:00
|
|
|
ok = true;
|
|
|
|
done:
|
|
|
|
ra_tex_free(p->ra, &target);
|
|
|
|
gl_video_resize(p, &old_src, &old_dst, &old_osd);
|
|
|
|
if (!ok)
|
|
|
|
TA_FREEP(&res);
|
|
|
|
args->res = res;
|
|
|
|
}
|
|
|
|
|
2017-08-11 11:02:13 +00:00
|
|
|
// Use this color instead of the global option.
|
|
|
|
void gl_video_set_clear_color(struct gl_video *p, struct m_color c)
|
|
|
|
{
|
|
|
|
p->force_clear_color = true;
|
|
|
|
p->clear_color = c;
|
|
|
|
}
|
|
|
|
|
2017-08-15 17:12:39 +00:00
|
|
|
// Set the PTS used for OSD/subtitle rendering on the next drawn frame.
void gl_video_set_osd_pts(struct gl_video *p, double pts)
{
    p->osd_pts = pts;
}
|
|
|
|
|
2017-08-11 11:02:13 +00:00
|
|
|
bool gl_video_check_osd_change(struct gl_video *p, struct mp_osd_res *res,
|
|
|
|
double pts)
|
|
|
|
{
|
|
|
|
return p->osd ? mpgl_osd_check_change(p->osd, res, pts) : false;
|
|
|
|
}
|
|
|
|
|
2017-08-07 17:14:18 +00:00
|
|
|
void gl_video_resize(struct gl_video *p,
|
2013-03-01 20:19:20 +00:00
|
|
|
struct mp_rect *src, struct mp_rect *dst,
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
struct mp_osd_res *osd)
|
2013-03-01 20:19:20 +00:00
|
|
|
{
|
2017-08-29 13:15:34 +00:00
|
|
|
if (mp_rect_equals(&p->src_rect, src) &&
|
|
|
|
mp_rect_equals(&p->dst_rect, dst) &&
|
|
|
|
osd_res_equals(p->osd_rect, *osd))
|
|
|
|
return;
|
|
|
|
|
2013-03-01 20:19:20 +00:00
|
|
|
p->src_rect = *src;
|
|
|
|
p->dst_rect = *dst;
|
|
|
|
p->osd_rect = *osd;
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
|
|
|
|
gl_video_reset_surfaces(p);
|
2016-03-21 21:23:41 +00:00
|
|
|
|
2016-03-23 13:49:39 +00:00
|
|
|
if (p->osd)
|
|
|
|
mpgl_osd_resize(p->osd, p->osd_rect, p->image_params.stereo_out);
|
2013-03-01 20:19:20 +00:00
|
|
|
}
|
|
|
|
|
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number
of changes and improvements:
1. mpv users can now introspect the vo_opengl passes, which is something
that has been requested multiple times.
2. performance data is now measured per-pass, which helps both
development and debugging.
3. since adding more passes is cheap, we can now report information for
more passes (e.g. the blit pass, and the osd pass). Note: we also
switch to nanosecond scale, to be able to measure these passes
better.
4. `--user-shaders` authors can now describe their own passes, helping
users both identify which user shaders are active at any given time
as well as helping shader authors identify performance issues.
5. the timing data per pass is now exported as a full list of samples,
so projects like Argon-/mpv-stats can immediately read out all of the
samples and render a graph without having to manually poll this
option constantly.
Due to gl_timer's design being complicated (directly reading performance
data would block, so we delay the actual read-back until the next _start
command), it's vital not to conflate different passes that might be
doing different things from one frame to another. To accomplish this,
the actual timers are stored as part of the gl_shader_cache's sc_entry,
which makes them unique for that exact shader.
Starting and stopping the time measurement is easy to unify with the
gl_sc architecture, because the existing API already relies on a
"generate, render, reset" flow, so we can just put timer_start and
timer_stop in sc_generate and sc_reset, respectively.
The ugliest thing about this code is that due to the need to keep pass
information relatively stable in between frames, we need to distinguish
between "new" and "redrawn" frames, which bloats the code somewhat and
also feels hacky and vo_opengl-specific. (But then again, this entire
thing is vo_opengl-specific)
2017-06-29 15:00:06 +00:00
|
|
|
static void frame_perf_data(struct pass_info pass[], struct mp_frame_perf *out)
|
2016-06-06 00:44:15 +00:00
|
|
|
{
|
2017-09-27 21:38:54 +00:00
|
|
|
for (int i = 0; i < VO_PASS_PERF_MAX; i++) {
|
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number
of changes and improvements:
1. mpv users can now introspect the vo_opengl passes, which is something
that has been requested multiple times.
2. performance data is now measured per-pass, which helps both
development and debugging.
3. since adding more passes is cheap, we can now report information for
more passes (e.g. the blit pass, and the osd pass). Note: we also
switch to nanosecond scale, to be able to measure these passes
better.
4. `--user-shaders` authors can now describe their own passes, helping
users both identify which user shaders are active at any given time
as well as helping shader authors identify performance issues.
5. the timing data per pass is now exported as a full list of samples,
so projects like Argon-/mpv-stats can immediately read out all of the
samples and render a graph without having to manually poll this
option constantly.
Due to gl_timer's design being complicated (directly reading performance
data would block, so we delay the actual read-back until the next _start
command), it's vital not to conflate different passes that might be
doing different things from one frame to another. To accomplish this,
the actual timers are stored as part of the gl_shader_cache's sc_entry,
which makes them unique for that exact shader.
Starting and stopping the time measurement is easy to unify with the
gl_sc architecture, because the existing API already relies on a
"generate, render, reset" flow, so we can just put timer_start and
timer_stop in sc_generate and sc_reset, respectively.
The ugliest thing about this code is that due to the need to keep pass
information relatively stable in between frames, we need to distinguish
between "new" and "redrawn" frames, which bloats the code somewhat and
also feels hacky and vo_opengl-specific. (But then again, this entire
thing is vo_opengl-specific)
2017-06-29 15:00:06 +00:00
|
|
|
if (!pass[i].desc.len)
|
|
|
|
break;
|
|
|
|
out->perf[out->count] = pass[i].perf;
|
|
|
|
out->desc[out->count] = pass[i].desc.start;
|
|
|
|
out->count++;
|
|
|
|
}
|
2016-06-06 00:44:15 +00:00
|
|
|
}
|
|
|
|
|
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number
of changes and improvements:
1. mpv users can now introspect the vo_opengl passes, which is something
that has been requested multiple times.
2. performance data is now measured per-pass, which helps both
development and debugging.
3. since adding more passes is cheap, we can now report information for
more passes (e.g. the blit pass, and the osd pass). Note: we also
switch to nanosecond scale, to be able to measure these passes
better.
4. `--user-shaders` authors can now describe their own passes, helping
users both identify which user shaders are active at any given time
as well as helping shader authors identify performance issues.
5. the timing data per pass is now exported as a full list of samples,
so projects like Argon-/mpv-stats can immediately read out all of the
samples and render a graph without having to manually poll this
option constantly.
Due to gl_timer's design being complicated (directly reading performance
data would block, so we delay the actual read-back until the next _start
command), it's vital not to conflate different passes that might be
doing different things from one frame to another. To accomplish this,
the actual timers are stored as part of the gl_shader_cache's sc_entry,
which makes them unique for that exact shader.
Starting and stopping the time measurement is easy to unify with the
gl_sc architecture, because the existing API already relies on a
"generate, render, reset" flow, so we can just put timer_start and
timer_stop in sc_generate and sc_reset, respectively.
The ugliest thing about this code is that due to the need to keep pass
information relatively stable in between frames, we need to distinguish
between "new" and "redrawn" frames, which bloats the code somewhat and
also feels hacky and vo_opengl-specific. (But then again, this entire
thing is vo_opengl-specific)
2017-06-29 15:00:06 +00:00
|
|
|
void gl_video_perfdata(struct gl_video *p, struct voctrl_performance_data *out)
|
2016-06-06 00:44:15 +00:00
|
|
|
{
|
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number
of changes and improvements:
1. mpv users can now introspect the vo_opengl passes, which is something
that has been requested multiple times.
2. performance data is now measured per-pass, which helps both
development and debugging.
3. since adding more passes is cheap, we can now report information for
more passes (e.g. the blit pass, and the osd pass). Note: we also
switch to nanosecond scale, to be able to measure these passes
better.
4. `--user-shaders` authors can now describe their own passes, helping
users both identify which user shaders are active at any given time
as well as helping shader authors identify performance issues.
5. the timing data per pass is now exported as a full list of samples,
so projects like Argon-/mpv-stats can immediately read out all of the
samples and render a graph without having to manually poll this
option constantly.
Due to gl_timer's design being complicated (directly reading performance
data would block, so we delay the actual read-back until the next _start
command), it's vital not to conflate different passes that might be
doing different things from one frame to another. To accomplish this,
the actual timers are stored as part of the gl_shader_cache's sc_entry,
which makes them unique for that exact shader.
Starting and stopping the time measurement is easy to unify with the
gl_sc architecture, because the existing API already relies on a
"generate, render, reset" flow, so we can just put timer_start and
timer_stop in sc_generate and sc_reset, respectively.
The ugliest thing about this code is that due to the need to keep pass
information relatively stable in between frames, we need to distinguish
between "new" and "redrawn" frames, which bloats the code somewhat and
also feels hacky and vo_opengl-specific. (But then again, this entire
thing is vo_opengl-specific)
2017-06-29 15:00:06 +00:00
|
|
|
*out = (struct voctrl_performance_data){0};
|
|
|
|
frame_perf_data(p->pass_fresh, &out->fresh);
|
|
|
|
frame_perf_data(p->pass_redraw, &out->redraw);
|
2016-06-06 00:44:15 +00:00
|
|
|
}
|
|
|
|
|
2016-06-19 17:58:40 +00:00
|
|
|
// This assumes nv12, with textures set to GL_NEAREST filtering.
|
vo_opengl: separate hwdec context and mapping, port it to use ra
This does two separate rather intrusive things:
1. Make the hwdec context (which does initialization, provides the
device to the decoder, and other basic state) and frame mapping
(getting textures from a mp_image) separate. This is more
flexible, and you could map multiple images at once. It will
help removing some hwdec special-casing from video.c.
2. Switch all hwdec API use to ra. Of course all code is still
GL specific, but in theory it would be possible to support other
backends. The most important change is that the hwdec interop
returns ra objects, instead of anything GL specific. This removes
the last dependency on GL-specific header files from video.c.
I'm mixing these separate changes because both requires essentially
rewriting all the glue code, so better do them at once. For the same
reason, this change isn't done incrementally.
hwdec_ios.m is untested, since I can't test it. Apart from superficial
mistakes, this also requires dealing with Apple's texture format
fuckups: they force you to use GL_LUMINANCE[_ALPHA] instead of GL_RED
and GL_RG. We also need to report the correct format via ra_tex to
the renderer, which is done by find_la_variant(). It's unknown whether
this works correctly.
hwdec_rpi.c as well as vo_rpi.c are still broken. (I need to pull my
RPI out of a dusty pile of devices and cables, so, later.)
2017-08-10 15:48:33 +00:00
|
|
|
// Merge 4 deinterleaved vdpau field planes (2 per plane: luma pair, chroma
// pair) back into 2 full-height planes, by generating a shader pass that
// alternates between the two field textures per output row.
// NOTE(review): assumes nv12 layout with nearest filtering, per the comment
// at the call-site declaration - confirm against the hwdec interop.
static void reinterleave_vdpau(struct gl_video *p,
                               struct ra_tex *input[4], struct ra_tex *output[2])
{
    // n == 0: luma plane (1 component), n == 1: chroma plane (2 components).
    for (int n = 0; n < 2; n++) {
        // Cached per-plane destination texture, resized on demand below.
        struct ra_tex **tex = &p->vdpau_deinterleave_tex[n];
        // This is an array of the 2 to-merge planes.
        struct ra_tex **src = &input[n * 2];
        int w = src[0]->params.w;
        int h = src[0]->params.h;
        int ids[2];
        // Bind both field textures; pass_bind returns the texture slot id
        // used to reference them from the generated GLSL.
        for (int t = 0; t < 2; t++) {
            ids[t] = pass_bind(p, (struct image){
                .tex = src[t],
                .multiplier = 1.0,
                .transform = identity_trans,
                .w = w,
                .h = h,
            });
        }

        pass_describe(p, "vdpau reinterleaving");
        // Select the source texture by output row parity: even rows sample
        // from the first field, odd rows from the second.
        GLSLF("color = fract(gl_FragCoord.y * 0.5) < 0.5\n");
        GLSLF("     ? texture(texture%d, texcoord%d)\n", ids[0], ids[0]);
        GLSLF("     : texture(texture%d, texcoord%d);", ids[1], ids[1]);

        int comps = n == 0 ? 1 : 2;
        const struct ra_format *fmt = ra_find_unorm_format(p->ra, 1, comps);
        // Output is twice the field height (the two fields interleaved).
        ra_tex_resize(p->ra, p->log, tex, w, h * 2, fmt);
        struct ra_fbo fbo = { *tex };
        finish_pass_fbo(p, fbo, true, &(struct mp_rect){0, 0, w, h * 2});

        output[n] = *tex;
    }
}
|
|
|
|
|
2016-05-19 10:18:48 +00:00
|
|
|
// Returns false on failure.
|
2017-06-29 15:09:11 +00:00
|
|
|
static bool pass_upload_image(struct gl_video *p, struct mp_image *mpi, uint64_t id)
|
2015-03-22 00:32:03 +00:00
|
|
|
{
|
|
|
|
struct video_image *vimg = &p->image;
|
2015-05-01 16:44:45 +00:00
|
|
|
|
2016-11-01 12:06:48 +00:00
|
|
|
if (vimg->id == id)
|
|
|
|
return true;
|
|
|
|
|
2016-05-19 10:18:48 +00:00
|
|
|
unref_current_image(p);
|
|
|
|
|
2015-07-15 10:22:49 +00:00
|
|
|
mpi = mp_image_new_ref(mpi);
|
|
|
|
if (!mpi)
|
2016-05-19 10:18:48 +00:00
|
|
|
goto error;
|
vo_opengl: refactor how hwdec interop exports textures
Rename gl_hwdec_driver.map_image to map_frame, and let it fill out a
struct gl_hwdec_frame describing the exact texture layout. This gives
more flexibility to what the hwdec interop can export. In particular, it
can export strange component orders/permutations and textures with
padded size. (The latter originating from cropped video.)
The way gl_hwdec_frame works is in the spirit of the rest of the
vo_opengl video processing code, which tends to put as much information
in immediate state (as part of the dataflow), instead of declaring it
globally. To some degree this duplicates the texplane and img_tex
structs, but until we somehow unify those, it's better to give the hwdec
state its own struct. The fact that changing the hwdec struct would
require changes and testing on at least 4 platform/GPU combinations
makes duplicating it almost a requirement to avoid pain later.
Make gl_hwdec_driver.reinit set the new image format and remove the
gl_hwdec.converted_imgfmt field.
Likewise, gl_hwdec.gl_texture_target is replaced with
gl_hwdec_plane.gl_target.
Split out a init_image_desc function from init_format. The latter is not
called in the hwdec case at all anymore. Setting up most of struct
texplane is also completely separate in the hwdec and normal cases.
video.c does not check whether the hwdec "mapped" image format is
supported. This should not really happen anyway, and if it does, the
hwdec interop backend must fail at creation time, so this is not an
issue.
2016-05-10 16:29:10 +00:00
|
|
|
|
2015-07-15 10:22:49 +00:00
|
|
|
vimg->mpi = mpi;
|
2016-11-01 12:06:48 +00:00
|
|
|
vimg->id = id;
|
2015-05-01 16:44:45 +00:00
|
|
|
p->osd_pts = mpi->pts;
|
2015-07-15 10:22:49 +00:00
|
|
|
p->frames_uploaded++;
|
2013-03-28 19:40:19 +00:00
|
|
|
|
2015-07-26 18:13:53 +00:00
|
|
|
if (p->hwdec_active) {
|
2016-06-07 09:05:57 +00:00
|
|
|
// Hardware decoding
|
vo_opengl: separate hwdec context and mapping, port it to use ra
This does two separate rather intrusive things:
1. Make the hwdec context (which does initialization, provides the
device to the decoder, and other basic state) and frame mapping
(getting textures from a mp_image) separate. This is more
flexible, and you could map multiple images at once. It will
help removing some hwdec special-casing from video.c.
2. Switch all hwdec API use to ra. Of course all code is still
GL specific, but in theory it would be possible to support other
backends. The most important change is that the hwdec interop
returns ra objects, instead of anything GL specific. This removes
the last dependency on GL-specific header files from video.c.
I'm mixing these separate changes because both requires essentially
rewriting all the glue code, so better do them at once. For the same
reason, this change isn't done incrementally.
hwdec_ios.m is untested, since I can't test it. Apart from superficial
mistakes, this also requires dealing with Apple's texture format
fuckups: they force you to use GL_LUMINANCE[_ALPHA] instead of GL_RED
and GL_RG. We also need to report the correct format via ra_tex to
the renderer, which is done by find_la_variant(). It's unknown whether
this works correctly.
hwdec_rpi.c as well as vo_rpi.c are still broken. (I need to pull my
RPI out of a dusty pile of devices and cables, so, later.)
2017-08-10 15:48:33 +00:00
|
|
|
|
|
|
|
if (!p->hwdec_mapper)
|
|
|
|
goto error;
|
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number
of changes and improvements:
1. mpv users can now introspect the vo_opengl passes, which is something
that has been requested multiple times.
2. performance data is now measured per-pass, which helps both
development and debugging.
3. since adding more passes is cheap, we can now report information for
more passes (e.g. the blit pass, and the osd pass). Note: we also
switch to nanosecond scale, to be able to measure these passes
better.
4. `--user-shaders` authors can now describe their own passes, helping
users both identify which user shaders are active at any given time
as well as helping shader authors identify performance issues.
5. the timing data per pass is now exported as a full list of samples,
so projects like Argon-/mpv-stats can immediately read out all of the
samples and render a graph without having to manually poll this
option constantly.
Due to gl_timer's design being complicated (directly reading performance
data would block, so we delay the actual read-back until the next _start
command), it's vital not to conflate different passes that might be
doing different things from one frame to another. To accomplish this,
the actual timers are stored as part of the gl_shader_cache's sc_entry,
which makes them unique for that exact shader.
Starting and stopping the time measurement is easy to unify with the
gl_sc architecture, because the existing API already relies on a
"generate, render, reset" flow, so we can just put timer_start and
timer_stop in sc_generate and sc_reset, respectively.
The ugliest thing about this code is that due to the need to keep pass
information relatively stable in between frames, we need to distinguish
between "new" and "redrawn" frames, which bloats the code somewhat and
also feels hacky and vo_opengl-specific. (But then again, this entire
thing is vo_opengl-specific)
2017-06-29 15:00:06 +00:00
|
|
|
|
|
|
|
pass_describe(p, "map frame (hwdec)");
|
2017-08-05 16:20:45 +00:00
|
|
|
timer_pool_start(p->upload_timer);
|
vo_opengl: separate hwdec context and mapping, port it to use ra
This does two separate rather intrusive things:
1. Make the hwdec context (which does initialization, provides the
device to the decoder, and other basic state) and frame mapping
(getting textures from a mp_image) separate. This is more
flexible, and you could map multiple images at once. It will
help removing some hwdec special-casing from video.c.
2. Switch all hwdec API use to ra. Of course all code is still
GL specific, but in theory it would be possible to support other
backends. The most important change is that the hwdec interop
returns ra objects, instead of anything GL specific. This removes
the last dependency on GL-specific header files from video.c.
I'm mixing these separate changes because both requires essentially
rewriting all the glue code, so better do them at once. For the same
reason, this change isn't done incrementally.
hwdec_ios.m is untested, since I can't test it. Apart from superficial
mistakes, this also requires dealing with Apple's texture format
fuckups: they force you to use GL_LUMINANCE[_ALPHA] instead of GL_RED
and GL_RG. We also need to report the correct format via ra_tex to
the renderer, which is done by find_la_variant(). It's unknown whether
this works correctly.
hwdec_rpi.c as well as vo_rpi.c are still broken. (I need to pull my
RPI out of a dusty pile of devices and cables, so, later.)
2017-08-10 15:48:33 +00:00
|
|
|
bool ok = ra_hwdec_mapper_map(p->hwdec_mapper, vimg->mpi) >= 0;
|
2017-08-05 16:20:45 +00:00
|
|
|
timer_pool_stop(p->upload_timer);
|
|
|
|
pass_record(p, timer_pool_measure(p->upload_timer));
|
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number
of changes and improvements:
1. mpv users can now introspect the vo_opengl passes, which is something
that has been requested multiple times.
2. performance data is now measured per-pass, which helps both
development and debugging.
3. since adding more passes is cheap, we can now report information for
more passes (e.g. the blit pass, and the osd pass). Note: we also
switch to nanosecond scale, to be able to measure these passes
better.
4. `--user-shaders` authors can now describe their own passes, helping
users both identify which user shaders are active at any given time
as well as helping shader authors identify performance issues.
5. the timing data per pass is now exported as a full list of samples,
so projects like Argon-/mpv-stats can immediately read out all of the
samples and render a graph without having to manually poll this
option constantly.
Due to gl_timer's design being complicated (directly reading performance
data would block, so we delay the actual read-back until the next _start
command), it's vital not to conflate different passes that might be
doing different things from one frame to another. To accomplish this,
the actual timers are stored as part of the gl_shader_cache's sc_entry,
which makes them unique for that exact shader.
Starting and stopping the time measurement is easy to unify with the
gl_sc architecture, because the existing API already relies on a
"generate, render, reset" flow, so we can just put timer_start and
timer_stop in sc_generate and sc_reset, respectively.
The ugliest thing about this code is that due to the need to keep pass
information relatively stable in between frames, we need to distinguish
between "new" and "redrawn" frames, which bloats the code somewhat and
also feels hacky and vo_opengl-specific. (But then again, this entire
thing is vo_opengl-specific)
2017-06-29 15:00:06 +00:00
|
|
|
|
vo_opengl: refactor how hwdec interop exports textures
Rename gl_hwdec_driver.map_image to map_frame, and let it fill out a
struct gl_hwdec_frame describing the exact texture layout. This gives
more flexibility to what the hwdec interop can export. In particular, it
can export strange component orders/permutations and textures with
padded size. (The latter originating from cropped video.)
The way gl_hwdec_frame works is in the spirit of the rest of the
vo_opengl video processing code, which tends to put as much information
in immediate state (as part of the dataflow), instead of declaring it
globally. To some degree this duplicates the texplane and img_tex
structs, but until we somehow unify those, it's better to give the hwdec
state its own struct. The fact that changing the hwdec struct would
require changes and testing on at least 4 platform/GPU combinations
makes duplicating it almost a requirement to avoid pain later.
Make gl_hwdec_driver.reinit set the new image format and remove the
gl_hwdec.converted_imgfmt field.
Likewise, gl_hwdec.gl_texture_target is replaced with
gl_hwdec_plane.gl_target.
Split out a init_image_desc function from init_format. The latter is not
called in the hwdec case at all anymore. Setting up most of struct
texplane is also completely separate in the hwdec and normal cases.
video.c does not check whether the hwdec "mapped" image format is
supported. This should not really happen anyway, and if it does, the
hwdec interop backend must fail at creation time, so this is not an
issue.
2016-05-10 16:29:10 +00:00
|
|
|
vimg->hwdec_mapped = true;
|
|
|
|
if (ok) {
|
|
|
|
struct mp_image layout = {0};
|
|
|
|
mp_image_set_params(&layout, &p->image_params);
|
vo_opengl: separate hwdec context and mapping, port it to use ra
This does two separate rather intrusive things:
1. Make the hwdec context (which does initialization, provides the
device to the decoder, and other basic state) and frame mapping
(getting textures from a mp_image) separate. This is more
flexible, and you could map multiple images at once. It will
help removing some hwdec special-casing from video.c.
2. Switch all hwdec API use to ra. Of course all code is still
GL specific, but in theory it would be possible to support other
backends. The most important change is that the hwdec interop
returns ra objects, instead of anything GL specific. This removes
the last dependency on GL-specific header files from video.c.
I'm mixing these separate changes because both require essentially
rewriting all the glue code, so better do them at once. For the same
reason, this change isn't done incrementally.
hwdec_ios.m is untested, since I can't test it. Apart from superficial
mistakes, this also requires dealing with Apple's texture format
fuckups: they force you to use GL_LUMINANCE[_ALPHA] instead of GL_RED
and GL_RG. We also need to report the correct format via ra_tex to
the renderer, which is done by find_la_variant(). It's unknown whether
this works correctly.
hwdec_rpi.c as well as vo_rpi.c are still broken. (I need to pull my
RPI out of a dusty pile of devices and cables, so, later.)
2017-08-10 15:48:33 +00:00
|
|
|
struct ra_tex **tex = p->hwdec_mapper->tex;
|
|
|
|
struct ra_tex *tmp[4] = {0};
|
|
|
|
if (p->hwdec_mapper->vdpau_fields) {
|
|
|
|
reinterleave_vdpau(p, tex, tmp);
|
|
|
|
tex = tmp;
|
|
|
|
}
|
vo_opengl: refactor how hwdec interop exports textures
Rename gl_hwdec_driver.map_image to map_frame, and let it fill out a
struct gl_hwdec_frame describing the exact texture layout. This gives
more flexibility to what the hwdec interop can export. In particular, it
can export strange component orders/permutations and textures with
padded size. (The latter originating from cropped video.)
The way gl_hwdec_frame works is in the spirit of the rest of the
vo_opengl video processing code, which tends to put as much information
in immediate state (as part of the dataflow), instead of declaring it
globally. To some degree this duplicates the texplane and img_tex
structs, but until we somehow unify those, it's better to give the hwdec
state its own struct. The fact that changing the hwdec struct would
require changes and testing on at least 4 platform/GPU combinations
makes duplicating it almost a requirement to avoid pain later.
Make gl_hwdec_driver.reinit set the new image format and remove the
gl_hwdec.converted_imgfmt field.
Likewise, gl_hwdec.gl_texture_target is replaced with
gl_hwdec_plane.gl_target.
Split out a init_image_desc function from init_format. The latter is not
called in the hwdec case at all anymore. Setting up most of struct
texplane is also completely separate in the hwdec and normal cases.
video.c does not check whether the hwdec "mapped" image format is
supported. This should not really happen anyway, and if it does, the
hwdec interop backend must fail at creation time, so this is not an
issue.
2016-05-10 16:29:10 +00:00
|
|
|
for (int n = 0; n < p->plane_count; n++) {
|
|
|
|
vimg->planes[n] = (struct texplane){
|
|
|
|
.w = mp_image_plane_w(&layout, n),
|
|
|
|
.h = mp_image_plane_h(&layout, n),
|
vo_opengl: separate hwdec context and mapping, port it to use ra
This does two separate rather intrusive things:
1. Make the hwdec context (which does initialization, provides the
device to the decoder, and other basic state) and frame mapping
(getting textures from a mp_image) separate. This is more
flexible, and you could map multiple images at once. It will
help removing some hwdec special-casing from video.c.
2. Switch all hwdec API use to ra. Of course all code is still
GL specific, but in theory it would be possible to support other
backends. The most important change is that the hwdec interop
returns ra objects, instead of anything GL specific. This removes
the last dependency on GL-specific header files from video.c.
I'm mixing these separate changes because both require essentially
rewriting all the glue code, so better do them at once. For the same
reason, this change isn't done incrementally.
hwdec_ios.m is untested, since I can't test it. Apart from superficial
mistakes, this also requires dealing with Apple's texture format
fuckups: they force you to use GL_LUMINANCE[_ALPHA] instead of GL_RED
and GL_RG. We also need to report the correct format via ra_tex to
the renderer, which is done by find_la_variant(). It's unknown whether
this works correctly.
hwdec_rpi.c as well as vo_rpi.c are still broken. (I need to pull my
RPI out of a dusty pile of devices and cables, so, later.)
2017-08-10 15:48:33 +00:00
|
|
|
.tex = tex[n],
|
vo_opengl: refactor how hwdec interop exports textures
Rename gl_hwdec_driver.map_image to map_frame, and let it fill out a
struct gl_hwdec_frame describing the exact texture layout. This gives
more flexibility to what the hwdec interop can export. In particular, it
can export strange component orders/permutations and textures with
padded size. (The latter originating from cropped video.)
The way gl_hwdec_frame works is in the spirit of the rest of the
vo_opengl video processing code, which tends to put as much information
in immediate state (as part of the dataflow), instead of declaring it
globally. To some degree this duplicates the texplane and img_tex
structs, but until we somehow unify those, it's better to give the hwdec
state its own struct. The fact that changing the hwdec struct would
require changes and testing on at least 4 platform/GPU combinations
makes duplicating it almost a requirement to avoid pain later.
Make gl_hwdec_driver.reinit set the new image format and remove the
gl_hwdec.converted_imgfmt field.
Likewise, gl_hwdec.gl_texture_target is replaced with
gl_hwdec_plane.gl_target.
Split out a init_image_desc function from init_format. The latter is not
called in the hwdec case at all anymore. Setting up most of struct
texplane is also completely separate in the hwdec and normal cases.
video.c does not check whether the hwdec "mapped" image format is
supported. This should not really happen anyway, and if it does, the
hwdec interop backend must fail at creation time, so this is not an
issue.
2016-05-10 16:29:10 +00:00
|
|
|
};
|
|
|
|
}
|
|
|
|
} else {
|
2016-04-27 11:32:20 +00:00
|
|
|
MP_FATAL(p, "Mapping hardware decoded surface failed.\n");
|
2016-05-19 10:18:48 +00:00
|
|
|
goto error;
|
vo_opengl: refactor how hwdec interop exports textures
Rename gl_hwdec_driver.map_image to map_frame, and let it fill out a
struct gl_hwdec_frame describing the exact texture layout. This gives
more flexibility to what the hwdec interop can export. In particular, it
can export strange component orders/permutations and textures with
padded size. (The latter originating from cropped video.)
The way gl_hwdec_frame works is in the spirit of the rest of the
vo_opengl video processing code, which tends to put as much information
in immediate state (as part of the dataflow), instead of declaring it
globally. To some degree this duplicates the texplane and img_tex
structs, but until we somehow unify those, it's better to give the hwdec
state its own struct. The fact that changing the hwdec struct would
require changes and testing on at least 4 platform/GPU combinations
makes duplicating it almost a requirement to avoid pain later.
Make gl_hwdec_driver.reinit set the new image format and remove the
gl_hwdec.converted_imgfmt field.
Likewise, gl_hwdec.gl_texture_target is replaced with
gl_hwdec_plane.gl_target.
Split out a init_image_desc function from init_format. The latter is not
called in the hwdec case at all anymore. Setting up most of struct
texplane is also completely separate in the hwdec and normal cases.
video.c does not check whether the hwdec "mapped" image format is
supported. This should not really happen anyway, and if it does, the
hwdec interop backend must fail at creation time, so this is not an
issue.
2016-05-10 16:29:10 +00:00
|
|
|
}
|
2016-05-19 10:18:48 +00:00
|
|
|
return true;
|
2015-07-26 18:13:53 +00:00
|
|
|
}
|
2013-11-03 23:00:18 +00:00
|
|
|
|
2016-06-07 09:05:57 +00:00
|
|
|
// Software decoding
|
2013-11-03 23:00:18 +00:00
|
|
|
assert(mpi->num_planes == p->plane_count);
|
|
|
|
|
2017-08-05 16:20:45 +00:00
|
|
|
timer_pool_start(p->upload_timer);
|
2013-11-03 23:00:18 +00:00
|
|
|
for (int n = 0; n < p->plane_count; n++) {
|
2013-03-28 19:40:19 +00:00
|
|
|
struct texplane *plane = &vimg->planes[n];
|
2016-07-03 14:00:51 +00:00
|
|
|
|
2017-08-16 20:13:51 +00:00
|
|
|
struct ra_tex_upload_params params = {
|
|
|
|
.tex = plane->tex,
|
|
|
|
.src = mpi->planes[n],
|
|
|
|
.invalidate = true,
|
|
|
|
.stride = mpi->stride[n],
|
|
|
|
};
|
vo_opengl: start work on rendering API abstraction
This starts work on moving OpenGL-specific code out of the general
renderer code, so that we can support other GPU APIs. This is in
a very early stage and it's only a proof of concept. It's unknown
whether this will succeed or result in other backends.
For now, the GL rendering API ("ra") and its only provider (ra_gl) does
texture creation/upload/destruction only. And it's used for the main
video texture only. All other code is still hardcoded to GL.
There is some duplication with ra_format and gl_format handling. In the
end, only the ra variants will be needed (plus the gl_format table of
course). For now, this is simpler, because for some reason lots of hwdec
code still requires the GL variants, and would have to be updated to
use the ra ones.
Currently, the video.c code accesses private ra_gl fields. In the end,
it should not do that of course, and it would not include ra_gl.h.
Probably adds bugs, but you can keep them.
2017-07-26 09:19:51 +00:00
|
|
|
|
2017-11-10 09:06:33 +00:00
|
|
|
plane->flipped = params.stride < 0;
|
|
|
|
if (plane->flipped) {
|
|
|
|
int h = mp_image_plane_h(mpi, n);
|
|
|
|
params.src = (char *)params.src + (h - 1) * params.stride;
|
|
|
|
params.stride = -params.stride;
|
|
|
|
}
|
|
|
|
|
2017-08-16 20:13:51 +00:00
|
|
|
struct dr_buffer *mapped = gl_find_dr_buffer(p, mpi->planes[n]);
|
|
|
|
if (mapped) {
|
|
|
|
params.buf = mapped->buf;
|
|
|
|
params.buf_offset = (uintptr_t)params.src -
|
|
|
|
(uintptr_t)mapped->buf->data;
|
|
|
|
params.src = NULL;
|
|
|
|
}
|
vo_opengl: start work on rendering API abstraction
This starts work on moving OpenGL-specific code out of the general
renderer code, so that we can support other GPU APIs. This is in
a very early stage and it's only a proof of concept. It's unknown
whether this will succeed or result in other backends.
For now, the GL rendering API ("ra") and its only provider (ra_gl) does
texture creation/upload/destruction only. And it's used for the main
video texture only. All other code is still hardcoded to GL.
There is some duplication with ra_format and gl_format handling. In the
end, only the ra variants will be needed (plus the gl_format table of
course). For now, this is simpler, because for some reason lots of hwdec
code still requires the GL variants, and would have to be updated to
use the ra ones.
Currently, the video.c code accesses private ra_gl fields. In the end,
it should not do that of course, and it would not include ra_gl.h.
Probably adds bugs, but you can keep them.
2017-07-26 09:19:51 +00:00
|
|
|
|
2017-07-23 07:41:51 +00:00
|
|
|
if (p->using_dr_path != !!mapped) {
|
|
|
|
p->using_dr_path = !!mapped;
|
|
|
|
MP_VERBOSE(p, "DR enabled: %s\n", p->using_dr_path ? "yes" : "no");
|
|
|
|
}
|
2017-08-16 20:13:51 +00:00
|
|
|
|
2017-08-19 02:33:40 +00:00
|
|
|
if (!p->ra->fns->tex_upload(p->ra, ¶ms)) {
|
2017-08-16 20:13:51 +00:00
|
|
|
timer_pool_stop(p->upload_timer);
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (mapped && !mapped->mpi)
|
|
|
|
mapped->mpi = mp_image_new_ref(mpi);
|
2013-03-01 20:19:20 +00:00
|
|
|
}
|
2017-08-05 16:20:45 +00:00
|
|
|
timer_pool_stop(p->upload_timer);
|
2017-08-19 02:33:40 +00:00
|
|
|
|
|
|
|
bool using_pbo = p->ra->use_pbo || !(p->ra->caps & RA_CAP_DIRECT_UPLOAD);
|
|
|
|
const char *mode = p->using_dr_path ? "DR" : using_pbo ? "PBO" : "naive";
|
2017-07-26 00:42:23 +00:00
|
|
|
pass_describe(p, "upload frame (%s)", mode);
|
2017-08-05 16:20:45 +00:00
|
|
|
pass_record(p, timer_pool_measure(p->upload_timer));
|
2016-06-05 19:55:30 +00:00
|
|
|
|
2016-05-19 10:18:48 +00:00
|
|
|
return true;
|
|
|
|
|
|
|
|
error:
|
|
|
|
unref_current_image(p);
|
|
|
|
p->broken_frame = true;
|
|
|
|
return false;
|
2013-03-01 20:19:20 +00:00
|
|
|
}
|
|
|
|
|
2017-08-04 11:48:37 +00:00
|
|
|
static bool test_fbo(struct gl_video *p, const struct ra_format *fmt)
|
2013-05-30 13:37:13 +00:00
|
|
|
{
|
2017-08-04 11:48:37 +00:00
|
|
|
MP_VERBOSE(p, "Testing FBO format %s\n", fmt->name);
|
2017-09-20 08:45:33 +00:00
|
|
|
struct ra_tex *tex = NULL;
|
|
|
|
bool success = ra_tex_resize(p->ra, p->log, &tex, 16, 16, fmt);
|
|
|
|
ra_tex_free(p->ra, &tex);
|
2015-09-05 09:39:20 +00:00
|
|
|
return success;
|
2013-05-30 13:37:13 +00:00
|
|
|
}
|
|
|
|
|
2015-11-19 20:22:24 +00:00
|
|
|
// Return whether dumb-mode can be used without disabling any features.
|
2018-01-20 15:10:42 +00:00
|
|
|
// Essentially, vo_gpu with mostly default settings will return true.
|
2015-11-19 20:22:24 +00:00
|
|
|
static bool check_dumb_mode(struct gl_video *p)
|
|
|
|
{
|
|
|
|
struct gl_video_opts *o = &p->opts;
|
2016-01-26 19:47:32 +00:00
|
|
|
if (p->use_integer_conversion)
|
|
|
|
return false;
|
2017-07-07 12:46:46 +00:00
|
|
|
if (o->dumb_mode > 0) // requested by user
|
2015-11-19 20:22:24 +00:00
|
|
|
return true;
|
2017-07-07 12:46:46 +00:00
|
|
|
if (o->dumb_mode < 0) // disabled by user
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// otherwise, use auto-detection
|
2015-11-19 20:22:24 +00:00
|
|
|
if (o->target_prim || o->target_trc || o->linear_scaling ||
|
|
|
|
o->correct_downscaling || o->sigmoid_upscaling || o->interpolation ||
|
2016-06-10 12:35:09 +00:00
|
|
|
o->blend_subs || o->deband || o->unsharp)
|
2015-11-19 20:22:24 +00:00
|
|
|
return false;
|
2016-03-05 08:42:57 +00:00
|
|
|
// check remaining scalers (tscale is already implicitly excluded above)
|
|
|
|
for (int i = 0; i < SCALER_COUNT; i++) {
|
|
|
|
if (i != SCALER_TSCALE) {
|
|
|
|
const char *name = o->scaler[i].kernel.name;
|
|
|
|
if (name && strcmp(name, "bilinear") != 0)
|
|
|
|
return false;
|
|
|
|
}
|
2015-11-19 20:22:24 +00:00
|
|
|
}
|
2016-04-20 23:33:13 +00:00
|
|
|
if (o->user_shaders && o->user_shaders[0])
|
|
|
|
return false;
|
2015-11-19 20:22:24 +00:00
|
|
|
if (p->use_lut_3d)
|
|
|
|
return false;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2013-03-01 20:19:20 +00:00
|
|
|
// Disable features that are not supported with the current OpenGL version.
|
vo_opengl: restore single pass optimization as separate code path
The single path optimization, rendering the video in one shader pass and
without FBO indirections, was removed some commits ago. It didn't have a
place in this code, and caused considerable complexity and maintenance
issues.
On the other hand, it still has some worth, such as for use with
extremely crappy hardware (GLES only or OpenGL 2.1 without FBO
extension). Ideally, these use cases would be handled by a separate VO
(say, vo_gles). While cleaner, this would still cause code duplication
and other complexity.
The third option is making the single-pass optimization a completely
separate code path, with most vo_opengl features disabled. While this
does duplicate some functionality (such as "unpacking" the video data
from textures), it's also relatively unintrusive, and the high quality
code path doesn't need to take it into account at all. On another
positive note, this "dumb-mode" could be forced in other cases where
OpenGL 2.1 is not enough, and where we don't want to care about versions
this old.
2015-09-07 19:09:06 +00:00
|
|
|
static void check_gl_features(struct gl_video *p)
|
2013-03-01 20:19:20 +00:00
|
|
|
{
|
2017-08-05 12:20:14 +00:00
|
|
|
struct ra *ra = p->ra;
|
|
|
|
bool have_float_tex = !!ra_find_float16_format(ra, 1);
|
|
|
|
bool have_mglsl = ra->glsl_version >= 130; // modern GLSL
|
2017-08-07 17:14:18 +00:00
|
|
|
const struct ra_format *rg_tex = ra_find_unorm_format(p->ra, 1, 2);
|
|
|
|
bool have_texrg = rg_tex && !rg_tex->luminance_alpha;
|
2017-08-05 12:20:14 +00:00
|
|
|
bool have_compute = ra->caps & RA_CAP_COMPUTE;
|
2017-08-05 20:29:48 +00:00
|
|
|
bool have_ssbo = ra->caps & RA_CAP_BUF_RW;
|
2018-02-12 12:06:57 +00:00
|
|
|
bool have_fragcoord = ra->caps & RA_CAP_FRAGCOORD;
|
2018-02-10 21:49:19 +00:00
|
|
|
bool have_numgroups = ra->caps & RA_CAP_NUM_GROUPS;
|
2013-03-01 20:19:20 +00:00
|
|
|
|
2017-10-18 12:40:23 +00:00
|
|
|
const char *auto_fbo_fmts[] = {"rgba16", "rgba16f", "rgba16hf",
|
|
|
|
"rgb10_a2", "rgba8", 0};
|
2017-08-04 11:48:37 +00:00
|
|
|
const char *user_fbo_fmts[] = {p->opts.fbo_format, 0};
|
|
|
|
const char **fbo_fmts = user_fbo_fmts[0] && strcmp(user_fbo_fmts[0], "auto")
|
|
|
|
? user_fbo_fmts : auto_fbo_fmts;
|
2016-05-12 19:08:51 +00:00
|
|
|
bool have_fbo = false;
|
2017-08-04 11:48:37 +00:00
|
|
|
p->fbo_format = NULL;
|
2016-05-12 19:08:51 +00:00
|
|
|
for (int n = 0; fbo_fmts[n]; n++) {
|
2017-08-04 11:48:37 +00:00
|
|
|
const char *fmt = fbo_fmts[n];
|
|
|
|
const struct ra_format *f = ra_find_named_format(p->ra, fmt);
|
|
|
|
if (!f && fbo_fmts == user_fbo_fmts)
|
|
|
|
MP_WARN(p, "FBO format '%s' not found!\n", fmt);
|
|
|
|
if (f && f->renderable && f->linear_filter && test_fbo(p, f)) {
|
|
|
|
MP_VERBOSE(p, "Using FBO format %s.\n", f->name);
|
2016-05-12 19:08:51 +00:00
|
|
|
have_fbo = true;
|
2017-08-04 11:48:37 +00:00
|
|
|
p->fbo_format = f;
|
2016-05-12 19:08:51 +00:00
|
|
|
break;
|
|
|
|
}
|
2015-11-19 20:20:50 +00:00
|
|
|
}
|
|
|
|
|
2018-02-12 12:06:57 +00:00
|
|
|
if (!have_fragcoord && p->opts.dither_depth >= 0 &&
|
2018-01-29 12:08:49 +00:00
|
|
|
p->opts.dither_algo != DITHER_NONE)
|
|
|
|
{
|
|
|
|
p->opts.dither_algo = DITHER_NONE;
|
|
|
|
MP_WARN(p, "Disabling dithering (no gl_FragCoord).\n");
|
|
|
|
}
|
2018-02-12 12:06:57 +00:00
|
|
|
if (!have_fragcoord && p->opts.alpha_mode == ALPHA_BLEND_TILES) {
|
2018-01-29 12:08:49 +00:00
|
|
|
p->opts.alpha_mode = ALPHA_BLEND;
|
|
|
|
// Verbose, since this is the default setting
|
|
|
|
MP_VERBOSE(p, "Disabling alpha checkerboard (no gl_FragCoord).\n");
|
|
|
|
}
|
|
|
|
|
2018-02-12 12:01:52 +00:00
|
|
|
bool have_compute_peak = have_compute && have_ssbo && have_numgroups;
|
|
|
|
if (!have_compute_peak && p->opts.compute_hdr_peak >= 0) {
|
|
|
|
int msgl = p->opts.compute_hdr_peak == 1 ? MSGL_WARN : MSGL_V;
|
|
|
|
MP_MSG(p, msgl, "Disabling HDR peak computation (no compute shaders).\n");
|
|
|
|
p->opts.compute_hdr_peak = -1;
|
|
|
|
}
|
|
|
|
|
2017-07-07 12:46:46 +00:00
|
|
|
p->forced_dumb_mode = p->opts.dumb_mode > 0 || !have_fbo || !have_texrg;
|
2015-11-19 20:22:24 +00:00
|
|
|
bool voluntarily_dumb = check_dumb_mode(p);
|
2016-01-26 19:47:32 +00:00
|
|
|
if (p->forced_dumb_mode || voluntarily_dumb) {
|
2015-11-19 20:22:24 +00:00
|
|
|
if (voluntarily_dumb) {
|
|
|
|
MP_VERBOSE(p, "No advanced processing required. Enabling dumb mode.\n");
|
2017-07-07 12:46:46 +00:00
|
|
|
} else if (p->opts.dumb_mode <= 0) {
|
vo_opengl: restore single pass optimization as separate code path
The single path optimization, rendering the video in one shader pass and
without FBO indirections, was removed some commits ago. It didn't have a
place in this code, and caused considerable complexity and maintenance
issues.
On the other hand, it still has some worth, such as for use with
extremely crappy hardware (GLES only or OpenGL 2.1 without FBO
extension). Ideally, these use cases would be handled by a separate VO
(say, vo_gles). While cleaner, this would still cause code duplication
and other complexity.
The third option is making the single-pass optimization a completely
separate code path, with most vo_opengl features disabled. While this
does duplicate some functionality (such as "unpacking" the video data
from textures), it's also relatively unintrusive, and the high quality
code path doesn't need to take it into account at all. On another
positive node, this "dumb-mode" could be forced in other cases where
OpenGL 2.1 is not enough, and where we don't want to care about versions
this old.
2015-09-07 19:09:06 +00:00
|
|
|
MP_WARN(p, "High bit depth FBOs unsupported. Enabling dumb mode.\n"
|
|
|
|
"Most extended features will be disabled.\n");
|
|
|
|
}
|
2015-11-19 20:22:24 +00:00
|
|
|
p->dumb_mode = true;
|
2015-09-08 20:55:01 +00:00
|
|
|
// Most things don't work, so whitelist all options that still work.
|
2016-09-06 09:11:36 +00:00
|
|
|
p->opts = (struct gl_video_opts){
|
2015-09-08 20:55:01 +00:00
|
|
|
.gamma = p->opts.gamma,
|
|
|
|
.gamma_auto = p->opts.gamma_auto,
|
|
|
|
.pbo = p->opts.pbo,
|
|
|
|
.fbo_format = p->opts.fbo_format,
|
|
|
|
.alpha_mode = p->opts.alpha_mode,
|
|
|
|
.use_rectangle = p->opts.use_rectangle,
|
|
|
|
.background = p->opts.background,
|
2018-02-12 12:01:52 +00:00
|
|
|
.compute_hdr_peak = p->opts.compute_hdr_peak,
|
2017-03-20 03:44:24 +00:00
|
|
|
.dither_algo = p->opts.dither_algo,
|
|
|
|
.dither_depth = p->opts.dither_depth,
|
|
|
|
.dither_size = p->opts.dither_size,
|
|
|
|
.temporal_dither = p->opts.temporal_dither,
|
2017-04-21 05:16:26 +00:00
|
|
|
.temporal_dither_period = p->opts.temporal_dither_period,
|
2017-03-20 03:44:24 +00:00
|
|
|
.tex_pad_x = p->opts.tex_pad_x,
|
|
|
|
.tex_pad_y = p->opts.tex_pad_y,
|
2017-08-03 10:46:57 +00:00
|
|
|
.tone_mapping = p->opts.tone_mapping,
|
2016-05-16 12:20:48 +00:00
|
|
|
.tone_mapping_param = p->opts.tone_mapping_param,
|
2017-07-06 03:43:00 +00:00
|
|
|
.tone_mapping_desat = p->opts.tone_mapping_desat,
|
2016-10-05 18:35:00 +00:00
|
|
|
.early_flush = p->opts.early_flush,
|
2017-09-30 14:25:20 +00:00
|
|
|
.icc_opts = p->opts.icc_opts,
|
2017-12-10 21:54:32 +00:00
|
|
|
.hwdec_interop = p->opts.hwdec_interop,
|
2015-09-08 20:55:01 +00:00
|
|
|
};
|
2016-03-05 08:42:57 +00:00
|
|
|
for (int n = 0; n < SCALER_COUNT; n++)
|
2016-09-06 09:11:36 +00:00
|
|
|
p->opts.scaler[n] = gl_video_opts_def.scaler[n];
|
2017-09-30 14:25:20 +00:00
|
|
|
if (!have_fbo)
|
|
|
|
p->use_lut_3d = false;
|
vo_opengl: restore single pass optimization as separate code path
The single path optimization, rendering the video in one shader pass and
without FBO indirections, was removed some commits ago. It didn't have a
place in this code, and caused considerable complexity and maintenance
issues.
On the other hand, it still has some worth, such as for use with
extremely crappy hardware (GLES only or OpenGL 2.1 without FBO
extension). Ideally, these use cases would be handled by a separate VO
(say, vo_gles). While cleaner, this would still cause code duplication
and other complexity.
The third option is making the single-pass optimization a completely
separate code path, with most vo_opengl features disabled. While this
does duplicate some functionality (such as "unpacking" the video data
from textures), it's also relatively unintrusive, and the high quality
code path doesn't need to take it into account at all. On another
positive note, this "dumb-mode" could be forced in other cases where
OpenGL 2.1 is not enough, and where we don't want to care about versions
this old.
2015-09-07 19:09:06 +00:00
|
|
|
return;
|
2015-09-05 09:39:20 +00:00
|
|
|
}
|
2015-11-19 20:22:24 +00:00
|
|
|
p->dumb_mode = false;
|
2015-09-05 09:39:20 +00:00
|
|
|
|
2013-03-01 20:19:20 +00:00
|
|
|
// Normally, we want to disable them by default if FBOs are unavailable,
|
|
|
|
// because they will be slow (not critically slow, but still slower).
|
|
|
|
// Without FP textures, we must always disable them.
|
2014-12-16 17:55:02 +00:00
|
|
|
// I don't know if luminance alpha float textures exist, so disregard them.
|
2016-03-05 08:42:57 +00:00
|
|
|
for (int n = 0; n < SCALER_COUNT; n++) {
|
vo_opengl: refactor scaler configuration
This merges all of the scaler-related options into a single
configuration struct, and also cleans up the way they're passed through
the code. (For example, the scaler index is no longer threaded through
pass_sample, just the scaler configuration itself, and there's no longer
duplication of the params etc.)
In addition, this commit makes scale-down more principled, and turns it
into a scaler in its own right - so there's no longer an ugly separation
between scale and scale-down in the code.
Finally, the radius stuff has been made more proper - filters always
have a radius now (there's no more radius -1), and get a new .resizable
attribute instead for when it's tunable.
User-visible changes:
1. scale-down has been renamed dscale and now has its own set of config
options (dscale-param1, dscale-radius) etc., instead of reusing
scale-param1 (which was arguably a bug).
2. The default radius is no longer fixed at 3, but instead uses that
filter's preferred radius by default. (Scalers with a default radius
other than 3 include sinc, gaussian, box and triangle)
3. scale-radius etc. now goes down to 0.5, rather than 1.0. 0.5 is the
smallest radius that theoretically makes sense, and indeed it's used
by at least one filter (nearest).
Apart from that, it should just be internal changes only.
Note that this sets up for the refactor discussed in #1720, which would
be to merge scaler and window configurations (include parameters etc.)
into a single, simplified string. In the code, this would now basically
just mean getting rid of all the OPT_FLOATRANGE etc. lines related to
scalers and replacing them by a single function that parses a string and
updates the struct scaler_config as appropriate.
2015-03-26 00:55:32 +00:00
|
|
|
const struct filter_kernel *kernel =
|
|
|
|
mp_find_filter_kernel(p->opts.scaler[n].kernel.name);
|
2015-02-26 09:35:49 +00:00
|
|
|
if (kernel) {
|
|
|
|
char *reason = NULL;
|
|
|
|
if (!have_float_tex)
|
2015-07-27 21:18:19 +00:00
|
|
|
reason = "(float tex. missing)";
|
2016-05-12 18:52:26 +00:00
|
|
|
if (!have_mglsl)
|
|
|
|
reason = "(GLSL version too old)";
|
2015-02-26 09:35:49 +00:00
|
|
|
if (reason) {
|
2016-06-21 15:57:07 +00:00
|
|
|
MP_WARN(p, "Disabling scaler #%d %s %s.\n", n,
|
|
|
|
p->opts.scaler[n].kernel.name, reason);
|
2016-09-06 09:11:36 +00:00
|
|
|
// p->opts is a copy => we can just mess with it.
|
vo_opengl: refactor scaler configuration
This merges all of the scaler-related options into a single
configuration struct, and also cleans up the way they're passed through
the code. (For example, the scaler index is no longer threaded through
pass_sample, just the scaler configuration itself, and there's no longer
duplication of the params etc.)
In addition, this commit makes scale-down more principled, and turns it
into a scaler in its own right - so there's no longer an ugly separation
between scale and scale-down in the code.
Finally, the radius stuff has been made more proper - filters always
have a radius now (there's no more radius -1), and get a new .resizable
attribute instead for when it's tunable.
User-visible changes:
1. scale-down has been renamed dscale and now has its own set of config
options (dscale-param1, dscale-radius) etc., instead of reusing
scale-param1 (which was arguably a bug).
2. The default radius is no longer fixed at 3, but instead uses that
filter's preferred radius by default. (Scalers with a default radius
other than 3 include sinc, gaussian, box and triangle)
3. scale-radius etc. now goes down to 0.5, rather than 1.0. 0.5 is the
smallest radius that theoretically makes sense, and indeed it's used
by at least one filter (nearest).
Apart from that, it should just be internal changes only.
Note that this sets up for the refactor discussed in #1720, which would
be to merge scaler and window configurations (include parameters etc.)
into a single, simplified string. In the code, this would now basically
just mean getting rid of all the OPT_FLOATRANGE etc. lines related to
scalers and replacing them by a single function that parses a string and
updates the struct scaler_config as appropriate.
2015-03-26 00:55:32 +00:00
|
|
|
p->opts.scaler[n].kernel.name = "bilinear";
|
2016-05-12 17:34:02 +00:00
|
|
|
if (n == SCALER_TSCALE)
|
|
|
|
p->opts.interpolation = 0;
|
2013-03-01 20:19:20 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
int use_cms = p->opts.target_prim != MP_CSP_PRIM_AUTO ||
|
|
|
|
p->opts.target_trc != MP_CSP_TRC_AUTO || p->use_lut_3d;
|
2014-03-05 14:01:32 +00:00
|
|
|
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
// mix() is needed for some gamma functions
|
2016-05-12 18:52:26 +00:00
|
|
|
if (!have_mglsl && (p->opts.linear_scaling || p->opts.sigmoid_upscaling)) {
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
p->opts.linear_scaling = false;
|
|
|
|
p->opts.sigmoid_upscaling = false;
|
2015-04-11 17:24:54 +00:00
|
|
|
MP_WARN(p, "Disabling linear/sigmoid scaling (GLSL version too old).\n");
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
}
|
2016-05-12 18:52:26 +00:00
|
|
|
if (!have_mglsl && use_cms) {
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
p->opts.target_prim = MP_CSP_PRIM_AUTO;
|
|
|
|
p->opts.target_trc = MP_CSP_TRC_AUTO;
|
|
|
|
p->use_lut_3d = false;
|
2015-04-11 17:24:54 +00:00
|
|
|
MP_WARN(p, "Disabling color management (GLSL version too old).\n");
|
2013-03-01 20:19:20 +00:00
|
|
|
}
|
2016-05-12 18:52:26 +00:00
|
|
|
if (!have_mglsl && p->opts.deband) {
|
2015-10-01 18:44:39 +00:00
|
|
|
p->opts.deband = 0;
|
|
|
|
MP_WARN(p, "Disabling debanding (GLSL version too old).\n");
|
|
|
|
}
|
2013-03-01 20:19:20 +00:00
|
|
|
}
|
|
|
|
|
vo_opengl: restore single pass optimization as separate code path
The single path optimization, rendering the video in one shader pass and
without FBO indirections, was removed soem commits ago. It didn't have a
place in this code, and caused considerable complexity and maintenance
issues.
On the other hand, it still has some worth, such as for use with
extremely crappy hardware (GLES only or OpenGL 2.1 without FBO
extension). Ideally, these use cases would be handled by a separate VO
(say, vo_gles). While cleaner, this would still cause code duplication
and other complexity.
The third option is making the single-pass optimization a completely
separate code path, with most vo_opengl features disabled. While this
does duplicate some functionality (such as "unpacking" the video data
from textures), it's also relatively unintrusive, and the high quality
code path doesn't need to take it into account at all. On another
positive node, this "dumb-mode" could be forced in other cases where
OpenGL 2.1 is not enough, and where we don't want to care about versions
this old.
2015-09-07 19:09:06 +00:00
|
|
|
static void init_gl(struct gl_video *p)
|
2013-03-01 20:19:20 +00:00
|
|
|
{
|
|
|
|
debug_check_gl(p, "before init_gl");
|
|
|
|
|
2017-08-05 16:20:45 +00:00
|
|
|
p->upload_timer = timer_pool_create(p->ra);
|
|
|
|
p->blit_timer = timer_pool_create(p->ra);
|
2017-08-05 16:59:28 +00:00
|
|
|
p->osd_timer = timer_pool_create(p->ra);
|
2016-06-05 19:55:30 +00:00
|
|
|
|
2013-03-01 20:19:20 +00:00
|
|
|
debug_check_gl(p, "after init_gl");
|
vo_opengl: start work on rendering API abstraction
This starts work on moving OpenGL-specific code out of the general
renderer code, so that we can support other other GPU APIs. This is in
a very early stage and it's only a proof of concept. It's unknown
whether this will succeed or result in other backends.
For now, the GL rendering API ("ra") and its only provider (ra_gl) does
texture creation/upload/destruction only. And it's used for the main
video texture only. All other code is still hardcoded to GL.
There is some duplication with ra_format and gl_format handling. In the
end, only the ra variants will be needed (plus the gl_format table of
course). For now, this is simpler, because for some reason lots of hwdec
code still requires the GL variants, and would have to be updated to
use the ra ones.
Currently, the video.c code accesses private ra_gl fields. In the end,
it should not do that of course, and it would not include ra_gl.h.
Probably adds bugs, but you can keep them.
2017-07-26 09:19:51 +00:00
|
|
|
|
|
|
|
ra_dump_tex_formats(p->ra, MSGL_DEBUG);
|
|
|
|
ra_dump_img_formats(p->ra, MSGL_DEBUG);
|
2013-03-01 20:19:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void gl_video_uninit(struct gl_video *p)
|
|
|
|
{
|
2014-12-03 21:37:39 +00:00
|
|
|
if (!p)
|
|
|
|
return;
|
|
|
|
|
2013-03-01 20:19:20 +00:00
|
|
|
uninit_video(p);
|
|
|
|
|
vo_gpu: make it possible to load multiple hwdec interop drivers
Make the VO<->decoder interface capable of supporting multiple hwdec
APIs at once. The main gain is that this simplifies autoprobing a lot.
Before this change, it could happen that the VO loaded the "wrong" hwdec
API, and the decoder was stuck with the choice (breaking hw decoding).
With the change applied, the VO simply loads all available APIs, so
autoprobing trickery is left entirely to the decoder.
In the past, we were quite careful about not accidentally loading the
wrong interop drivers. This was in part to make sure autoprobing works,
but also because libva had this obnoxious bug of dumping garbage to
stderr when using the API. libva was fixed, so this is not a problem
anymore.
The --opengl-hwdec-interop option is changed in various ways (again...),
and renamed to --gpu-hwdec-interop. It does not have much use anymore,
other than debugging. It's notable that the order in the hwdec interop
array ra_hwdec_drivers[] still matters if multiple drivers support the
same image formats, so the option can explicitly force one, if that
should ever be necessary, or more likely, for debugging. One example are
the ra_hwdec_d3d11egl and ra_hwdec_d3d11eglrgb drivers, which both
support d3d11 input.
vo_gpu now always loads the interop lazily by default, but when it does,
it loads them all. vo_opengl_cb now always loads them when the GL
context handle is initialized. I don't expect that this causes any
problems.
It's now possible to do things like changing between vdpau and nvdec
decoding at runtime.
This is also preparation for cleaning up vd_lavc.c hwdec autoprobing.
It's another reason why hwdec_devices_request_all() does not take a
hwdec type anymore.
2017-12-01 04:05:00 +00:00
|
|
|
for (int n = 0; n < p->num_hwdecs; n++)
|
|
|
|
ra_hwdec_uninit(p->hwdecs[n]);
|
|
|
|
p->num_hwdecs = 0;
|
|
|
|
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
gl_sc_destroy(p->sc);
|
|
|
|
|
2017-07-29 19:22:11 +00:00
|
|
|
ra_tex_free(p->ra, &p->lut_3d_texture);
|
2017-08-05 20:29:48 +00:00
|
|
|
ra_buf_free(p->ra, &p->hdr_peak_ssbo);
|
2013-03-01 20:19:20 +00:00
|
|
|
|
2017-08-05 16:20:45 +00:00
|
|
|
timer_pool_destroy(p->upload_timer);
|
|
|
|
timer_pool_destroy(p->blit_timer);
|
2017-08-05 16:59:28 +00:00
|
|
|
timer_pool_destroy(p->osd_timer);
|
2017-08-05 16:20:45 +00:00
|
|
|
|
2017-09-27 21:38:54 +00:00
|
|
|
for (int i = 0; i < VO_PASS_PERF_MAX; i++) {
|
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number
of changes and improvements:
1. mpv users can now introspect the vo_opengl passes, which is something
that has been requested multiple times.
2. performance data is now measured per-pass, which helps both
development and debugging.
3. since adding more passes is cheap, we can now report information for
more passes (e.g. the blit pass, and the osd pass). Note: we also
switch to nanosecond scale, to be able to measure these passes
better.
4. `--user-shaders` authors can now describe their own passes, helping
users both identify which user shaders are active at any given time
as well as helping shader authors identify performance issues.
5. the timing data per pass is now exported as a full list of samples,
so projects like Argon-/mpv-stats can immediately read out all of the
samples and render a graph without having to manually poll this
option constantly.
Due to gl_timer's design being complicated (directly reading performance
data would block, so we delay the actual read-back until the next _start
command), it's vital not to conflate different passes that might be
doing different things from one frame to another. To accomplish this,
the actual timers are stored as part of the gl_shader_cache's sc_entry,
which makes them unique for that exact shader.
Starting and stopping the time measurement is easy to unify with the
gl_sc architecture, because the existing API already relies on a
"generate, render, reset" flow, so we can just put timer_start and
timer_stop in sc_generate and sc_reset, respectively.
The ugliest thing about this code is that due to the need to keep pass
information relatively stable in between frames, we need to distinguish
between "new" and "redrawn" frames, which bloats the code somewhat and
also feels hacky and vo_opengl-specific. (But then again, this entire
thing is vo_opengl-specific)
2017-06-29 15:00:06 +00:00
|
|
|
talloc_free(p->pass_fresh[i].desc.start);
|
|
|
|
talloc_free(p->pass_redraw[i].desc.start);
|
|
|
|
}
|
2016-06-05 19:55:30 +00:00
|
|
|
|
2013-03-01 20:19:20 +00:00
|
|
|
mpgl_osd_destroy(p->osd);
|
|
|
|
|
2017-07-23 07:41:51 +00:00
|
|
|
// Forcibly destroy possibly remaining image references. This should also
|
|
|
|
// cause gl_video_dr_free_buffer() to be called for the remaining buffers.
|
|
|
|
gc_pending_dr_fences(p, true);
|
|
|
|
|
|
|
|
// Should all have been unreffed already.
|
|
|
|
assert(!p->num_dr_buffers);
|
|
|
|
|
2013-03-01 20:19:20 +00:00
|
|
|
talloc_free(p);
|
|
|
|
}
|
|
|
|
|
2014-11-23 19:06:05 +00:00
|
|
|
void gl_video_reset(struct gl_video *p)
|
|
|
|
{
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
gl_video_reset_surfaces(p);
|
2014-11-23 19:06:05 +00:00
|
|
|
}
|
|
|
|
|
2015-02-04 22:37:38 +00:00
|
|
|
bool gl_video_showing_interpolated_frame(struct gl_video *p)
|
|
|
|
{
|
|
|
|
return p->is_interpolated;
|
|
|
|
}
|
|
|
|
|
2017-06-30 14:57:17 +00:00
|
|
|
static bool is_imgfmt_desc_supported(struct gl_video *p,
|
2017-07-29 18:11:51 +00:00
|
|
|
const struct ra_imgfmt_desc *desc)
|
vo_opengl: refactor how hwdec interop exports textures
Rename gl_hwdec_driver.map_image to map_frame, and let it fill out a
struct gl_hwdec_frame describing the exact texture layout. This gives
more flexibility to what the hwdec interop can export. In particular, it
can export strange component orders/permutations and textures with
padded size. (The latter originating from cropped video.)
The way gl_hwdec_frame works is in the spirit of the rest of the
vo_opengl video processing code, which tends to put as much information
in immediate state (as part of the dataflow), instead of declaring it
globally. To some degree this duplicates the texplane and img_tex
structs, but until we somehow unify those, it's better to give the hwdec
state its own struct. The fact that changing the hwdec struct would
require changes and testing on at least 4 platform/GPU combinations
makes duplicating it almost a requirement to avoid pain later.
Make gl_hwdec_driver.reinit set the new image format and remove the
gl_hwdec.converted_imgfmt field.
Likewise, gl_hwdec.gl_texture_target is replaced with
gl_hwdec_plane.gl_target.
Split out a init_image_desc function from init_format. The latter is not
called in the hwdec case at all anymore. Setting up most of struct
texplane is also completely separate in the hwdec and normal cases.
video.c does not check whether the hwdec "mapped" image format is
supported. This should not really happen anyway, and if it does, the
hwdec interop backend must fail at creation time, so this is not an
issue.
2016-05-10 16:29:10 +00:00
|
|
|
{
|
2017-06-30 14:57:17 +00:00
|
|
|
if (!desc->num_planes)
|
2013-03-01 20:19:20 +00:00
|
|
|
return false;
|
|
|
|
|
2017-07-29 18:11:51 +00:00
|
|
|
if (desc->planes[0]->ctype == RA_CTYPE_UINT && p->forced_dumb_mode)
|
2016-01-26 19:47:32 +00:00
|
|
|
return false;
|
2014-12-17 20:48:23 +00:00
|
|
|
|
2013-03-01 20:19:20 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2013-11-03 23:00:18 +00:00
|
|
|
bool gl_video_check_format(struct gl_video *p, int mp_format)
|
2013-03-01 20:19:20 +00:00
|
|
|
{
|
2017-07-29 18:11:51 +00:00
|
|
|
struct ra_imgfmt_desc desc;
|
|
|
|
if (ra_get_imgfmt_desc(p->ra, mp_format, &desc) &&
|
2017-06-30 14:57:17 +00:00
|
|
|
is_imgfmt_desc_supported(p, &desc))
|
vo_opengl: refactor how hwdec interop exports textures
Rename gl_hwdec_driver.map_image to map_frame, and let it fill out a
struct gl_hwdec_frame describing the exact texture layout. This gives
more flexibility to what the hwdec interop can export. In particular, it
can export strange component orders/permutations and textures with
padded size. (The latter originating from cropped video.)
The way gl_hwdec_frame works is in the spirit of the rest of the
vo_opengl video processing code, which tends to put as much information
in immediate state (as part of the dataflow), instead of declaring it
globally. To some degree this duplicates the texplane and img_tex
structs, but until we somehow unify those, it's better to give the hwdec
state its own struct. The fact that changing the hwdec struct would
require changes and testing on at least 4 platform/GPU combinations
makes duplicating it almost a requirement to avoid pain later.
Make gl_hwdec_driver.reinit set the new image format and remove the
gl_hwdec.converted_imgfmt field.
Likewise, gl_hwdec.gl_texture_target is replaced with
gl_hwdec_plane.gl_target.
Split out a init_image_desc function from init_format. The latter is not
called in the hwdec case at all anymore. Setting up most of struct
texplane is also completely separate in the hwdec and normal cases.
video.c does not check whether the hwdec "mapped" image format is
supported. This should not really happen anyway, and if it does, the
hwdec interop backend must fail at creation time, so this is not an
issue.
2016-05-10 16:29:10 +00:00
|
|
|
return true;
|
vo_gpu: make it possible to load multiple hwdec interop drivers
Make the VO<->decoder interface capable of supporting multiple hwdec
APIs at once. The main gain is that this simplifies autoprobing a lot.
Before this change, it could happen that the VO loaded the "wrong" hwdec
API, and the decoder was stuck with the choice (breaking hw decoding).
With the change applied, the VO simply loads all available APIs, so
autoprobing trickery is left entirely to the decoder.
In the past, we were quite careful about not accidentally loading the
wrong interop drivers. This was in part to make sure autoprobing works,
but also because libva had this obnoxious bug of dumping garbage to
stderr when using the API. libva was fixed, so this is not a problem
anymore.
The --opengl-hwdec-interop option is changed in various ways (again...),
and renamed to --gpu-hwdec-interop. It does not have much use anymore,
other than debugging. It's notable that the order in the hwdec interop
array ra_hwdec_drivers[] still matters if multiple drivers support the
same image formats, so the option can explicitly force one, if that
should ever be necessary, or more likely, for debugging. One example are
the ra_hwdec_d3d11egl and ra_hwdec_d3d11eglrgb drivers, which both
support d3d11 input.
vo_gpu now always loads the interop lazily by default, but when it does,
it loads them all. vo_opengl_cb now always loads them when the GL
context handle is initialized. I don't expect that this causes any
problems.
It's now possible to do things like changing between vdpau and nvdec
decoding at runtime.
This is also preparation for cleaning up vd_lavc.c hwdec autoprobing.
It's another reason why hwdec_devices_request_all() does not take a
hwdec type anymore.
2017-12-01 04:05:00 +00:00
|
|
|
for (int n = 0; n < p->num_hwdecs; n++) {
|
|
|
|
if (ra_hwdec_test_format(p->hwdecs[n], mp_format))
|
|
|
|
return true;
|
|
|
|
}
|
vo_opengl: refactor how hwdec interop exports textures
Rename gl_hwdec_driver.map_image to map_frame, and let it fill out a
struct gl_hwdec_frame describing the exact texture layout. This gives
more flexibility to what the hwdec interop can export. In particular, it
can export strange component orders/permutations and textures with
padded size. (The latter originating from cropped video.)
The way gl_hwdec_frame works is in the spirit of the rest of the
vo_opengl video processing code, which tends to put as much information
in immediate state (as part of the dataflow), instead of declaring it
globally. To some degree this duplicates the texplane and img_tex
structs, but until we somehow unify those, it's better to give the hwdec
state its own struct. The fact that changing the hwdec struct would
require changes and testing on at least 4 platform/GPU combinations
makes duplicating it almost a requirement to avoid pain later.
Make gl_hwdec_driver.reinit set the new image format and remove the
gl_hwdec.converted_imgfmt field.
Likewise, gl_hwdec.gl_texture_target is replaced with
gl_hwdec_plane.gl_target.
Split out a init_image_desc function from init_format. The latter is not
called in the hwdec case at all anymore. Setting up most of struct
texplane is also completely separate in the hwdec and normal cases.
video.c does not check whether the hwdec "mapped" image format is
supported. This should not really happen anyway, and if it does, the
hwdec interop backend must fail at creation time, so this is not an
issue.
2016-05-10 16:29:10 +00:00
|
|
|
return false;
|
2013-03-01 20:19:20 +00:00
|
|
|
}
|
|
|
|
|
2013-06-07 23:35:44 +00:00
|
|
|
void gl_video_config(struct gl_video *p, struct mp_image_params *params)
|
2013-03-01 20:19:20 +00:00
|
|
|
{
|
2017-05-02 15:09:16 +00:00
|
|
|
unmap_overlay(p);
|
2016-09-08 13:55:47 +00:00
|
|
|
unref_current_image(p);
|
2013-11-05 18:08:44 +00:00
|
|
|
|
2015-01-29 18:53:49 +00:00
|
|
|
if (!mp_image_params_equal(&p->real_image_params, params)) {
|
2013-11-05 18:08:44 +00:00
|
|
|
uninit_video(p);
|
2015-01-29 18:53:49 +00:00
|
|
|
p->real_image_params = *params;
|
|
|
|
p->image_params = *params;
|
2014-12-09 20:36:45 +00:00
|
|
|
if (params->imgfmt)
|
2015-01-29 18:53:49 +00:00
|
|
|
init_video(p);
|
2013-11-05 18:08:44 +00:00
|
|
|
}
|
2014-11-07 14:28:12 +00:00
|
|
|
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
gl_video_reset_surfaces(p);
|
2013-03-01 20:19:20 +00:00
|
|
|
}
|
|
|
|
|
2015-03-23 15:32:59 +00:00
|
|
|
void gl_video_set_osd_source(struct gl_video *p, struct osd_state *osd)
|
|
|
|
{
|
|
|
|
mpgl_osd_destroy(p->osd);
|
|
|
|
p->osd = NULL;
|
|
|
|
p->osd_state = osd;
|
2016-05-18 15:47:10 +00:00
|
|
|
reinit_osd(p);
|
2015-03-23 15:32:59 +00:00
|
|
|
}
|
|
|
|
|
2017-08-07 17:14:18 +00:00
|
|
|
struct gl_video *gl_video_init(struct ra *ra, struct mp_log *log,
|
|
|
|
struct mpv_global *g)
|
2013-03-01 20:19:20 +00:00
|
|
|
{
|
|
|
|
struct gl_video *p = talloc_ptrtype(NULL, p);
|
|
|
|
*p = (struct gl_video) {
|
vo_opengl: start work on rendering API abstraction
This starts work on moving OpenGL-specific code out of the general
renderer code, so that we can support other other GPU APIs. This is in
a very early stage and it's only a proof of concept. It's unknown
whether this will succeed or result in other backends.
For now, the GL rendering API ("ra") and its only provider (ra_gl) does
texture creation/upload/destruction only. And it's used for the main
video texture only. All other code is still hardcoded to GL.
There is some duplication with ra_format and gl_format handling. In the
end, only the ra variants will be needed (plus the gl_format table of
course). For now, this is simpler, because for some reason lots of hwdec
code still requires the GL variants, and would have to be updated to
use the ra ones.
Currently, the video.c code accesses private ra_gl fields. In the end,
it should not do that of course, and it would not include ra_gl.h.
Probably adds bugs, but you can keep them.
2017-07-26 09:19:51 +00:00
|
|
|
.ra = ra,
|
2015-03-27 12:27:40 +00:00
|
|
|
.global = g,
|
2013-07-31 19:44:21 +00:00
|
|
|
.log = log,
|
2017-08-05 12:20:14 +00:00
|
|
|
.sc = gl_sc_create(ra, g, log),
|
video: redo video equalizer option handling
I really wouldn't care much about this, but some parts of the core code
are under HAVE_GPL, so there's some need to get rid of it. Simply turn
the video equalizer from its current fine-grained handling with vf/vo
fallbacks into global options. This makes updating them much simpler.
This removes any possibility of applying video equalizers in filters,
which affects vf_scale, and the previously removed vf_eq. Not a big
loss, since the preferred VOs have this builtin.
Remove video equalizer handling from vo_direct3d, vo_sdl, vo_vaapi, and
vo_xv. I'm not going to waste my time on these legacy VOs.
vo.eq_opts_cache exists _only_ to send a VOCTRL_SET_EQUALIZER, which
exists _only_ to trigger a redraw. This seems silly, but for now I feel
like this is less of a pain. The rest of the equalizer using code is
self-updating.
See commit 96b906a51d5 for how some video equalizer code was GPL only.
Some command line option names and ranges can probably be traced back to
a GPL only committer, but we don't consider these copyrightable.
2017-08-22 15:01:35 +00:00
|
|
|
.video_eq = mp_csp_equalizer_create(p, g),
|
2016-09-02 13:59:40 +00:00
|
|
|
.opts_cache = m_config_cache_alloc(p, g, &gl_video_conf),
|
2013-03-01 20:19:20 +00:00
|
|
|
};
|
2017-07-03 14:59:38 +00:00
|
|
|
// make sure this variable is initialized to *something*
|
|
|
|
p->pass = p->pass_fresh;
|
2016-09-06 09:11:36 +00:00
|
|
|
struct gl_video_opts *opts = p->opts_cache->opts;
|
|
|
|
p->cms = gl_lcms_init(p, log, g, opts->icc_opts),
|
|
|
|
p->opts = *opts;
|
2016-03-05 08:42:57 +00:00
|
|
|
for (int n = 0; n < SCALER_COUNT; n++)
|
|
|
|
p->scaler[n] = (struct scaler){.index = n};
|
2017-09-27 22:07:42 +00:00
|
|
|
// our VAO always has the vec2 position as the first element
|
|
|
|
MP_TARRAY_APPEND(p, p->vao, p->vao_len, (struct ra_renderpass_input) {
|
|
|
|
.name = "position",
|
|
|
|
.type = RA_VARTYPE_FLOAT,
|
|
|
|
.dim_v = 2,
|
|
|
|
.dim_m = 1,
|
|
|
|
.offset = 0,
|
|
|
|
});
|
vo_opengl: restore single pass optimization as separate code path
The single path optimization, rendering the video in one shader pass and
without FBO indirections, was removed soem commits ago. It didn't have a
place in this code, and caused considerable complexity and maintenance
issues.
On the other hand, it still has some worth, such as for use with
extremely crappy hardware (GLES only or OpenGL 2.1 without FBO
extension). Ideally, these use cases would be handled by a separate VO
(say, vo_gles). While cleaner, this would still cause code duplication
and other complexity.
The third option is making the single-pass optimization a completely
separate code path, with most vo_opengl features disabled. While this
does duplicate some functionality (such as "unpacking" the video data
from textures), it's also relatively unintrusive, and the high quality
code path doesn't need to take it into account at all. On another
positive node, this "dumb-mode" could be forced in other cases where
OpenGL 2.1 is not enough, and where we don't want to care about versions
this old.
2015-09-07 19:09:06 +00:00
|
|
|
init_gl(p);
|
2016-09-06 09:11:23 +00:00
|
|
|
reinit_from_options(p);
|
2013-03-01 20:19:20 +00:00
|
|
|
return p;
|
|
|
|
}
|
|
|
|
|
2015-03-13 18:30:31 +00:00
|
|
|
// Get static string for scaler shader. If "tscale" is set to true, the
|
|
|
|
// scaler must be a separable convolution filter.
|
|
|
|
static const char *handle_scaler_opt(const char *name, bool tscale)
|
2013-03-01 20:19:20 +00:00
|
|
|
{
|
2015-01-26 01:03:44 +00:00
|
|
|
if (name && name[0]) {
|
2013-03-01 20:19:20 +00:00
|
|
|
const struct filter_kernel *kernel = mp_find_filter_kernel(name);
|
2015-03-13 18:30:31 +00:00
|
|
|
if (kernel && (!tscale || !kernel->polar))
|
vo_opengl: separate kernel and window
This makes the core much more elegant, reusable, reconfigurable and also
allows us to more easily add aliases for specific configurations.
Furthermore, this lets us apply a generic blur factor / window function
to arbitrary filters, so we can finally "mix and match" in order to
fine-tune windowing functions.
A few notes are in order:
1. The current system for configuring scalers is ugly and rapidly
getting unwieldy. I modified the man page to make it a bit more
bearable, but long-term we have to do something about it; especially
since..
2. There's currently no way to affect the blur factor or parameters of
the window functions themselves. For example, I can't actually
fine-tune the kaiser window's param1, since there's simply no way to
do so in the current API - even though filter_kernels.c supports it
just fine!
3. This removes some lesser used filters (especially those which are
purely window functions to begin with). If anybody asks, you can get
eg. the old behavior of scale=hanning by using
scale=box:scale-window=hanning:scale-radius=1 (and yes, the result is
just as terrible as that sounds - which is why nobody should have
been using them in the first place).
4. This changes the semantics of the "triangle" scaler slightly - it now
has an arbitrary radius. This can possibly produce weird results for
people who were previously using scale-down=triangle, especially if
in combination with scale-radius (for the usual upscaling). The
correct fix for this is to use scale-down=bilinear_slow instead,
which is an alias for triangle at radius 1.
In regards to the last point, in future I want to make it so that
filters have a filter-specific "preferred radius" (for the ones that
are arbitrarily tunable), once the configuration system for filters has
been redesigned (in particular in a way that will let us separate scale
and scale-down cleanly). That way, "triangle" can simply have the
preferred radius of 1 by default, while still being tunable. (Rather
than the default radius being hard-coded to 3 always)
2015-03-25 03:40:28 +00:00
|
|
|
return kernel->f.name;
|
2013-03-01 20:19:20 +00:00
|
|
|
|
2015-03-15 06:11:51 +00:00
|
|
|
for (const char *const *filter = tscale ? fixed_tscale_filters
|
|
|
|
: fixed_scale_filters;
|
|
|
|
*filter; filter++) {
|
|
|
|
if (strcmp(*filter, name) == 0)
|
2013-03-01 20:19:20 +00:00
|
|
|
return *filter;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2017-10-17 07:07:35 +00:00
|
|
|
static void gl_video_update_options(struct gl_video *p)
|
2015-09-08 20:46:36 +00:00
|
|
|
{
|
2016-09-02 13:59:40 +00:00
|
|
|
if (m_config_cache_update(p->opts_cache)) {
|
2016-09-06 09:11:36 +00:00
|
|
|
gl_lcms_update_options(p->cms);
|
2016-09-02 13:59:40 +00:00
|
|
|
reinit_from_options(p);
|
|
|
|
}
|
2017-10-17 07:07:35 +00:00
|
|
|
|
|
|
|
if (mp_csp_equalizer_state_changed(p->video_eq))
|
|
|
|
p->output_tex_valid = false;
|
2016-06-04 15:52:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Re-apply the current option set to the renderer. Tears down all rendering
// state and rebuilds it; called on init and whenever options change.
static void reinit_from_options(struct gl_video *p)
{
    p->use_lut_3d = gl_lcms_has_profile(p->cms);

    // Copy the option fields, so that check_gl_features() can mutate them.
    // This works only for the fields themselves of course, not for any memory
    // referenced by them.
    p->opts = *(struct gl_video_opts *)p->opts_cache->opts;

    // force_clear_color means an explicit clear color was set externally and
    // takes precedence over the --background option.
    if (!p->force_clear_color)
        p->clear_color = p->opts.background;

    // Order matters: features must be (re)checked against the fresh option
    // copy before the rendering state is rebuilt.
    check_gl_features(p);
    uninit_rendering(p);
    gl_sc_set_cache_dir(p->sc, p->opts.shader_cache_dir);
    p->ra->use_pbo = p->opts.pbo;
    gl_video_setup_hooks(p);
    reinit_osd(p);

    // --interpolation without a display-sync mode does nothing; warn once.
    if (p->opts.interpolation && !p->global->opts->video_sync && !p->dsi_warned) {
        MP_WARN(p, "Interpolation now requires enabling display-sync mode.\n"
                   "E.g.: --video-sync=display-resample\n");
        p->dsi_warned = true;
    }
}
|
|
|
|
|
|
|
|
void gl_video_configure_queue(struct gl_video *p, struct vo *vo)
|
|
|
|
{
|
2017-10-17 07:07:35 +00:00
|
|
|
gl_video_update_options(p);
|
|
|
|
|
2015-07-20 19:12:46 +00:00
|
|
|
int queue_size = 1;
|
2015-07-16 20:43:40 +00:00
|
|
|
|
2015-03-13 18:30:31 +00:00
|
|
|
// Figure out an adequate size for the interpolation queue. The larger
|
2015-06-26 08:59:57 +00:00
|
|
|
// the radius, the earlier we need to queue frames.
|
2015-07-16 20:43:40 +00:00
|
|
|
if (p->opts.interpolation) {
|
vo_opengl: refactor scaler configuration
This merges all of the scaler-related options into a single
configuration struct, and also cleans up the way they're passed through
the code. (For example, the scaler index is no longer threaded through
pass_sample, just the scaler configuration itself, and there's no longer
duplication of the params etc.)
In addition, this commit makes scale-down more principled, and turns it
into a scaler in its own right - so there's no longer an ugly separation
between scale and scale-down in the code.
Finally, the radius stuff has been made more proper - filters always
have a radius now (there's no more radius -1), and get a new .resizable
attribute instead for when it's tunable.
User-visible changes:
1. scale-down has been renamed dscale and now has its own set of config
options (dscale-param1, dscale-radius) etc., instead of reusing
scale-param1 (which was arguably a bug).
2. The default radius is no longer fixed at 3, but instead uses that
filter's preferred radius by default. (Scalers with a default radius
other than 3 include sinc, gaussian, box and triangle)
3. scale-radius etc. now goes down to 0.5, rather than 1.0. 0.5 is the
smallest radius that theoretically makes sense, and indeed it's used
by at least one filter (nearest).
Apart from that, it should just be internal changes only.
Note that this sets up for the refactor discussed in #1720, which would
be to merge scaler and window configurations (include parameters etc.)
into a single, simplified string. In the code, this would now basically
just mean getting rid of all the OPT_FLOATRANGE etc. lines related to
scalers and replacing them by a single function that parses a string and
updates the struct scaler_config as appropriate.
2015-03-26 00:55:32 +00:00
|
|
|
const struct filter_kernel *kernel =
|
2016-03-05 08:42:57 +00:00
|
|
|
mp_find_filter_kernel(p->opts.scaler[SCALER_TSCALE].kernel.name);
|
2015-03-13 18:30:31 +00:00
|
|
|
if (kernel) {
|
2017-03-05 01:13:18 +00:00
|
|
|
// filter_scale wouldn't be correctly initialized were we to use it here.
|
|
|
|
// This is fine since we're always upsampling, but beware if downsampling
|
|
|
|
// is added!
|
vo_opengl: separate kernel and window
This makes the core much more elegant, reusable, reconfigurable and also
allows us to more easily add aliases for specific configurations.
Furthermore, this lets us apply a generic blur factor / window function
to arbitrary filters, so we can finally "mix and match" in order to
fine-tune windowing functions.
A few notes are in order:
1. The current system for configuring scalers is ugly and rapidly
getting unwieldy. I modified the man page to make it a bit more
bearable, but long-term we have to do something about it; especially
since..
2. There's currently no way to affect the blur factor or parameters of
the window functions themselves. For example, I can't actually
fine-tune the kaiser window's param1, since there's simply no way to
do so in the current API - even though filter_kernels.c supports it
just fine!
3. This removes some lesser used filters (especially those which are
purely window functions to begin with). If anybody asks, you can get
eg. the old behavior of scale=hanning by using
scale=box:scale-window=hanning:scale-radius=1 (and yes, the result is
just as terrible as that sounds - which is why nobody should have
been using them in the first place).
4. This changes the semantics of the "triangle" scaler slightly - it now
has an arbitrary radius. This can possibly produce weird results for
people who were previously using scale-down=triangle, especially if
in combination with scale-radius (for the usual upscaling). The
correct fix for this is to use scale-down=bilinear_slow instead,
which is an alias for triangle at radius 1.
In regards to the last point, in future I want to make it so that
filters have a filter-specific "preferred radius" (for the ones that
are arbitrarily tunable), once the configuration system for filters has
been redesigned (in particular in a way that will let us separate scale
and scale-down cleanly). That way, "triangle" can simply have the
preferred radius of 1 by default, while still being tunable. (Rather
than the default radius being hard-coded to 3 always)
2015-03-25 03:40:28 +00:00
|
|
|
double radius = kernel->f.radius;
|
2016-03-05 08:42:57 +00:00
|
|
|
radius = radius > 0 ? radius : p->opts.scaler[SCALER_TSCALE].radius;
|
2015-07-20 19:12:46 +00:00
|
|
|
queue_size += 1 + ceil(radius);
|
2015-07-11 11:55:45 +00:00
|
|
|
} else {
|
2016-07-19 18:12:33 +00:00
|
|
|
// Oversample/linear case
|
2015-07-20 19:12:46 +00:00
|
|
|
queue_size += 2;
|
2015-03-13 18:30:31 +00:00
|
|
|
}
|
|
|
|
}
|
2013-03-01 20:19:20 +00:00
|
|
|
|
2015-11-25 21:10:55 +00:00
|
|
|
vo_set_queue_params(vo, 0, queue_size);
|
2013-03-01 20:19:20 +00:00
|
|
|
}
|
|
|
|
|
2013-12-21 19:03:36 +00:00
|
|
|
static int validate_scaler_opt(struct mp_log *log, const m_option_t *opt,
|
|
|
|
struct bstr name, struct bstr param)
|
2013-03-01 20:19:20 +00:00
|
|
|
{
|
2015-01-22 18:58:22 +00:00
|
|
|
char s[20] = {0};
|
|
|
|
int r = 1;
|
2015-03-13 18:30:31 +00:00
|
|
|
bool tscale = bstr_equals0(name, "tscale");
|
2013-07-22 00:14:15 +00:00
|
|
|
if (bstr_equals0(param, "help")) {
|
2016-09-17 16:07:40 +00:00
|
|
|
r = M_OPT_EXIT;
|
2015-01-22 18:58:22 +00:00
|
|
|
} else {
|
|
|
|
snprintf(s, sizeof(s), "%.*s", BSTR_P(param));
|
2015-03-13 18:30:31 +00:00
|
|
|
if (!handle_scaler_opt(s, tscale))
|
2015-01-22 18:58:22 +00:00
|
|
|
r = M_OPT_INVALID;
|
|
|
|
}
|
|
|
|
if (r < 1) {
|
2013-12-21 19:03:36 +00:00
|
|
|
mp_info(log, "Available scalers:\n");
|
2015-03-15 06:11:51 +00:00
|
|
|
for (const char *const *filter = tscale ? fixed_tscale_filters
|
|
|
|
: fixed_scale_filters;
|
|
|
|
*filter; filter++) {
|
|
|
|
mp_info(log, " %s\n", *filter);
|
2015-03-13 18:30:31 +00:00
|
|
|
}
|
vo_opengl: separate kernel and window
This makes the core much more elegant, reusable, reconfigurable and also
allows us to more easily add aliases for specific configurations.
Furthermore, this lets us apply a generic blur factor / window function
to arbitrary filters, so we can finally "mix and match" in order to
fine-tune windowing functions.
A few notes are in order:
1. The current system for configuring scalers is ugly and rapidly
getting unwieldy. I modified the man page to make it a bit more
bearable, but long-term we have to do something about it; especially
since..
2. There's currently no way to affect the blur factor or parameters of
the window functions themselves. For example, I can't actually
fine-tune the kaiser window's param1, since there's simply no way to
do so in the current API - even though filter_kernels.c supports it
just fine!
3. This removes some lesser used filters (especially those which are
purely window functions to begin with). If anybody asks, you can get
eg. the old behavior of scale=hanning by using
scale=box:scale-window=hanning:scale-radius=1 (and yes, the result is
just as terrible as that sounds - which is why nobody should have
been using them in the first place).
4. This changes the semantics of the "triangle" scaler slightly - it now
has an arbitrary radius. This can possibly produce weird results for
people who were previously using scale-down=triangle, especially if
in combination with scale-radius (for the usual upscaling). The
correct fix for this is to use scale-down=bilinear_slow instead,
which is an alias for triangle at radius 1.
In regards to the last point, in future I want to make it so that
filters have a filter-specific "preferred radius" (for the ones that
are arbitrarily tunable), once the configuration system for filters has
been redesigned (in particular in a way that will let us separate scale
and scale-down cleanly). That way, "triangle" can simply have the
preferred radius of 1 by default, while still being tunable. (Rather
than the default radius being hard-coded to 3 always)
2015-03-25 03:40:28 +00:00
|
|
|
for (int n = 0; mp_filter_kernels[n].f.name; n++) {
|
2015-03-13 18:30:31 +00:00
|
|
|
if (!tscale || !mp_filter_kernels[n].polar)
|
vo_opengl: separate kernel and window
This makes the core much more elegant, reusable, reconfigurable and also
allows us to more easily add aliases for specific configurations.
Furthermore, this lets us apply a generic blur factor / window function
to arbitrary filters, so we can finally "mix and match" in order to
fine-tune windowing functions.
A few notes are in order:
1. The current system for configuring scalers is ugly and rapidly
getting unwieldy. I modified the man page to make it a bit more
bearable, but long-term we have to do something about it; especially
since..
2. There's currently no way to affect the blur factor or parameters of
the window functions themselves. For example, I can't actually
fine-tune the kaiser window's param1, since there's simply no way to
do so in the current API - even though filter_kernels.c supports it
just fine!
3. This removes some lesser used filters (especially those which are
purely window functions to begin with). If anybody asks, you can get
eg. the old behavior of scale=hanning by using
scale=box:scale-window=hanning:scale-radius=1 (and yes, the result is
just as terrible as that sounds - which is why nobody should have
been using them in the first place).
4. This changes the semantics of the "triangle" scaler slightly - it now
has an arbitrary radius. This can possibly produce weird results for
people who were previously using scale-down=triangle, especially if
in combination with scale-radius (for the usual upscaling). The
correct fix for this is to use scale-down=bilinear_slow instead,
which is an alias for triangle at radius 1.
In regards to the last point, in future I want to make it so that
filters have a filter-specific "preferred radius" (for the ones that
are arbitrarily tunable), once the configuration system for filters has
been redesigned (in particular in a way that will let us separate scale
and scale-down cleanly). That way, "triangle" can simply have the
preferred radius of 1 by default, while still being tunable. (Rather
than the default radius being hard-coded to 3 always)
2015-03-25 03:40:28 +00:00
|
|
|
mp_info(log, " %s\n", mp_filter_kernels[n].f.name);
|
2015-03-13 18:30:31 +00:00
|
|
|
}
|
2015-01-22 18:58:22 +00:00
|
|
|
if (s[0])
|
|
|
|
mp_fatal(log, "No scaler named '%s' found!\n", s);
|
2013-07-22 00:14:15 +00:00
|
|
|
}
|
2015-01-22 18:58:22 +00:00
|
|
|
return r;
|
2013-03-01 20:19:20 +00:00
|
|
|
}
|
2013-03-15 19:17:33 +00:00
|
|
|
|
vo_opengl: separate kernel and window
This makes the core much more elegant, reusable, reconfigurable and also
allows us to more easily add aliases for specific configurations.
Furthermore, this lets us apply a generic blur factor / window function
to arbitrary filters, so we can finally "mix and match" in order to
fine-tune windowing functions.
A few notes are in order:
1. The current system for configuring scalers is ugly and rapidly
getting unwieldy. I modified the man page to make it a bit more
bearable, but long-term we have to do something about it; especially
since..
2. There's currently no way to affect the blur factor or parameters of
the window functions themselves. For example, I can't actually
fine-tune the kaiser window's param1, since there's simply no way to
do so in the current API - even though filter_kernels.c supports it
just fine!
3. This removes some lesser used filters (especially those which are
purely window functions to begin with). If anybody asks, you can get
eg. the old behavior of scale=hanning by using
scale=box:scale-window=hanning:scale-radius=1 (and yes, the result is
just as terrible as that sounds - which is why nobody should have
been using them in the first place).
4. This changes the semantics of the "triangle" scaler slightly - it now
has an arbitrary radius. This can possibly produce weird results for
people who were previously using scale-down=triangle, especially if
in combination with scale-radius (for the usual upscaling). The
correct fix for this is to use scale-down=bilinear_slow instead,
which is an alias for triangle at radius 1.
In regards to the last point, in future I want to make it so that
filters have a filter-specific "preferred radius" (for the ones that
are arbitrarily tunable), once the configuration system for filters has
been redesigned (in particular in a way that will let us separate scale
and scale-down cleanly). That way, "triangle" can simply have the
preferred radius of 1 by default, while still being tunable. (Rather
than the default radius being hard-coded to 3 always)
2015-03-25 03:40:28 +00:00
|
|
|
static int validate_window_opt(struct mp_log *log, const m_option_t *opt,
|
|
|
|
struct bstr name, struct bstr param)
|
|
|
|
{
|
|
|
|
char s[20] = {0};
|
|
|
|
int r = 1;
|
|
|
|
if (bstr_equals0(param, "help")) {
|
2016-09-17 16:07:40 +00:00
|
|
|
r = M_OPT_EXIT;
|
vo_opengl: separate kernel and window
This makes the core much more elegant, reusable, reconfigurable and also
allows us to more easily add aliases for specific configurations.
Furthermore, this lets us apply a generic blur factor / window function
to arbitrary filters, so we can finally "mix and match" in order to
fine-tune windowing functions.
A few notes are in order:
1. The current system for configuring scalers is ugly and rapidly
getting unwieldy. I modified the man page to make it a bit more
bearable, but long-term we have to do something about it; especially
since..
2. There's currently no way to affect the blur factor or parameters of
the window functions themselves. For example, I can't actually
fine-tune the kaiser window's param1, since there's simply no way to
do so in the current API - even though filter_kernels.c supports it
just fine!
3. This removes some lesser used filters (especially those which are
purely window functions to begin with). If anybody asks, you can get
eg. the old behavior of scale=hanning by using
scale=box:scale-window=hanning:scale-radius=1 (and yes, the result is
just as terrible as that sounds - which is why nobody should have
been using them in the first place).
4. This changes the semantics of the "triangle" scaler slightly - it now
has an arbitrary radius. This can possibly produce weird results for
people who were previously using scale-down=triangle, especially if
in combination with scale-radius (for the usual upscaling). The
correct fix for this is to use scale-down=bilinear_slow instead,
which is an alias for triangle at radius 1.
In regards to the last point, in future I want to make it so that
filters have a filter-specific "preferred radius" (for the ones that
are arbitrarily tunable), once the configuration system for filters has
been redesigned (in particular in a way that will let us separate scale
and scale-down cleanly). That way, "triangle" can simply have the
preferred radius of 1 by default, while still being tunable. (Rather
than the default radius being hard-coded to 3 always)
2015-03-25 03:40:28 +00:00
|
|
|
} else {
|
|
|
|
snprintf(s, sizeof(s), "%.*s", BSTR_P(param));
|
|
|
|
const struct filter_window *window = mp_find_filter_window(s);
|
|
|
|
if (!window)
|
|
|
|
r = M_OPT_INVALID;
|
|
|
|
}
|
|
|
|
if (r < 1) {
|
|
|
|
mp_info(log, "Available windows:\n");
|
|
|
|
for (int n = 0; mp_filter_windows[n].name; n++)
|
|
|
|
mp_info(log, " %s\n", mp_filter_windows[n].name);
|
|
|
|
if (s[0])
|
|
|
|
mp_fatal(log, "No window named '%s' found!\n", s);
|
|
|
|
}
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2015-02-07 12:54:18 +00:00
|
|
|
float gl_video_scale_ambient_lux(float lmin, float lmax,
|
|
|
|
float rmin, float rmax, float lux)
|
|
|
|
{
|
|
|
|
assert(lmax > lmin);
|
|
|
|
|
|
|
|
float num = (rmax - rmin) * (log10(lux) - log10(lmin));
|
|
|
|
float den = log10(lmax) - log10(lmin);
|
|
|
|
float result = num / den + rmin;
|
|
|
|
|
|
|
|
// clamp the result
|
|
|
|
float max = MPMAX(rmax, rmin);
|
|
|
|
float min = MPMIN(rmax, rmin);
|
|
|
|
return MPMAX(MPMIN(result, max), min);
|
|
|
|
}
|
|
|
|
|
|
|
|
void gl_video_set_ambient_lux(struct gl_video *p, int lux)
|
|
|
|
{
|
|
|
|
if (p->opts.gamma_auto) {
|
2017-09-19 04:35:24 +00:00
|
|
|
p->opts.gamma = gl_video_scale_ambient_lux(16.0, 256.0, 1.0, 1.2, lux);
|
2017-09-28 09:53:57 +00:00
|
|
|
MP_TRACE(p, "ambient light changed: %d lux (gamma: %f)\n", lux,
|
|
|
|
p->opts.gamma);
|
2015-02-07 12:54:18 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-08-14 17:57:44 +00:00
|
|
|
static void *gl_video_dr_alloc_buffer(struct gl_video *p, size_t size)
|
2017-07-23 07:41:51 +00:00
|
|
|
{
|
2017-08-05 20:29:48 +00:00
|
|
|
struct ra_buf_params params = {
|
|
|
|
.type = RA_BUF_TYPE_TEX_UPLOAD,
|
|
|
|
.host_mapped = true,
|
|
|
|
.size = size,
|
2017-07-23 07:41:51 +00:00
|
|
|
};
|
|
|
|
|
2017-08-05 20:29:48 +00:00
|
|
|
struct ra_buf *buf = ra_buf_create(p->ra, ¶ms);
|
|
|
|
if (!buf)
|
2017-07-23 07:41:51 +00:00
|
|
|
return NULL;
|
|
|
|
|
2017-08-05 20:29:48 +00:00
|
|
|
MP_TARRAY_GROW(p, p->dr_buffers, p->num_dr_buffers);
|
|
|
|
p->dr_buffers[p->num_dr_buffers++] = (struct dr_buffer){ .buf = buf };
|
|
|
|
|
|
|
|
return buf->data;
|
2017-07-23 07:41:51 +00:00
|
|
|
};
|
|
|
|
|
2017-08-14 17:57:44 +00:00
|
|
|
static void gl_video_dr_free_buffer(void *opaque, uint8_t *data)
|
2017-07-23 07:41:51 +00:00
|
|
|
{
|
2017-08-14 17:57:44 +00:00
|
|
|
struct gl_video *p = opaque;
|
|
|
|
|
2017-07-23 07:41:51 +00:00
|
|
|
for (int n = 0; n < p->num_dr_buffers; n++) {
|
|
|
|
struct dr_buffer *buffer = &p->dr_buffers[n];
|
2017-08-14 17:57:44 +00:00
|
|
|
if (buffer->buf->data == data) {
|
2017-07-23 07:41:51 +00:00
|
|
|
assert(!buffer->mpi); // can't be freed while it has a ref
|
2017-08-05 20:29:48 +00:00
|
|
|
ra_buf_free(p->ra, &buffer->buf);
|
2017-07-23 07:41:51 +00:00
|
|
|
MP_TARRAY_REMOVE_AT(p->dr_buffers, p->num_dr_buffers, n);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// not found - must not happen
|
|
|
|
assert(0);
|
|
|
|
}
|
2017-08-14 17:57:44 +00:00
|
|
|
|
|
|
|
struct mp_image *gl_video_get_image(struct gl_video *p, int imgfmt, int w, int h,
|
|
|
|
int stride_align)
|
|
|
|
{
|
2018-01-17 10:49:55 +00:00
|
|
|
if (!gl_video_check_format(p, imgfmt))
|
|
|
|
return NULL;
|
|
|
|
|
2017-08-14 17:57:44 +00:00
|
|
|
int size = mp_image_get_alloc_size(imgfmt, w, h, stride_align);
|
|
|
|
if (size < 0)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
int alloc_size = size + stride_align;
|
|
|
|
void *ptr = gl_video_dr_alloc_buffer(p, alloc_size);
|
|
|
|
if (!ptr)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
// (we expect vo.c to proxy the free callback, so it happens in the same
|
|
|
|
// thread it was allocated in, removing the need for synchronization)
|
|
|
|
struct mp_image *res = mp_image_from_buffer(imgfmt, w, h, stride_align,
|
|
|
|
ptr, alloc_size, p,
|
|
|
|
gl_video_dr_free_buffer);
|
|
|
|
if (!res)
|
|
|
|
gl_video_dr_free_buffer(p, ptr);
|
|
|
|
return res;
|
|
|
|
}
|
vo_gpu: make it possible to load multiple hwdec interop drivers
Make the VO<->decoder interface capable of supporting multiple hwdec
APIs at once. The main gain is that this simplifies autoprobing a lot.
Before this change, it could happen that the VO loaded the "wrong" hwdec
API, and the decoder was stuck with the choice (breaking hw decoding).
With the change applied, the VO simply loads all available APIs, so
autoprobing trickery is left entirely to the decoder.
In the past, we were quite careful about not accidentally loading the
wrong interop drivers. This was in part to make sure autoprobing works,
but also because libva had this obnoxious bug of dumping garbage to
stderr when using the API. libva was fixed, so this is not a problem
anymore.
The --opengl-hwdec-interop option is changed in various ways (again...),
and renamed to --gpu-hwdec-interop. It does not have much use anymore,
other than debugging. It's notable that the order in the hwdec interop
array ra_hwdec_drivers[] still matters if multiple drivers support the
same image formats, so the option can explicitly force one, if that
should ever be necessary, or more likely, for debugging. One example are
the ra_hwdec_d3d11egl and ra_hwdec_d3d11eglrgb drivers, which both
support d3d11 input.
vo_gpu now always loads the interop lazily by default, but when it does,
it loads them all. vo_opengl_cb now always loads them when the GL
context handle is initialized. I don't expect that this causes any
problems.
It's now possible to do things like changing between vdpau and nvdec
decoding at runtime.
This is also preparation for cleaning up vd_lavc.c hwdec autoprobing.
It's another reason why hwdec_devices_request_all() does not take a
hwdec type anymore.
2017-12-01 04:05:00 +00:00
|
|
|
|
|
|
|
static void load_add_hwdec(struct gl_video *p, struct mp_hwdec_devices *devs,
|
|
|
|
const struct ra_hwdec_driver *drv, bool is_auto)
|
|
|
|
{
|
|
|
|
struct ra_hwdec *hwdec =
|
|
|
|
ra_hwdec_load_driver(p->ra, p->log, p->global, devs, drv, is_auto);
|
|
|
|
if (hwdec)
|
|
|
|
MP_TARRAY_APPEND(p, p->hwdecs, p->num_hwdecs, hwdec);
|
|
|
|
}
|
|
|
|
|
|
|
|
void gl_video_load_hwdecs(struct gl_video *p, struct mp_hwdec_devices *devs,
|
|
|
|
bool load_all_by_default)
|
|
|
|
{
|
|
|
|
char *type = p->opts.hwdec_interop;
|
|
|
|
if (!type || !type[0] || strcmp(type, "auto") == 0) {
|
|
|
|
if (!load_all_by_default)
|
|
|
|
return;
|
|
|
|
type = "all";
|
|
|
|
}
|
|
|
|
if (strcmp(type, "no") == 0) {
|
|
|
|
// do nothing, just block further loading
|
|
|
|
} else if (strcmp(type, "all") == 0) {
|
2017-12-10 21:56:29 +00:00
|
|
|
gl_video_load_hwdecs_all(p, devs);
|
vo_gpu: make it possible to load multiple hwdec interop drivers
Make the VO<->decoder interface capable of supporting multiple hwdec
APIs at once. The main gain is that this simplifies autoprobing a lot.
Before this change, it could happen that the VO loaded the "wrong" hwdec
API, and the decoder was stuck with the choice (breaking hw decoding).
With the change applied, the VO simply loads all available APIs, so
autoprobing trickery is left entirely to the decoder.
In the past, we were quite careful about not accidentally loading the
wrong interop drivers. This was in part to make sure autoprobing works,
but also because libva had this obnoxious bug of dumping garbage to
stderr when using the API. libva was fixed, so this is not a problem
anymore.
The --opengl-hwdec-interop option is changed in various ways (again...),
and renamed to --gpu-hwdec-interop. It does not have much use anymore,
other than debugging. It's notable that the order in the hwdec interop
array ra_hwdec_drivers[] still matters if multiple drivers support the
same image formats, so the option can explicitly force one, if that
should ever be necessary, or more likely, for debugging. One example are
the ra_hwdec_d3d11egl and ra_hwdec_d3d11eglrgb drivers, which both
support d3d11 input.
vo_gpu now always loads the interop lazily by default, but when it does,
it loads them all. vo_opengl_cb now always loads them when the GL
context handle is initialized. I don't expect that this causes any
problems.
It's now possible to do things like changing between vdpau and nvdec
decoding at runtime.
This is also preparation for cleaning up vd_lavc.c hwdec autoprobing.
It's another reason why hwdec_devices_request_all() does not take a
hwdec type anymore.
2017-12-01 04:05:00 +00:00
|
|
|
} else {
|
|
|
|
for (int n = 0; ra_hwdec_drivers[n]; n++) {
|
|
|
|
const struct ra_hwdec_driver *drv = ra_hwdec_drivers[n];
|
|
|
|
if (strcmp(type, drv->name) == 0) {
|
|
|
|
load_add_hwdec(p, devs, drv, false);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
p->hwdec_interop_loading_done = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
void gl_video_load_hwdecs_all(struct gl_video *p, struct mp_hwdec_devices *devs)
|
|
|
|
{
|
|
|
|
if (!p->hwdec_interop_loading_done) {
|
|
|
|
for (int n = 0; ra_hwdec_drivers[n]; n++)
|
|
|
|
load_add_hwdec(p, devs, ra_hwdec_drivers[n], true);
|
|
|
|
p->hwdec_interop_loading_done = true;
|
|
|
|
}
|
|
|
|
}
|