mirror of
https://github.com/mpv-player/mpv
synced 2025-01-03 05:22:23 +00:00
vo_opengl: use textureGatherOffset for polar filters
This is more efficient on my machine (nvidia), but only when applied to
groups of exactly 4 texels. So we switch to the more efficient
textureGather for groups of 4.

Some notes:

- textureGatherOffset seems to be faster than textureGather by a
  non-negligible amount, but for some reason, textureOffset is still slower
  than a straight-up texture
- textureGather* requires GLSL 400; and at least on nvidia, this requires
  actually allocating a GL 4.0 context.
- the code in opengl/common.c that clamped the GLSL version to 330 is
  deprecated, because the old user shader style has been removed completely
  in the meantime
- To combat the growing complexity of the polar sampling code, we drop the
  antiringing functionality from EWA shaders completely, since it never
  really worked well for EWA to begin with. (Horrific artifacting)
This commit is contained in:
parent
b387f82aa4
commit
ad0d6caac7
@ -4034,7 +4034,7 @@ The following video options are currently all specific to ``--vo=opengl`` and
|
||||
0.0 and 1.0. The default value of 0.0 disables antiringing entirely.
|
||||
|
||||
Note that this doesn't affect the special filters ``bilinear`` and
|
||||
``bicubic_fast``.
|
||||
``bicubic_fast``, nor does it affect any polar (EWA) scalers.
|
||||
|
||||
``--scale-window=<window>``, ``--cscale-window=<window>``, ``--dscale-window=<window>``, ``--tscale-window=<window>``
|
||||
(Advanced users only) Choose a custom windowing function for the kernel.
|
||||
|
@ -579,8 +579,8 @@ void mpgl_load_functions2(GL *gl, void *(*get_fn)(void *ctx, const char *n),
|
||||
int glsl_major = 0, glsl_minor = 0;
|
||||
if (shader && sscanf(shader, "%d.%d", &glsl_major, &glsl_minor) == 2)
|
||||
gl->glsl_version = glsl_major * 100 + glsl_minor;
|
||||
// GLSL 400 defines "sample" as keyword - breaks custom shaders.
|
||||
gl->glsl_version = MPMIN(gl->glsl_version, 330);
|
||||
// restrict GLSL version to be forwards compatible
|
||||
gl->glsl_version = MPMIN(gl->glsl_version, 400);
|
||||
}
|
||||
|
||||
if (is_software_gl(gl)) {
|
||||
|
@ -92,6 +92,7 @@ static const struct mpgl_driver *const backends[] = {
|
||||
// 0-terminated list of desktop GL versions a backend should try to
|
||||
// initialize. The first entry is the most preferred version.
|
||||
const int mpgl_preferred_gl_versions[] = {
|
||||
400,
|
||||
330,
|
||||
320,
|
||||
310,
|
||||
|
@ -1583,7 +1583,7 @@ static void pass_sample(struct gl_video *p, struct img_tex tex,
|
||||
} else if (strcmp(name, "oversample") == 0) {
|
||||
pass_sample_oversample(p->sc, scaler, w, h);
|
||||
} else if (scaler->kernel && scaler->kernel->polar) {
|
||||
pass_sample_polar(p->sc, scaler);
|
||||
pass_sample_polar(p->sc, scaler, tex.components, p->gl->glsl_version);
|
||||
} else if (scaler->kernel) {
|
||||
pass_sample_separated(p, tex, scaler, w, h);
|
||||
} else {
|
||||
|
@ -105,62 +105,106 @@ void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler
|
||||
GLSLF("}\n");
|
||||
}
|
||||
|
||||
void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler)
|
||||
// Subroutine for computing and adding an individual texel contribution
|
||||
// If subtexel < 0, samples directly. Otherwise, takes the texel from cN[comp]
|
||||
static void polar_sample(struct gl_shader_cache *sc, struct scaler *scaler,
|
||||
int x, int y, int subtexel, int components)
|
||||
{
|
||||
double radius = scaler->kernel->f.radius * scaler->kernel->filter_scale;
|
||||
double radius_cutoff = scaler->kernel->radius_cutoff;
|
||||
int bound = ceil(radius_cutoff);
|
||||
bool use_ar = scaler->conf.antiring > 0;
|
||||
|
||||
// Since we can't know the subpixel position in advance, assume a
|
||||
// worst case scenario
|
||||
int yy = y > 0 ? y-1 : y;
|
||||
int xx = x > 0 ? x-1 : x;
|
||||
double dmax = sqrt(xx*xx + yy*yy);
|
||||
// Skip samples definitely outside the radius
|
||||
if (dmax >= radius_cutoff)
|
||||
return;
|
||||
GLSLF("d = length(vec2(%d.0, %d.0) - fcoord)/%f;\n", x, y, radius);
|
||||
// Check for samples that might be skippable
|
||||
bool maybe_skippable = dmax >= radius_cutoff - M_SQRT2;
|
||||
if (maybe_skippable)
|
||||
GLSLF("if (d < %f) {\n", radius_cutoff / radius);
|
||||
|
||||
// get the weight for this pixel
|
||||
if (scaler->gl_target == GL_TEXTURE_1D) {
|
||||
GLSLF("w = texture1D(lut, LUT_POS(d, %d.0)).r;\n",
|
||||
scaler->lut_size);
|
||||
} else {
|
||||
GLSLF("w = texture(lut, vec2(0.5, LUT_POS(d, %d.0))).r;\n",
|
||||
scaler->lut_size);
|
||||
}
|
||||
GLSL(wsum += w;)
|
||||
|
||||
if (subtexel < 0) {
|
||||
GLSLF("c0 = texture(tex, base + pt * vec2(%d.0, %d.0));\n", x, y);
|
||||
GLSL(color += vec4(w) * c0;)
|
||||
} else {
|
||||
for (int n = 0; n < components; n++)
|
||||
GLSLF("color[%d] += w * c%d[%d];\n", n, n, subtexel);
|
||||
}
|
||||
|
||||
if (maybe_skippable)
|
||||
GLSLF("}\n");
|
||||
}
|
||||
|
||||
void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler,
|
||||
int components, int glsl_version)
|
||||
{
|
||||
GLSL(color = vec4(0.0);)
|
||||
GLSLF("{\n");
|
||||
GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));)
|
||||
GLSL(vec2 base = pos - fcoord * pt;)
|
||||
GLSL(vec4 c;)
|
||||
GLSLF("float w, d, wsum = 0.0;\n");
|
||||
if (use_ar) {
|
||||
GLSL(vec4 lo = vec4(1.0);)
|
||||
GLSL(vec4 hi = vec4(0.0);)
|
||||
}
|
||||
for (int n = 0; n < components; n++)
|
||||
GLSLF("vec4 c%d;\n", n);
|
||||
|
||||
gl_sc_uniform_tex(sc, "lut", scaler->gl_target, scaler->gl_lut);
|
||||
|
||||
GLSLF("// scaler samples\n");
|
||||
for (int y = 1-bound; y <= bound; y++) {
|
||||
for (int x = 1-bound; x <= bound; x++) {
|
||||
// Since we can't know the subpixel position in advance, assume a
|
||||
// worst case scenario
|
||||
int yy = y > 0 ? y-1 : y;
|
||||
int xx = x > 0 ? x-1 : x;
|
||||
double dmax = sqrt(xx*xx + yy*yy);
|
||||
// Skip samples definitely outside the radius
|
||||
if (dmax >= radius_cutoff)
|
||||
continue;
|
||||
GLSLF("d = length(vec2(%d.0, %d.0) - fcoord)/%f;\n", x, y, radius);
|
||||
// Check for samples that might be skippable
|
||||
bool maybe_skippable = dmax >= radius_cutoff - M_SQRT2;
|
||||
if (maybe_skippable)
|
||||
GLSLF("if (d < %f) {\n", radius_cutoff / radius);
|
||||
if (scaler->gl_target == GL_TEXTURE_1D) {
|
||||
GLSLF("w = texture1D(lut, LUT_POS(d, %d.0)).r;\n",
|
||||
scaler->lut_size);
|
||||
int bound = ceil(scaler->kernel->radius_cutoff);
|
||||
for (int y = 1-bound; y <= bound; y += 2) {
|
||||
for (int x = 1-bound; x <= bound; x += 2) {
|
||||
// First we figure out whether it's more efficient to use direct
|
||||
// sampling or gathering. The problem is that gathering 4 texels
|
||||
// only to discard some of them is very wasteful, so only do it if
|
||||
// we suspect it will be a win rather than a loss. This is the case
|
||||
// exactly when all four texels are within bounds
|
||||
bool use_gather = sqrt(x*x + y*y) < scaler->kernel->radius_cutoff;
|
||||
|
||||
// textureGather is only supported in GLSL 400+
|
||||
if (glsl_version < 400)
|
||||
use_gather = false;
|
||||
|
||||
if (use_gather) {
|
||||
// Gather the four surrounding texels simultaneously
|
||||
for (int n = 0; n < components; n++) {
|
||||
GLSLF("c%d = textureGatherOffset(tex, base, ivec2(%d, %d), %d);\n",
|
||||
n, x, y, n);
|
||||
}
|
||||
|
||||
// Mix in all of the points with their weights
|
||||
for (int p = 0; p < 4; p++) {
|
||||
// The four texels are gathered counterclockwise starting
|
||||
// from the bottom left
|
||||
static const int xo[4] = {0, 1, 1, 0};
|
||||
static const int yo[4] = {1, 1, 0, 0};
|
||||
if (x+xo[p] > bound || y+yo[p] > bound)
|
||||
continue;
|
||||
polar_sample(sc, scaler, x+xo[p], y+yo[p], p, components);
|
||||
}
|
||||
} else {
|
||||
GLSLF("w = texture(lut, vec2(0.5, LUT_POS(d, %d.0))).r;\n",
|
||||
scaler->lut_size);
|
||||
// switch to direct sampling instead, for efficiency/compatibility
|
||||
for (int yy = y; yy <= bound && yy <= y+1; yy++) {
|
||||
for (int xx = x; xx <= bound && xx <= x+1; xx++)
|
||||
polar_sample(sc, scaler, xx, yy, -1, components);
|
||||
}
|
||||
}
|
||||
GLSL(wsum += w;)
|
||||
GLSLF("c = texture(tex, base + pt * vec2(%d.0, %d.0));\n", x, y);
|
||||
GLSL(color += vec4(w) * c;)
|
||||
if (use_ar && x >= 0 && y >= 0 && x <= 1 && y <= 1) {
|
||||
GLSL(lo = min(lo, c);)
|
||||
GLSL(hi = max(hi, c);)
|
||||
}
|
||||
if (maybe_skippable)
|
||||
GLSLF("}\n");
|
||||
}
|
||||
}
|
||||
|
||||
GLSL(color = color / vec4(wsum);)
|
||||
if (use_ar)
|
||||
GLSLF("color = mix(color, clamp(color, lo, hi), %f);\n",
|
||||
scaler->conf.antiring);
|
||||
GLSLF("}\n");
|
||||
}
|
||||
|
||||
|
@ -30,7 +30,8 @@ extern const struct m_sub_options deband_conf;
|
||||
void sampler_prelude(struct gl_shader_cache *sc, int tex_num);
|
||||
void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler,
|
||||
int d_x, int d_y);
|
||||
void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler);
|
||||
void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler,
|
||||
int components, int glsl_version);
|
||||
void pass_sample_bicubic_fast(struct gl_shader_cache *sc);
|
||||
void pass_sample_oversample(struct gl_shader_cache *sc, struct scaler *scaler,
|
||||
int w, int h);
|
||||
|
Loading…
Reference in New Issue
Block a user