mirror of
https://github.com/mpv-player/mpv
synced 2025-02-12 18:07:12 +00:00
vo_opengl: unroll ewa_lanczos to avoid looping and unnecessary samples
This improves performance by roughly 10%, since it omits unnecessary checks. This will also make adding anti-ringing easier.
This commit is contained in:
parent
f5e48f0235
commit
6c250505fe
@ -954,9 +954,29 @@ static void shader_setup_scaler(char **shader, struct scaler *scaler, int pass)
|
|||||||
APPENDF(shader, "#define DEF_SCALER%d \\\n ", unit);
|
APPENDF(shader, "#define DEF_SCALER%d \\\n ", unit);
|
||||||
char lut_fn[40];
|
char lut_fn[40];
|
||||||
if (scaler->kernel->polar) {
|
if (scaler->kernel->polar) {
|
||||||
|
int radius = (int)scaler->kernel->radius;
|
||||||
// SAMPLE_CONVOLUTION_POLAR_R(NAME, R, LUT)
|
// SAMPLE_CONVOLUTION_POLAR_R(NAME, R, LUT)
|
||||||
APPENDF(shader, "SAMPLE_CONVOLUTION_POLAR_R(%s, %d, %s)\n",
|
APPENDF(shader, "SAMPLE_CONVOLUTION_POLAR_R(%s, %d, %s, WEIGHTS%d)\n",
|
||||||
name, (int)scaler->kernel->radius, lut_tex);
|
name, radius, lut_tex, unit);
|
||||||
|
|
||||||
|
// Pre-compute unrolled weights matrix
|
||||||
|
APPENDF(shader, "#define WEIGHTS%d(LUT) \\\n ", unit);
|
||||||
|
for (int y = 1-radius; y <= radius; y++) {
|
||||||
|
for (int x = 1-radius; x <= radius; x++) {
|
||||||
|
// Since we can't know the subpixel position in advance,
|
||||||
|
// assume a worst case scenario.
|
||||||
|
int yy = y > 0 ? y-1 : y;
|
||||||
|
int xx = x > 0 ? x-1 : x;
|
||||||
|
double d = sqrt(xx*xx + yy*yy);
|
||||||
|
|
||||||
|
// Samples outside the radius are unnecessary
|
||||||
|
if (d < radius) {
|
||||||
|
APPENDF(shader, "SAMPLE_POLAR(LUT, %f, %d, %d) \\\n ",
|
||||||
|
(double)radius, x, y);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
APPENDF(shader, "\n");
|
||||||
} else {
|
} else {
|
||||||
if (size == 2 || size == 6) {
|
if (size == 2 || size == 6) {
|
||||||
snprintf(lut_fn, sizeof(lut_fn), "weights%d", size);
|
snprintf(lut_fn, sizeof(lut_fn), "weights%d", size);
|
||||||
|
@ -298,21 +298,20 @@ float[6] weights6(sampler2D lookup, float f) {
|
|||||||
return res; \
|
return res; \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define SAMPLE_POLAR(LUT, R, X, Y) \
|
||||||
|
w = texture1D(LUT, length(vec2(X, Y) - fcoord)/R).r; \
|
||||||
|
wsum += w; \
|
||||||
|
res += w * texture(tex, base + pt * vec2(X, Y)); \
|
||||||
|
|
||||||
#define SAMPLE_CONVOLUTION_POLAR_R(NAME, R, LUT) \
|
#define SAMPLE_CONVOLUTION_POLAR_R(NAME, R, LUT, WEIGHTS_FN) \
|
||||||
vec4 NAME(VIDEO_SAMPLER tex, vec2 texsize, vec2 texcoord) { \
|
vec4 NAME(VIDEO_SAMPLER tex, vec2 texsize, vec2 texcoord) { \
|
||||||
vec2 pt = vec2(1.0) / texsize; \
|
vec2 pt = vec2(1.0) / texsize; \
|
||||||
vec2 fcoord = fract(texcoord * texsize - vec2(0.5)); \
|
vec2 fcoord = fract(texcoord * texsize - vec2(0.5)); \
|
||||||
vec2 base = texcoord - fcoord * pt; \
|
vec2 base = texcoord - fcoord * pt; \
|
||||||
vec4 res = vec4(0); \
|
vec4 res = vec4(0); \
|
||||||
float wsum = 0; \
|
float wsum = 0; \
|
||||||
for (int y = 1-R; y <= R; y++) { \
|
float w; \
|
||||||
for (int x = 1-R; x <= R; x++) { \
|
WEIGHTS_FN(LUT); \
|
||||||
float w = texture1D(LUT, length(vec2(x,y) - fcoord)/R).r; \
|
|
||||||
wsum += w; \
|
|
||||||
res += w * texture(tex, base + pt * vec2(x, y)); \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
return res / wsum; \
|
return res / wsum; \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user