vo_opengl: refactor: instantiate scaler functions at runtime

Before this commit, the convolution scaler shader functions were pre-instantiated in the shader file: for every filter size, a corresponding function (with the filter size as a suffix) had to be present. Change this, and make the C code emit the necessary bits instead. This means the shader code is much reduced. (It hopefully doesn't make shader compilation any faster, though - a compiler would have to be really dumb to spend its time on dead code.) It also makes the code more flexible, which is the main goal. The DEF_SCALER0 stuff is needed because the C code writes the header of the shader, at a point where the scaler macros are not defined yet.
2014-12-08 16:04:08 +01:00 · 2014-12-08 16:04:08 +01:00 · 9c484cb080
parent 4a95be014b
commit 9c484cb080
2 changed files with 43 additions and 44 deletions
--- a/video/out/gl_video.c
+++ b/video/out/gl_video.c
@ -835,27 +835,43 @@ static void shader_def_opt(char **shader, const char *name, bool b)
        shader_def(shader, name, "1");
 }

+#define APPENDF(s_ptr, ...) \
+    *(s_ptr) = talloc_asprintf_append(*(s_ptr), __VA_ARGS__)
+
 static void shader_setup_scaler(char **shader, struct scaler *scaler, int pass)
 {
-    const char *target = scaler->index == 0 ? "SAMPLE_L" : "SAMPLE_C";
+    int unit = scaler->index;
+    const char *target = unit == 0 ? "SAMPLE_L" : "SAMPLE_C";
    if (!scaler->kernel) {
-        *shader = talloc_asprintf_append(*shader, "#define %s(p0, p1, p2) "
-            "sample_%s(p0, p1, p2, filter_param1_%c)\n",
-            target, scaler->name, "lc"[scaler->index]);
+        APPENDF(shader, "#define %s(p0, p1, p2) "
+                "sample_%s(p0, p1, p2, filter_param1_%c)\n",
+                target, scaler->name, "lc"[unit]);
    } else {
        int size = scaler->kernel->size;
+        const char *lut_tex = scaler->lut_name;
+        char name[40];
+        snprintf(name, sizeof(name), "sample_scaler%d", unit);
+        APPENDF(shader, "#define DEF_SCALER%d \\\n", unit);
+        char lut_fn[40];
+        if (size < 8) {
+            snprintf(lut_fn, sizeof(lut_fn), "weights%d", size);
+        } else {
+            snprintf(lut_fn, sizeof(lut_fn), "weights_scaler%d", unit);
+            APPENDF(shader, "    WEIGHTS_N(%s, %d) \\\n    ", lut_fn, size);
+        }
        if (pass != -1) {
            // The direction/pass assignment is rather arbitrary, but fixed in
            // other parts of the code (like FBO setup).
            const char *direction = pass == 0 ? "0, 1" : "1, 0";
-            *shader = talloc_asprintf_append(*shader, "#define %s(p0, p1, p2) "
-                "sample_convolution_sep%d(vec2(%s), %s, p0, p1, p2)\n",
-                target, size, direction, scaler->lut_name);
+            // SAMPLE_CONVOLUTION_SEP_N(NAME, DIR, N, LUT, WEIGHTS_FUNC)
+            APPENDF(shader, "SAMPLE_CONVOLUTION_SEP_N(%s, vec2(%s), %d, %s, %s)\n",
+                    name, direction, size, lut_tex, lut_fn);
        } else {
-            *shader = talloc_asprintf_append(*shader, "#define %s(p0, p1, p2) "
-                "sample_convolution%d(%s, p0, p1, p2)\n",
-                target, size, scaler->lut_name);
+            // SAMPLE_CONVOLUTION_N(NAME, N, LUT, WEIGHTS_FUNC)
+            APPENDF(shader, "SAMPLE_CONVOLUTION_N(%s, %d, %s, %s)\n",
+                    name, size, lut_tex, lut_fn);
        }
+        APPENDF(shader, "#define %s %s\n", target, name);
    }
 }

--- a/video/out/gl_video_shaders.glsl
+++ b/video/out/gl_video_shaders.glsl
@ -235,6 +235,7 @@ float[6] weights6(sampler2D lookup, float f) {
    return float[6](c1.r, c1.g, c1.b, c2.r, c2.g, c2.b);
 }

+// For N=n*4 with n>1 (N==4 is covered by weights4()).
 #define WEIGHTS_N(NAME, N)                          \
    float[N] NAME(sampler2D lookup, float f) {      \
        float r[N];                                 \
@ -249,21 +250,14 @@ float[6] weights6(sampler2D lookup, float f) {
        return r;                                   \
    }

-WEIGHTS_N(weights8, 8)
-WEIGHTS_N(weights12, 12)
-WEIGHTS_N(weights16, 16)
-WEIGHTS_N(weights32, 32)
-WEIGHTS_N(weights64, 64)
-
-// The dir parameter is (0, 1) or (1, 0), and we expect the shader compiler to
+// The DIR parameter is (0, 1) or (1, 0), and we expect the shader compiler to
 // remove all the redundant multiplications and additions.
-#define SAMPLE_CONVOLUTION_SEP_N(NAME, N, WEIGHTS_FUNC)                     \
-    vec4 NAME(vec2 dir, sampler2D lookup, VIDEO_SAMPLER tex, vec2 texsize,  \
-              vec2 texcoord) {                                              \
-        vec2 pt = (1 / texsize) * dir;                                      \
-        float fcoord = dot(fract(texcoord * texsize - 0.5), dir);           \
+#define SAMPLE_CONVOLUTION_SEP_N(NAME, DIR, N, LUT, WEIGHTS_FUNC)           \
+    vec4 NAME(VIDEO_SAMPLER tex, vec2 texsize, vec2 texcoord) {             \
+        vec2 pt = (1 / texsize) * DIR;                                      \
+        float fcoord = dot(fract(texcoord * texsize - 0.5), DIR);           \
        vec2 base = texcoord - fcoord * pt - pt * (N / 2 - 1);              \
-        float weights[N] = WEIGHTS_FUNC(lookup, fcoord);                    \
+        float weights[N] = WEIGHTS_FUNC(LUT, fcoord);                       \
        vec4 res = vec4(0);                                                 \
        for (int n = 0; n < N; n++) {                                       \
            res += weights[n] * texture(tex, base + pt * n);                \
@ -271,23 +265,14 @@ WEIGHTS_N(weights64, 64)
        return res;                                                         \
    }

-SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep2, 2, weights2)
-SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep4, 4, weights4)
-SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep6, 6, weights6)
-SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep8, 8, weights8)
-SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep12, 12, weights12)
-SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep16, 16, weights16)
-SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep32, 32, weights32)
-SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep64, 64, weights64)
-
-#define SAMPLE_CONVOLUTION_N(NAME, N, WEIGHTS_FUNC)                         \
-    vec4 NAME(sampler2D lookup, VIDEO_SAMPLER tex, vec2 texsize, vec2 texcoord) {\
+#define SAMPLE_CONVOLUTION_N(NAME, N, LUT, WEIGHTS_FUNC)                    \
+    vec4 NAME(VIDEO_SAMPLER tex, vec2 texsize, vec2 texcoord) {             \
        vec2 pt = 1 / texsize;                                              \
        vec2 fcoord = fract(texcoord * texsize - 0.5);                      \
        vec2 base = texcoord - fcoord * pt - pt * (N / 2 - 1);              \
        vec4 res = vec4(0);                                                 \
-        float w_x[N] = WEIGHTS_FUNC(lookup, fcoord.x);                      \
-        float w_y[N] = WEIGHTS_FUNC(lookup, fcoord.y);                      \
+        float w_x[N] = WEIGHTS_FUNC(LUT, fcoord.x);                         \
+        float w_y[N] = WEIGHTS_FUNC(LUT, fcoord.y);                         \
        for (int y = 0; y < N; y++) {                                       \
            vec4 line = vec4(0);                                            \
            for (int x = 0; x < N; x++)                                     \
@ -297,14 +282,12 @@ SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep64, 64, weights64)
        return res;                                                         \
    }

-SAMPLE_CONVOLUTION_N(sample_convolution2, 2, weights2)
-SAMPLE_CONVOLUTION_N(sample_convolution4, 4, weights4)
-SAMPLE_CONVOLUTION_N(sample_convolution6, 6, weights6)
-SAMPLE_CONVOLUTION_N(sample_convolution8, 8, weights8)
-SAMPLE_CONVOLUTION_N(sample_convolution12, 12, weights12)
-SAMPLE_CONVOLUTION_N(sample_convolution16, 16, weights16)
-SAMPLE_CONVOLUTION_N(sample_convolution32, 32, weights32)
-SAMPLE_CONVOLUTION_N(sample_convolution64, 64, weights64)
+#ifdef DEF_SCALER0
+DEF_SCALER0
+#endif
+#ifdef DEF_SCALER1
+DEF_SCALER1
+#endif

 // Unsharp masking
 vec4 sample_sharpen3(VIDEO_SAMPLER tex, vec2 texsize, vec2 texcoord, float param1) {