From 26baf5b9da4faaec6f5e39a8efba7e58dd6317ed Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@nand.wakku.to>
Date: Sun, 4 Jan 2015 23:11:27 +0100
Subject: [PATCH] vo_opengl: add ewa_lanczos upscaler (aka jinc)

This is the polar (elliptic weighted average) version of lanczos.
This introduces a general new form of polar filters.
---
 DOCS/man/vo.rst                 |  8 +++-
 video/out/filter_kernels.c      | 71 +++++++++++++++++++++++++++
 video/out/filter_kernels.h      |  3 ++
 video/out/gl_video.c            | 85 ++++++++++++++++++++++-----------
 video/out/gl_video_shaders.glsl | 21 ++++++++
 5 files changed, 158 insertions(+), 30 deletions(-)

diff --git a/DOCS/man/vo.rst b/DOCS/man/vo.rst
index 0566fd6817..92b88dc523 100644
--- a/DOCS/man/vo.rst
+++ b/DOCS/man/vo.rst
@@ -304,6 +304,12 @@ Available video output drivers are:
         ``lanczos``
             Generic Lanczos scaling filter. Set radius with ``lradius``.
 
+        ``ewa_lanczos``
+            Generic elliptic weighted average Lanczos scaling filter. Also
+            known as Jinc. The radius can be set with ``lradius`` up to a
+            maximum value of 16, but note that performance drops very quickly
+            as the radius increases.
+
         ``spline36``
             This is the default when using ``opengl-hq``.
 
@@ -348,7 +354,7 @@ Available video output drivers are:
         Set radius for filters listed below, must be a float number between 1.0
         and 8.0. Defaults to be 2.0 if not specified.
 
-            ``sinc``, ``lanczos``, ``blackman``, ``gaussian``
+            ``sinc``, ``lanczos``, ``ewa_lanczos``, ``blackman``, ``gaussian``
 
         Note that depending on filter implementation details and video scaling
         ratio, the radius that actually being used might be different
diff --git a/video/out/filter_kernels.c b/video/out/filter_kernels.c
index 26f62ff567..4e5ca471bb 100644
--- a/video/out/filter_kernels.c
+++ b/video/out/filter_kernels.c
@@ -58,6 +58,11 @@ bool mp_init_filter(struct filter_kernel *filter, const int *sizes,
 {
     if (filter->radius < 0)
         filter->radius = 2.0;
+    // polar filters can be of any radius, and nothing special is needed
+    if (filter->polar) {
+        filter->size = filter->radius;
+        return true;
+    }
     // only downscaling requires widening the filter
     filter->inv_scale = inv_scale >= 1.0 ? inv_scale : 1.0;
     double support = filter->radius * filter->inv_scale;
@@ -111,6 +116,18 @@ void mp_compute_lut(struct filter_kernel *filter, int count, float *out_array)
     }
 }
 
+// Fill out_array with count weights sampled uniformly over [0, R], where R is
+// the radius of the filter: entry i holds the weight at i * R / (count - 1).
+// The array is interpreted as a one-dimensional array of count items.
+void mp_compute_lut_polar(struct filter_kernel *filter, int count, float *out_array)
+{
+    assert(filter->radius > 0);
+    for (int i = 0; i < count; i++) {
+        double dist = i * filter->radius / (count - 1);
+        out_array[i] = dist <= filter->radius ? filter->weight(filter, dist) : 0;
+    }
+}
+
 typedef struct filter_kernel kernel;
 
 static double nearest(kernel *k, double x)
@@ -261,6 +278,14 @@ static double sinc(kernel *k, double x)
     return sin(pix) / pix;
 }
 
+static double jinc(kernel *k, double x)
+{
+    // The expression below is 0/0 at the origin, so x == 0 is special-cased
+    // to 1.0 and the Bessel term is not evaluated there.
+    double arg = M_PI * x;
+    return x == 0.0 ? 1.0 : 2.0 * j1(arg) / arg;
+}
+
 static double lanczos(kernel *k, double x)
 {
     double radius = k->size / 2;
@@ -272,6 +297,48 @@ static double lanczos(kernel *k, double x)
     return radius * sin(pix) * sin(pix / radius) / (pix * pix);
 }
 
+static double ewa_lanczos(kernel *k, double x)
+{
+    // Jinc-windowed jinc weight at distance x from the filter center.
+    double radius = k->radius;
+    assert(radius >= 1.0);
+
+    // Radii above 16 are far too slow for realtime playback and ring heavily;
+    // clamping also keeps the jinc_zeros index below within bounds.
+    if (radius > 16)
+        radius = 16;
+
+    if (fabs(x) < 1e-8)
+        return 1.0;
+    if (fabs(x) >= radius)
+        return 0.0;
+
+    // Precomputed zeros of jinc() (nontrivial to compute at runtime). The
+    // window is a jinc stretched so its first zero lands on zero #(int)radius.
+    // Copied from: https://github.com/AviSynth/jinc-resize/blob/master/JincResize/JincFilter.cpp#L171
+    static const double jinc_zeros[16] = {
+        1.2196698912665045,
+        2.2331305943815286,
+        3.2383154841662362,
+        4.2410628637960699,
+        5.2427643768701817,
+        6.2439216898644877,
+        7.2447598687199570,
+        8.2453949139520427,
+        9.2458926849494673,
+        10.246293348754916,
+        11.246622794877883,
+        12.246898461138105,
+        13.247132522181061,
+        14.247333735806849,
+        15.247508563037300,
+        16.247661874700962
+    };
+
+    double window = jinc_zeros[0] / jinc_zeros[(int)radius - 1];
+    return jinc(k, x) * jinc(k, x*window);
+}
+
 static double blackman(kernel *k, double x)
 {
     double radius = k->size / 2;
@@ -303,6 +370,10 @@ const struct filter_kernel mp_filter_kernels[] = {
     {"sinc3",          3,   sinc},
     {"sinc4",          4,   sinc},
     {"sinc",           -1,  sinc},
+    {"ewa_lanczos2",   2,   ewa_lanczos, .polar = true},
+    {"ewa_lanczos3",   3,   ewa_lanczos, .polar = true},
+    {"ewa_lanczos4",   4,   ewa_lanczos, .polar = true},
+    {"ewa_lanczos",    -1,  ewa_lanczos, .polar = true},
     {"lanczos2",       2,   lanczos},
     {"lanczos3",       3,   lanczos},
     {"lanczos4",       4,   lanczos},
diff --git a/video/out/filter_kernels.h b/video/out/filter_kernels.h
index f9a413b9f7..4b407f4479 100644
--- a/video/out/filter_kernels.h
+++ b/video/out/filter_kernels.h
@@ -28,6 +28,8 @@ struct filter_kernel {
 
     // The filter params can be changed at runtime. Only used by some filters.
     float params[2];
+    // Whether or not the filter uses polar coordinates
+    bool polar;
     // The following values are set by mp_init_filter() at runtime.
     // Number of coefficients; equals the rounded up radius multiplied with 2.
     int size;
@@ -41,5 +43,6 @@ bool mp_init_filter(struct filter_kernel *filter, const int *sizes,
                     double scale);
 void mp_compute_weights(struct filter_kernel *filter, double f, float *out_w);
 void mp_compute_lut(struct filter_kernel *filter, int count, float *out_array);
+void mp_compute_lut_polar(struct filter_kernel *filter, int count, float *out_array);
 
 #endif /* MPLAYER_FILTER_KERNELS_H */
diff --git a/video/out/gl_video.c b/video/out/gl_video.c
index c6077c6ad2..36a34449c9 100644
--- a/video/out/gl_video.c
+++ b/video/out/gl_video.c
@@ -952,23 +952,29 @@ static void shader_setup_scaler(char **shader, struct scaler *scaler, int pass)
         snprintf(name, sizeof(name), "sample_scaler%d", unit);
         APPENDF(shader, "#define DEF_SCALER%d \\\n    ", unit);
         char lut_fn[40];
-        if (size == 2 || size == 6) {
-            snprintf(lut_fn, sizeof(lut_fn), "weights%d", size);
+        if (scaler->kernel->polar) {
+            // SAMPLE_CONVOLUTION_POLAR_R(NAME, R, LUT)
+            APPENDF(shader, "SAMPLE_CONVOLUTION_POLAR_R(%s, %d, %s)\n",
+                    name, (int)scaler->kernel->radius, lut_tex);
         } else {
-            snprintf(lut_fn, sizeof(lut_fn), "weights_scaler%d", unit);
-            APPENDF(shader, "WEIGHTS_N(%s, %d) \\\n    ", lut_fn, size);
-        }
-        if (pass != -1) {
-            // The direction/pass assignment is rather arbitrary, but fixed in
-            // other parts of the code (like FBO setup).
-            const char *direction = pass == 0 ? "0, 1" : "1, 0";
-            // SAMPLE_CONVOLUTION_SEP_N(NAME, DIR, N, LUT, WEIGHTS_FUNC)
-            APPENDF(shader, "SAMPLE_CONVOLUTION_SEP_N(%s, vec2(%s), %d, %s, %s)\n",
-                    name, direction, size, lut_tex, lut_fn);
-        } else {
-            // SAMPLE_CONVOLUTION_N(NAME, N, LUT, WEIGHTS_FUNC)
-            APPENDF(shader, "SAMPLE_CONVOLUTION_N(%s, %d, %s, %s)\n",
-                    name, size, lut_tex, lut_fn);
+            if (size == 2 || size == 6) {
+                snprintf(lut_fn, sizeof(lut_fn), "weights%d", size);
+            } else {
+                snprintf(lut_fn, sizeof(lut_fn), "weights_scaler%d", unit);
+                APPENDF(shader, "WEIGHTS_N(%s, %d) \\\n    ", lut_fn, size);
+            }
+            if (pass != -1) {
+                // The direction/pass assignment is rather arbitrary, but fixed in
+                // other parts of the code (like FBO setup).
+                const char *direction = pass == 0 ? "0, 1" : "1, 0";
+                // SAMPLE_CONVOLUTION_SEP_N(NAME, DIR, N, LUT, WEIGHTS_FUNC)
+                APPENDF(shader, "SAMPLE_CONVOLUTION_SEP_N(%s, vec2(%s), %d, %s, %s)\n",
+                        name, direction, size, lut_tex, lut_fn);
+            } else {
+                // SAMPLE_CONVOLUTION_N(NAME, N, LUT, WEIGHTS_FUNC)
+                APPENDF(shader, "SAMPLE_CONVOLUTION_N(%s, %d, %s, %s)\n",
+                        name, size, lut_tex, lut_fn);
+            }
         }
         APPENDF(shader, "#define %s %s\n", target, name);
     }
@@ -1163,7 +1169,7 @@ static void compile_shaders(struct gl_video *p)
     shader_def_opt(&header_final, "USE_DITHER", p->dither_texture != 0);
     shader_def_opt(&header_final, "USE_TEMPORAL_DITHER", p->opts.temporal_dither);
 
-    if (p->opts.scale_sep && p->scalers[0].kernel) {
+    if (p->opts.scale_sep && p->scalers[0].kernel && !p->scalers[0].kernel->polar) {
         header_sep = talloc_strdup(tmp, "");
         shader_def_opt(&header_sep, "FIXED_SCALE", true);
         shader_setup_scaler(&header_sep, &p->scalers[0], 0);
@@ -1312,32 +1318,53 @@ static void init_scaler(struct gl_video *p, struct scaler *scaler)
 
     int size = scaler->kernel->size;
     int elems_per_pixel = 4;
-    if (size == 2) {
+    if (scaler->kernel->polar) {
+        elems_per_pixel = 1;
+    } else if (size == 2) {
         elems_per_pixel = 2;
     } else if (size == 6) {
         elems_per_pixel = 3;
     }
     int width = size / elems_per_pixel;
     const struct fmt_entry *fmt = &gl_float16_formats[elems_per_pixel - 1];
-    scaler->lut_name = scaler->index == 0 ? "lut_l" : "lut_c";
+    if (scaler->kernel->polar) {
+        scaler->lut_name = scaler->index == 0 ? "lut_polar_l" : "lut_polar_c";
+    } else {
+        scaler->lut_name = scaler->index == 0 ? "lut_l" : "lut_c";
+    }
 
     gl->ActiveTexture(GL_TEXTURE0 + TEXUNIT_SCALERS + scaler->index);
 
     if (!scaler->gl_lut)
         gl->GenTextures(1, &scaler->gl_lut);
 
-    gl->BindTexture(GL_TEXTURE_2D, scaler->gl_lut);
+    if (scaler->kernel->polar) {
+        gl->BindTexture(GL_TEXTURE_1D, scaler->gl_lut);
 
-    float *weights = talloc_array(NULL, float, LOOKUP_TEXTURE_SIZE * size);
-    mp_compute_lut(scaler->kernel, LOOKUP_TEXTURE_SIZE, weights);
-    gl->TexImage2D(GL_TEXTURE_2D, 0, fmt->internal_format, width,
-                   LOOKUP_TEXTURE_SIZE, 0, fmt->format, GL_FLOAT, weights);
-    talloc_free(weights);
+        float *weights = talloc_array(NULL, float, LOOKUP_TEXTURE_SIZE);
+        mp_compute_lut_polar(scaler->kernel, LOOKUP_TEXTURE_SIZE, weights);
+        gl->TexImage1D(GL_TEXTURE_1D, 0, fmt->internal_format, LOOKUP_TEXTURE_SIZE,
+                       0, fmt->format, GL_FLOAT, weights);
+        talloc_free(weights);
+
+        gl->TexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+        gl->TexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+        gl->TexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+    } else {
+        gl->BindTexture(GL_TEXTURE_2D, scaler->gl_lut);
+
+        float *weights = talloc_array(NULL, float, LOOKUP_TEXTURE_SIZE * size);
+        mp_compute_lut(scaler->kernel, LOOKUP_TEXTURE_SIZE, weights);
+        gl->TexImage2D(GL_TEXTURE_2D, 0, fmt->internal_format, width,
+                       LOOKUP_TEXTURE_SIZE, 0, fmt->format, GL_FLOAT, weights);
+        talloc_free(weights);
+
+        gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+        gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+        gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+        gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+    }
 
-    gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-    gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
-    gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
-    gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
 
     gl->ActiveTexture(GL_TEXTURE0);
 
diff --git a/video/out/gl_video_shaders.glsl b/video/out/gl_video_shaders.glsl
index 4037e42449..dac19cb673 100644
--- a/video/out/gl_video_shaders.glsl
+++ b/video/out/gl_video_shaders.glsl
@@ -168,6 +168,8 @@ uniform vec2 chroma_center_offset;
 uniform vec2 chroma_div;
 uniform sampler2D lut_c;
 uniform sampler2D lut_l;
+uniform sampler1D lut_polar_c;
+uniform sampler1D lut_polar_l;
 #if HAVE_3DTEX
 uniform sampler3D lut_3d;
 #endif
@@ -297,6 +299,25 @@ float[6] weights6(sampler2D lookup, float f) {
         return res;                                                         \
     }
 
+// Define polar sampler NAME: 2R x 2R taps weighted by LUT(dist / R), normalized.
+#define SAMPLE_CONVOLUTION_POLAR_R(NAME, R, LUT)                            \
+    vec4 NAME(VIDEO_SAMPLER tex, vec2 texsize, vec2 texcoord) {             \
+        vec2 pt = vec2(1.0) / texsize;                                      \
+        vec2 fcoord = fract(texcoord * texsize - vec2(0.5));                \
+        vec2 base = texcoord - fcoord * pt;                                 \
+        vec4 res = vec4(0);                                                 \
+        float wsum = 0;                                                     \
+        for (int y = 1-R; y <= R; y++) {                                    \
+            for (int x = 1-R; x <= R; x++) {                                \
+                vec2 d = vec2(x,y) - fcoord;                                \
+                float w = texture1D(LUT, sqrt(d.x*d.x + d.y*d.y)/R).r;      \
+                wsum += w;                                                  \
+                res += w * texture(tex, base + pt * vec2(x, y));            \
+            }                                                               \
+        }                                                                   \
+        return res / wsum;                                                  \
+    }
+
 #ifdef DEF_SCALER0
 DEF_SCALER0
 #endif