vo_gpu: redesign peak detection algorithm

The previous approach of using an FIR with tunable hard threshold for scene changes had several problems:

- the FIR involved annoying hard-coded buffer sizes, high VRAM usage, and the FIR sum was prone to numerical overflow which limited the number of frames we could average over. We also totally redesign the scene change detection.
- the hard scene change detection was prone to both false positives and false negatives, each with their own (annoying) issues.

Scrap this entirely and switch to a dual approach of using a simple single-pole IIR low pass filter to smooth out noise, while using a softer scene change curve (with tunable low and high thresholds), based on `smoothstep`. The IIR filter is extremely simple in its implementation and has an arbitrarily user-tunable cutoff frequency, while the smoothstep-based scene change curve provides a good, tunable tradeoff between adaptation speed and stability - without exhibiting either of the traditional issues associated with the hard cutoff.

Another way to think about the new options is that the "low threshold" provides a margin of error within which we don't care about small fluctuations in the scene (which will therefore be smoothed out by the IIR filter).
2024-12-27 01:22:30 +00:00 · 2019-01-01 07:30:00 +01:00 · 2019-01-01 07:30:00 +01:00 · 6179dcbb79
commit 6179dcbb79
parent 3fe882d4ae
5 changed files with 82 additions and 73 deletions
--- a/DOCS/interface-changes.rst
+++ b/DOCS/interface-changes.rst
@ -51,6 +51,7 @@ Interface changes
      only using a single value (which previously just controlled the exponent).
      The strength now linearly blends between the linear and nonlinear tone
      mapped versions of a color.
+    - add --hdr-peak-decay-rate and --hdr-scene-threshold-low/high
 --- mpv 0.29.0 ---
    - drop --opensles-sample-rate, as --audio-samplerate should be used if desired
    - drop deprecated --videotoolbox-format, --ff-aid, --ff-vid, --ff-sid,
--- a/DOCS/man/options.rst
+++ b/DOCS/man/options.rst
@ -5245,6 +5245,30 @@ The following video options are currently all specific to ``--vo=gpu`` and
    The special value ``auto`` (default) will enable HDR peak computation
    automatically if compute shaders and SSBOs are supported.

+``--hdr-peak-decay-rate=<1.0..1000.0>``
+    The decay rate used for the HDR peak detection algorithm (default: 100.0).
+    This is only relevant when ``--hdr-compute-peak`` is enabled. Higher values
+    make the peak decay more slowly, leading to more stable values at the cost
+    of more "eye adaptation"-like effects (although this is mitigated somewhat
+    by ``--hdr-scene-threshold-low`` and ``--hdr-scene-threshold-high``). A
+    value of 1.0 (the lowest possible) disables
+    all averaging, meaning each frame's value is used directly as measured,
+    but doing this is not recommended for "noisy" sources since it may lead
+    to excessive flicker. (In signal theory terms, this controls the time
+    constant "tau" of an IIR low pass filter)
+
+``--hdr-scene-threshold-low=<0..10000>``, ``--hdr-scene-threshold-high=<0..10000>``
+    The lower and upper thresholds (in cd/m^2) for a brightness difference to
+    be considered a scene change (default: 50 low, 200 high). This is only
+    relevant when ``--hdr-compute-peak`` is enabled. Normally, small
+    fluctuations in the frame brightness are compensated for by the peak
+    averaging mechanism, but for large jumps in the brightness this can result
+    in the frame remaining too bright or too dark for up to several seconds,
+    depending on the value of ``--hdr-peak-decay-rate``. To counteract this,
+    when the brightness difference between the running average and the current
+    frame exceeds the low threshold, mpv will make the averaging filter more
+    aggressive, up to the limit of the high threshold (at which point the
+    filter becomes instant).
+
 ``--tone-mapping-desaturate=<0.0..1.0>``
    Apply desaturation for highlights (default: 0.75). The parameter controls
    the strength of the desaturation curve. A value of 0.0 completely disables
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@ -316,6 +316,9 @@ static const struct gl_video_opts gl_video_opts_def = {
    .tone_map = {
        .curve = TONE_MAPPING_HABLE,
        .curve_param = NAN,
+        .decay_rate = 100.0,
+        .scene_threshold_low = 50,
+        .scene_threshold_high = 200,
        .desat = 0.75,
        .desat_exp = 1.5,
    },
@ -367,6 +370,11 @@ const struct m_sub_options gl_video_conf = {
                   ({"auto", 0},
                    {"yes", 1},
                    {"no", -1})),
+        OPT_FLOATRANGE("hdr-peak-decay-rate", tone_map.decay_rate, 0, 1.0, 1000.0),
+        OPT_INTRANGE("hdr-scene-threshold-low",
+                     tone_map.scene_threshold_low, 0, 0, 10000),
+        OPT_INTRANGE("hdr-scene-threshold-high",
+                     tone_map.scene_threshold_high, 0, 0, 10000),
        OPT_FLOAT("tone-mapping-param", tone_map.curve_param, 0),
        OPT_FLOAT("tone-mapping-desaturate", tone_map.desat, 0),
        OPT_FLOATRANGE("tone-mapping-desaturate-exponent",
@ -2478,17 +2486,18 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
        dst.sig_peak = mp_trc_nom_peak(dst.gamma);

    struct gl_tone_map_opts tone_map = p->opts.tone_map;
-    bool detect_peak = tone_map.compute_peak >= 0 && mp_trc_is_hdr(src.gamma);
+    bool detect_peak = tone_map.compute_peak >= 0 && mp_trc_is_hdr(src.gamma)
+                       && src.sig_peak > dst.sig_peak;
+
    if (detect_peak && !p->hdr_peak_ssbo) {
        struct {
+            float average[2];
+            uint32_t frame_sum;
+            uint32_t frame_max;
            uint32_t counter;
-            uint32_t frame_idx;
-            uint32_t frame_num;
-            uint32_t frame_max[PEAK_DETECT_FRAMES+1];
-            uint32_t frame_sum[PEAK_DETECT_FRAMES+1];
-            uint32_t total_max;
-            uint32_t total_sum;
-        } peak_ssbo = {0};
+        } peak_ssbo = {
+            .average = { 0.25, src.sig_peak },
+        };

        struct ra_buf_params params = {
            .type = RA_BUF_TYPE_SHADER_STORAGE,
@ -2508,15 +2517,10 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
        pass_describe(p, "detect HDR peak");
        pass_is_compute(p, 8, 8, true); // 8x8 is good for performance
        gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo,
+            "vec2 average;"
+            "uint frame_sum;"
+            "uint frame_max;"
            "uint counter;"
-            "uint frame_idx;"
-            "uint frame_num;"
-            "uint frame_max[%d];"
-            "uint frame_avg[%d];"
-            "uint total_max;"
-            "uint total_avg;",
-            PEAK_DETECT_FRAMES + 1,
-            PEAK_DETECT_FRAMES + 1
        );
    }

--- a/video/out/gpu/video.h
+++ b/video/out/gpu/video.h
@ -95,13 +95,13 @@ enum tone_mapping {
    TONE_MAPPING_LINEAR,
 };

-// How many frames to average over for HDR peak detection
-#define PEAK_DETECT_FRAMES 63
-
 struct gl_tone_map_opts {
    int curve;
    float curve_param;
    int compute_peak;
+    float decay_rate;
+    int scene_threshold_low;
+    int scene_threshold_high;
    float desat;
    float desat_exp;
    int gamut_warning; // bool
--- a/video/out/gpu/video_shaders.c
+++ b/video/out/gpu/video_shaders.c
@ -567,75 +567,55 @@ static void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light ligh
 // under a typical presentation gamma of about 2.0.
 static const float sdr_avg = 0.25;

-// The threshold for which to consider an average luminance difference to be
-// a sign of a scene change.
-static const int scene_threshold = 0.2 * MP_REF_WHITE;
-
-static void hdr_update_peak(struct gl_shader_cache *sc)
+static void hdr_update_peak(struct gl_shader_cache *sc,
+                            const struct gl_tone_map_opts *opts)
 {
-    // For performance, we want to do as few atomic operations on global
-    // memory as possible, so use an atomic in shmem for the work group.
-    GLSLH(shared uint wg_sum;);
-    GLSL(wg_sum = 0;)
+    // Update the sig_peak/sig_avg from the old SSBO state
+    GLSL(sig_avg  = max(1e-3, average.x);)
+    GLSL(sig_peak = max(1.00, average.y);)

-    // Have each thread update the work group sum with the local value
+    // For performance, and to avoid overflows, we tally up the sub-results per
+    // pixel using shared memory first
+    GLSLH(shared uint wg_sum;)
+    GLSLH(shared uint wg_max;)
+    GLSL(wg_sum = wg_max = 0;)
    GLSL(barrier();)
-    GLSLF("atomicAdd(wg_sum, uint(sig_max * %f));\n", MP_REF_WHITE);
+    GLSLF("uint sig_uint = uint(sig_max * %f);\n", MP_REF_WHITE);
+    GLSL(atomicAdd(wg_sum, sig_uint);)
+    GLSL(atomicMax(wg_max, sig_uint);)

-    // Have one thread per work group update the global atomics. We use the
-    // work group average even for the global sum, to make the values slightly
-    // more stable and smooth out tiny super-highlights.
+    // Have one thread per work group update the global atomics
    GLSL(memoryBarrierShared();)
    GLSL(barrier();)
    GLSL(if (gl_LocalInvocationIndex == 0) {)
    GLSL(    uint wg_avg = wg_sum / (gl_WorkGroupSize.x * gl_WorkGroupSize.y);)
-    GLSL(    atomicMax(frame_max[frame_idx], wg_avg);)
-    GLSL(    atomicAdd(frame_avg[frame_idx], wg_avg);)
+    GLSL(    atomicAdd(frame_sum, wg_avg);)
+    GLSL(    atomicMax(frame_max, wg_max);)
+    GLSL(    memoryBarrierBuffer();)
    GLSL(})
-
-    const float refi = 1.0 / MP_REF_WHITE;
-
-    // Update the sig_peak/sig_avg from the old SSBO state
-    GLSL(uint num_wg = gl_NumWorkGroups.x * gl_NumWorkGroups.y;)
-    GLSL(if (frame_num > 0) {)
-    GLSLF("    float peak = %f * float(total_max) / float(frame_num);\n", refi);
-    GLSLF("    float avg = %f * float(total_avg) / float(frame_num);\n", refi);
-    GLSLF("    sig_peak = max(1.0, peak);\n");
-    GLSLF("    sig_avg  = max(%f, avg);\n", sdr_avg);
-    GLSL(});
+    GLSL(barrier();)

    // Finally, to update the global state, we increment a counter per dispatch
-    GLSL(memoryBarrierBuffer();)
-    GLSL(barrier();)
+    GLSL(uint num_wg = gl_NumWorkGroups.x * gl_NumWorkGroups.y;)
    GLSL(if (gl_LocalInvocationIndex == 0 && atomicAdd(counter, 1) == num_wg - 1) {)
-
-    // Since we sum up all the workgroups, we also still need to divide the
-    // average by the number of work groups
    GLSL(    counter = 0;)
-    GLSL(    frame_avg[frame_idx] /= num_wg;)
-    GLSL(    uint cur_max = frame_max[frame_idx];)
-    GLSL(    uint cur_avg = frame_avg[frame_idx];)
+    GLSL(    vec2 cur = vec2(float(frame_sum) / float(num_wg), frame_max);)
+    GLSLF("  cur *= 1.0/%f;\n", MP_REF_WHITE);

-    // Scene change detection
-    GLSL(    int diff = int(frame_num * cur_avg) - int(total_avg);)
-    GLSLF("  if (abs(diff) > frame_num * %d) {\n", scene_threshold);
-    GLSL(        frame_num = 0;)
-    GLSL(        total_max = total_avg = 0;)
-    GLSLF("      for (uint i = 0; i < %d; i++)\n", PEAK_DETECT_FRAMES+1);
-    GLSL(            frame_max[i] = frame_avg[i] = 0;)
-    GLSL(        frame_max[frame_idx] = cur_max;)
-    GLSL(        frame_avg[frame_idx] = cur_avg;)
-    GLSL(    })
+    // Use an IIR low-pass filter to smooth out the detected values, with a
+    // configurable decay rate based on the desired time constant (tau)
+    float a = 1.0 - cos(1.0 / opts->decay_rate);
+    float decay = sqrt(a*a + 2*a) - a;
+    GLSLF("  average += %f * (cur - average);\n", decay);

-    // Add the current frame, then subtract and reset the next frame
-    GLSLF("  uint next = (frame_idx + 1) %% %d;\n", PEAK_DETECT_FRAMES+1);
-    GLSL(    total_max += cur_max - frame_max[next];)
-    GLSL(    total_avg += cur_avg - frame_avg[next];)
-    GLSL(    frame_max[next] = frame_avg[next] = 0;)
+    // Scene change hysteresis
+    GLSLF("  float weight = smoothstep(%f, %f, abs(cur.x - average.x));\n",
+          (float) opts->scene_threshold_low / MP_REF_WHITE,
+          (float) opts->scene_threshold_high / MP_REF_WHITE);
+    GLSL(    average = mix(average, cur, weight);)

-    // Update the index and count
-    GLSL(    frame_idx = next;)
-    GLSLF("  frame_num = min(frame_num + 1, %d);\n", PEAK_DETECT_FRAMES);
+    // Reset SSBO state for the next frame
+    GLSL(    frame_max = frame_sum = 0;)
    GLSL(    memoryBarrierBuffer();)
    GLSL(})
 }
@ -659,7 +639,7 @@ static void pass_tone_map(struct gl_shader_cache *sc,
    GLSLF("float sig_avg = %f;\n", sdr_avg);

    if (opts->compute_peak >= 0)
-        hdr_update_peak(sc);
+        hdr_update_peak(sc, opts);

    GLSLF("vec3 sig = color.rgb;\n");