vo_gpu: improve accuracy of HDR brightness estimation

This change switches from a linear mean to a logarithmic (i.e. geometric) mean for estimating the average signal brightness: the per-pixel log values are averaged and the result is exponentiated. This handles dark scenes with isolated highlights much more faithfully than the linear mean did, since the log of the signal roughly corresponds to perceptual brightness.
2025-04-01 00:07:33 +00:00 · 2019-01-02 07:18:29 +01:00 · 2019-01-02 07:18:29 +01:00 · fdd671188d
commit fdd671188d
parent 12e58ff8a6
2 changed files with 14 additions and 10 deletions
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@ -2494,7 +2494,7 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
    if (detect_peak && !p->hdr_peak_ssbo) {
        struct {
            float average[2];
-            uint32_t frame_sum;
+            int32_t frame_sum;
            uint32_t frame_max;
            uint32_t counter;
        } peak_ssbo = {
@ -2520,7 +2520,7 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
        pass_is_compute(p, 8, 8, true); // 8x8 is good for performance
        gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo,
            "vec2 average;"
-            "uint frame_sum;"
+            "int frame_sum;"
            "uint frame_max;"
            "uint counter;"
        );
--- a/video/out/gpu/video_shaders.c
+++ b/video/out/gpu/video_shaders.c
@ -574,21 +574,24 @@ static void hdr_update_peak(struct gl_shader_cache *sc,
    GLSL(sig_avg  = max(1e-3, average.x);)
    GLSL(sig_peak = max(1.00, average.y);)

+    // Chosen to avoid overflowing on an 8K buffer
+    const float log_min = 1e-3, log_scale = 400.0, sig_scale = 10000.0;
+
    // For performance, and to avoid overflows, we tally up the sub-results per
    // pixel using shared memory first
-    GLSLH(shared uint wg_sum;)
+    GLSLH(shared int wg_sum;)
    GLSLH(shared uint wg_max;)
-    GLSL(wg_sum = wg_max = 0;)
+    GLSL(wg_sum = 0; wg_max = 0;)
    GLSL(barrier();)
-    GLSLF("uint sig_uint = uint(sig_max * %f);\n", MP_REF_WHITE);
-    GLSL(atomicAdd(wg_sum, sig_uint);)
-    GLSL(atomicMax(wg_max, sig_uint);)
+    GLSLF("float sig_log = log(max(sig_max, %f));\n", log_min);
+    GLSLF("atomicAdd(wg_sum, int(sig_log * %f));\n", log_scale);
+    GLSLF("atomicMax(wg_max, uint(sig_max * %f));\n", sig_scale);

    // Have one thread per work group update the global atomics
    GLSL(memoryBarrierShared();)
    GLSL(barrier();)
    GLSL(if (gl_LocalInvocationIndex == 0) {)
-    GLSL(    uint wg_avg = wg_sum / (gl_WorkGroupSize.x * gl_WorkGroupSize.y);)
+    GLSL(    int wg_avg = wg_sum / int(gl_WorkGroupSize.x * gl_WorkGroupSize.y);)
    GLSL(    atomicAdd(frame_sum, wg_avg);)
    GLSL(    atomicMax(frame_max, wg_max);)
    GLSL(    memoryBarrierBuffer();)
@ -600,7 +603,8 @@ static void hdr_update_peak(struct gl_shader_cache *sc,
    GLSL(if (gl_LocalInvocationIndex == 0 && atomicAdd(counter, 1) == num_wg - 1) {)
    GLSL(    counter = 0;)
    GLSL(    vec2 cur = vec2(float(frame_sum) / float(num_wg), frame_max);)
-    GLSLF("  cur *= 1.0/%f;\n", MP_REF_WHITE);
+    GLSLF("  cur *= vec2(1.0/%f, 1.0/%f);\n", log_scale, sig_scale);
+    GLSL(    cur.x = exp(cur.x);)

    // Use an IIR low-pass filter to smooth out the detected values, with a
    // configurable decay rate based on the desired time constant (tau)
@ -615,7 +619,7 @@ static void hdr_update_peak(struct gl_shader_cache *sc,
    GLSL(    average = mix(average, cur, weight);)

    // Reset SSBO state for the next frame
-    GLSL(    frame_max = frame_sum = 0;)
+    GLSL(    frame_sum = 0; frame_max = 0;)
    GLSL(    memoryBarrierBuffer();)
    GLSL(})
 }