1
0
mirror of https://github.com/mpv-player/mpv synced 2024-12-27 01:22:30 +00:00

vo_gpu: redesign peak detection algorithm

The previous approach of using an FIR with tunable hard threshold for
scene changes had several problems:

- the FIR involved annoying hard-coded buffer sizes and high VRAM usage,
  and the FIR sum was prone to numerical overflow, which limited the
  number of frames we could average over. (This commit also completely
  redesigns the scene change detection.)

- the hard scene change detection was prone to both false positives and
  false negatives, each with their own (annoying) issues.

Scrap this entirely and switch to a dual approach of using a simple
single-pole IIR low pass filter to smooth out noise, while using a
softer scene change curve (with tunable low and high thresholds), based
on `smoothstep`. The IIR filter is extremely simple in its
implementation and has an arbitrarily user-tunable cutoff frequency,
while the smoothstep-based scene change curve provides a good, tunable
tradeoff between adaptation speed and stability - without exhibiting
either of the traditional issues associated with the hard cutoff.

Another way to think about the new options is that the "low threshold"
provides a margin of error within which we don't care about small
fluctuations in the scene (which will therefore be smoothed out by the
IIR filter).
This commit is contained in:
Niklas Haas 2019-01-01 07:30:00 +01:00 committed by Jan Ekström
parent 3fe882d4ae
commit 6179dcbb79
5 changed files with 82 additions and 73 deletions

View File

@ -51,6 +51,7 @@ Interface changes
only using a single value (which previously just controlled the exponent).
The strength now linearly blends between the linear and nonlinear tone
mapped versions of a color.
- add --hdr-peak-decay-rate and --hdr-scene-threshold-low/high
--- mpv 0.29.0 ---
- drop --opensles-sample-rate, as --audio-samplerate should be used if desired
- drop deprecated --videotoolbox-format, --ff-aid, --ff-vid, --ff-sid,

View File

@ -5245,6 +5245,30 @@ The following video options are currently all specific to ``--vo=gpu`` and
The special value ``auto`` (default) will enable HDR peak computation
automatically if compute shaders and SSBOs are supported.
``--hdr-peak-decay-rate=<1.0..1000.0>``
The decay rate used for the HDR peak detection algorithm (default: 100.0).
This is only relevant when ``--hdr-compute-peak`` is enabled. Higher values
make the peak decay more slowly, leading to more stable values at the cost
of more "eye adaptation"-like effects (although this is mitigated somewhat
by ``--hdr-scene-threshold-low`` and ``--hdr-scene-threshold-high``). A value
of 1.0 (the lowest possible) disables all averaging, meaning each frame's
value is used directly as measured,
but doing this is not recommended for "noisy" sources since it may lead
to excessive flicker. (In signal theory terms, this controls the time
constant "tau" of an IIR low pass filter)
``--hdr-scene-threshold-low=<0..10000>``, ``--hdr-scene-threshold-high=<0..10000>``
The lower and upper thresholds (in cd/m^2) for a brightness difference to
be considered a scene change (default: 50 low, 200 high). This is only
relevant when ``--hdr-compute-peak`` is enabled. Normally, small
fluctuations in the frame brightness are compensated for by the peak
averaging mechanism, but for large jumps in the brightness this can result
in the frame remaining too bright or too dark for up to several seconds,
depending on the value of ``--hdr-peak-decay-rate``. To counteract this,
when the brightness difference between the running average and the current
frame exceeds the low threshold, mpv will make the averaging filter more
aggressive, up to the limit of the high threshold (at which point the
filter becomes instant).
``--tone-mapping-desaturate=<0.0..1.0>``
Apply desaturation for highlights (default: 0.75). The parameter controls
the strength of the desaturation curve. A value of 0.0 completely disables

View File

@ -316,6 +316,9 @@ static const struct gl_video_opts gl_video_opts_def = {
.tone_map = {
.curve = TONE_MAPPING_HABLE,
.curve_param = NAN,
.decay_rate = 100.0,
.scene_threshold_low = 50,
.scene_threshold_high = 200,
.desat = 0.75,
.desat_exp = 1.5,
},
@ -367,6 +370,11 @@ const struct m_sub_options gl_video_conf = {
({"auto", 0},
{"yes", 1},
{"no", -1})),
OPT_FLOATRANGE("hdr-peak-decay-rate", tone_map.decay_rate, 0, 1.0, 1000.0),
OPT_INTRANGE("hdr-scene-threshold-low",
tone_map.scene_threshold_low, 0, 0, 10000),
OPT_INTRANGE("hdr-scene-threshold-high",
tone_map.scene_threshold_high, 0, 0, 10000),
OPT_FLOAT("tone-mapping-param", tone_map.curve_param, 0),
OPT_FLOAT("tone-mapping-desaturate", tone_map.desat, 0),
OPT_FLOATRANGE("tone-mapping-desaturate-exponent",
@ -2478,17 +2486,18 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
dst.sig_peak = mp_trc_nom_peak(dst.gamma);
struct gl_tone_map_opts tone_map = p->opts.tone_map;
bool detect_peak = tone_map.compute_peak >= 0 && mp_trc_is_hdr(src.gamma);
bool detect_peak = tone_map.compute_peak >= 0 && mp_trc_is_hdr(src.gamma)
&& src.sig_peak > dst.sig_peak;
if (detect_peak && !p->hdr_peak_ssbo) {
struct {
float average[2];
uint32_t frame_sum;
uint32_t frame_max;
uint32_t counter;
uint32_t frame_idx;
uint32_t frame_num;
uint32_t frame_max[PEAK_DETECT_FRAMES+1];
uint32_t frame_sum[PEAK_DETECT_FRAMES+1];
uint32_t total_max;
uint32_t total_sum;
} peak_ssbo = {0};
} peak_ssbo = {
.average = { 0.25, src.sig_peak },
};
struct ra_buf_params params = {
.type = RA_BUF_TYPE_SHADER_STORAGE,
@ -2508,15 +2517,10 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
pass_describe(p, "detect HDR peak");
pass_is_compute(p, 8, 8, true); // 8x8 is good for performance
gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo,
"vec2 average;"
"uint frame_sum;"
"uint frame_max;"
"uint counter;"
"uint frame_idx;"
"uint frame_num;"
"uint frame_max[%d];"
"uint frame_avg[%d];"
"uint total_max;"
"uint total_avg;",
PEAK_DETECT_FRAMES + 1,
PEAK_DETECT_FRAMES + 1
);
}

View File

@ -95,13 +95,13 @@ enum tone_mapping {
TONE_MAPPING_LINEAR,
};
// How many frames to average over for HDR peak detection
#define PEAK_DETECT_FRAMES 63
struct gl_tone_map_opts {
int curve;
float curve_param;
int compute_peak;
float decay_rate;
int scene_threshold_low;
int scene_threshold_high;
float desat;
float desat_exp;
int gamut_warning; // bool

View File

@ -567,75 +567,55 @@ static void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light ligh
// under a typical presentation gamma of about 2.0.
static const float sdr_avg = 0.25;
// The threshold for which to consider an average luminance difference to be
// a sign of a scene change.
static const int scene_threshold = 0.2 * MP_REF_WHITE;
static void hdr_update_peak(struct gl_shader_cache *sc)
static void hdr_update_peak(struct gl_shader_cache *sc,
const struct gl_tone_map_opts *opts)
{
// For performance, we want to do as few atomic operations on global
// memory as possible, so use an atomic in shmem for the work group.
GLSLH(shared uint wg_sum;);
GLSL(wg_sum = 0;)
// Update the sig_peak/sig_avg from the old SSBO state
GLSL(sig_avg = max(1e-3, average.x);)
GLSL(sig_peak = max(1.00, average.y);)
// Have each thread update the work group sum with the local value
// For performance, and to avoid overflows, we tally up the sub-results per
// pixel using shared memory first
GLSLH(shared uint wg_sum;)
GLSLH(shared uint wg_max;)
GLSL(wg_sum = wg_max = 0;)
GLSL(barrier();)
GLSLF("atomicAdd(wg_sum, uint(sig_max * %f));\n", MP_REF_WHITE);
GLSLF("uint sig_uint = uint(sig_max * %f);\n", MP_REF_WHITE);
GLSL(atomicAdd(wg_sum, sig_uint);)
GLSL(atomicMax(wg_max, sig_uint);)
// Have one thread per work group update the global atomics. We use the
// work group average even for the global sum, to make the values slightly
// more stable and smooth out tiny super-highlights.
// Have one thread per work group update the global atomics
GLSL(memoryBarrierShared();)
GLSL(barrier();)
GLSL(if (gl_LocalInvocationIndex == 0) {)
GLSL( uint wg_avg = wg_sum / (gl_WorkGroupSize.x * gl_WorkGroupSize.y);)
GLSL( atomicMax(frame_max[frame_idx], wg_avg);)
GLSL( atomicAdd(frame_avg[frame_idx], wg_avg);)
GLSL( atomicAdd(frame_sum, wg_avg);)
GLSL( atomicMax(frame_max, wg_max);)
GLSL( memoryBarrierBuffer();)
GLSL(})
const float refi = 1.0 / MP_REF_WHITE;
// Update the sig_peak/sig_avg from the old SSBO state
GLSL(uint num_wg = gl_NumWorkGroups.x * gl_NumWorkGroups.y;)
GLSL(if (frame_num > 0) {)
GLSLF(" float peak = %f * float(total_max) / float(frame_num);\n", refi);
GLSLF(" float avg = %f * float(total_avg) / float(frame_num);\n", refi);
GLSLF(" sig_peak = max(1.0, peak);\n");
GLSLF(" sig_avg = max(%f, avg);\n", sdr_avg);
GLSL(});
GLSL(barrier();)
// Finally, to update the global state, we increment a counter per dispatch
GLSL(memoryBarrierBuffer();)
GLSL(barrier();)
GLSL(uint num_wg = gl_NumWorkGroups.x * gl_NumWorkGroups.y;)
GLSL(if (gl_LocalInvocationIndex == 0 && atomicAdd(counter, 1) == num_wg - 1) {)
// Since we sum up all the workgroups, we also still need to divide the
// average by the number of work groups
GLSL( counter = 0;)
GLSL( frame_avg[frame_idx] /= num_wg;)
GLSL( uint cur_max = frame_max[frame_idx];)
GLSL( uint cur_avg = frame_avg[frame_idx];)
GLSL( vec2 cur = vec2(float(frame_sum) / float(num_wg), frame_max);)
GLSLF(" cur *= 1.0/%f;\n", MP_REF_WHITE);
// Scene change detection
GLSL( int diff = int(frame_num * cur_avg) - int(total_avg);)
GLSLF(" if (abs(diff) > frame_num * %d) {\n", scene_threshold);
GLSL( frame_num = 0;)
GLSL( total_max = total_avg = 0;)
GLSLF(" for (uint i = 0; i < %d; i++)\n", PEAK_DETECT_FRAMES+1);
GLSL( frame_max[i] = frame_avg[i] = 0;)
GLSL( frame_max[frame_idx] = cur_max;)
GLSL( frame_avg[frame_idx] = cur_avg;)
GLSL( })
// Use an IIR low-pass filter to smooth out the detected values, with a
// configurable decay rate based on the desired time constant (tau)
float a = 1.0 - cos(1.0 / opts->decay_rate);
float decay = sqrt(a*a + 2*a) - a;
GLSLF(" average += %f * (cur - average);\n", decay);
// Add the current frame, then subtract and reset the next frame
GLSLF(" uint next = (frame_idx + 1) %% %d;\n", PEAK_DETECT_FRAMES+1);
GLSL( total_max += cur_max - frame_max[next];)
GLSL( total_avg += cur_avg - frame_avg[next];)
GLSL( frame_max[next] = frame_avg[next] = 0;)
// Scene change hysteresis
GLSLF(" float weight = smoothstep(%f, %f, abs(cur.x - average.x));\n",
(float) opts->scene_threshold_low / MP_REF_WHITE,
(float) opts->scene_threshold_high / MP_REF_WHITE);
GLSL( average = mix(average, cur, weight);)
// Update the index and count
GLSL( frame_idx = next;)
GLSLF(" frame_num = min(frame_num + 1, %d);\n", PEAK_DETECT_FRAMES);
// Reset SSBO state for the next frame
GLSL( frame_max = frame_sum = 0;)
GLSL( memoryBarrierBuffer();)
GLSL(})
}
@ -659,7 +639,7 @@ static void pass_tone_map(struct gl_shader_cache *sc,
GLSLF("float sig_avg = %f;\n", sdr_avg);
if (opts->compute_peak >= 0)
hdr_update_peak(sc);
hdr_update_peak(sc, opts);
GLSLF("vec3 sig = color.rgb;\n");