1
0
mirror of https://github.com/mpv-player/mpv synced 2025-04-11 04:01:31 +00:00

vo_gpu: port HDR tone mapping algorithm from libplacebo

The current peak detection algorithm was very bugged (which contributed
to the excessive cross-frame flicker without long normalization) and
also didn't take into account the frame average brightness level.

The new algorithm both takes into account frame average brightness (in
addition to peak brightness), and also computes the values in a more
stable/correct way. (The old path was basically undefined behavior)

In addition to improving the algorithm, we also switch to hable tone
mapping by default, and try to enable peak computation automatically
wherever possible (compute shaders + SSBOs supported). We also make the
desaturation milder, after extensive testing during libplacebo
development.

I also had to compensate a bit for the representational differences
between mpv and libplacebo (libplacebo treats 1.0 as the reference peak,
but mpv treats it as the nominal peak), but it shouldn't have caused any
problems.

This is still not quite the same as libplacebo, since libplacebo also
allows tagging the desired scene average brightness on the output, and
it also supports reading the scene average brightness from static
metadata (MaxFALL) where available. But those changes are a bit more
involved. It's possible we could also read this from metadata in the
future, but we have problems communicating with AVFrames as it is and I
don't want to touch the mpv colorimetry structs for the time being.
This commit is contained in:
Niklas Haas 2018-02-03 14:45:01 +01:00 committed by Kevin Mitchell
parent 0870859e3d
commit e3d93fde2f
4 changed files with 123 additions and 81 deletions

View File

@ -5063,7 +5063,7 @@ The following video options are currently all specific to ``--vo=gpu`` and
for in-range material as much as possible. Use this when you care about
color accuracy more than detail preservation. This is somewhere in
between ``clip`` and ``reinhard``, depending on the value of
``--tone-mapping-param``. (default)
``--tone-mapping-param``.
reinhard
Reinhard tone mapping algorithm. Very simple continuous curve.
Preserves overall image brightness but uses nonlinear contrast, which
@ -5074,7 +5074,9 @@ The following video options are currently all specific to ``--vo=gpu`` and
desaturating everything. Developed by John Hable for use in video
games. Use this when you care about detail preservation more than
color/brightness accuracy. This is roughly equivalent to
``--hdr-tone-mapping=reinhard --tone-mapping-param=0.24``.
``--hdr-tone-mapping=reinhard --tone-mapping-param=0.24``. If possible,
you should also enable ``--hdr-compute-peak`` for the best results.
(Default)
gamma
Fits a logarithmic transfer between the tone curves.
linear
@ -5103,13 +5105,15 @@ The following video options are currently all specific to ``--vo=gpu`` and
linear
Specifies the scale factor to use while stretching. Defaults to 1.0.
``--hdr-compute-peak``
Compute the HDR peak per-frame of relying on tagged metadata. These values
are averaged over local regions as well as over several frames to prevent
the value from jittering around too much. This option basically gives you
dynamic, per-scene tone mapping. Requires compute shaders, which is a
fairly recent OpenGL feature, and will probably also perform horribly on
some drivers, so enable at your own risk.
``--hdr-compute-peak=<auto|yes|no>``
Compute the HDR peak and frame average brightness per-frame instead of
relying on tagged metadata. These values are averaged over local regions as
well as over several frames to prevent the value from jittering around too
much. This option basically gives you dynamic, per-scene tone mapping.
Requires compute shaders, which is a fairly recent OpenGL feature, and will
probably also perform horribly on some drivers, so enable at your own risk.
The special value ``auto`` (default) will enable HDR peak computation
automatically if compute shaders and SSBOs are supported.
``--tone-mapping-desaturate=<value>``
Apply desaturation for highlights. The parameter essentially controls the
@ -5119,8 +5123,9 @@ The following video options are currently all specific to ``--vo=gpu`` and
into white instead. This makes images feel more natural, at the cost of
reducing information about out-of-range colors.
The default of 1.0 provides a good balance that roughly matches the look
and feel of the ACES ODT curves. A setting of 0.0 disables this option.
The default of 0.5 provides a good balance. This value is weaker than the
ACES ODT curves' recommendation, but works better for most content in
practice. A setting of 0.0 disables this option.
``--gamut-warning``
If enabled, mpv will mark all clipped/out-of-gamut pixels that exceed a

View File

@ -313,9 +313,9 @@ static const struct gl_video_opts gl_video_opts_def = {
.alpha_mode = ALPHA_BLEND_TILES,
.background = {0, 0, 0, 255},
.gamma = 1.0f,
.tone_mapping = TONE_MAPPING_MOBIUS,
.tone_mapping = TONE_MAPPING_HABLE,
.tone_mapping_param = NAN,
.tone_mapping_desat = 1.0,
.tone_mapping_desat = 0.5,
.early_flush = -1,
.hwdec_interop = "auto",
};
@ -358,7 +358,10 @@ const struct m_sub_options gl_video_conf = {
{"hable", TONE_MAPPING_HABLE},
{"gamma", TONE_MAPPING_GAMMA},
{"linear", TONE_MAPPING_LINEAR})),
OPT_FLAG("hdr-compute-peak", compute_hdr_peak, 0),
OPT_CHOICE("hdr-compute-peak", compute_hdr_peak, 0,
({"auto", 0},
{"yes", 1},
{"no", -1})),
OPT_FLOAT("tone-mapping-param", tone_mapping_param, 0),
OPT_FLOAT("tone-mapping-desaturate", tone_mapping_desat, 0),
OPT_FLAG("gamut-warning", gamut_warning, 0),
@ -2442,20 +2445,18 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
dst.gamma = MP_CSP_TRC_GAMMA22;
}
bool detect_peak = p->opts.compute_hdr_peak && mp_trc_is_hdr(src.gamma);
bool detect_peak = p->opts.compute_hdr_peak >= 0 && mp_trc_is_hdr(src.gamma);
if (detect_peak && !p->hdr_peak_ssbo) {
struct {
unsigned int sig_peak_raw;
unsigned int index;
unsigned int counter;
unsigned int frame_idx;
unsigned int frame_num;
unsigned int frame_max[PEAK_DETECT_FRAMES+1];
unsigned int frame_sum[PEAK_DETECT_FRAMES+1];
unsigned int total_max;
unsigned int total_sum;
} peak_ssbo = {0};
// Prefill with safe values
int safe = MP_REF_WHITE * mp_trc_nom_peak(p->image_params.color.gamma);
peak_ssbo.sig_peak_raw = PEAK_DETECT_FRAMES * safe;
for (int i = 0; i < PEAK_DETECT_FRAMES+1; i++)
peak_ssbo.frame_max[i] = safe;
struct ra_buf_params params = {
.type = RA_BUF_TYPE_SHADER_STORAGE,
.size = sizeof(peak_ssbo),
@ -2465,7 +2466,8 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
p->hdr_peak_ssbo = ra_buf_create(ra, &params);
if (!p->hdr_peak_ssbo) {
MP_WARN(p, "Failed to create HDR peak detection SSBO, disabling.\n");
detect_peak = (p->opts.compute_hdr_peak = false);
detect_peak = false;
p->opts.compute_hdr_peak = -1;
}
}
@ -2473,9 +2475,15 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
pass_describe(p, "detect HDR peak");
pass_is_compute(p, 8, 8); // 8x8 is good for performance
gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo,
"uint sig_peak_raw;"
"uint index;"
"uint frame_max[%d];", PEAK_DETECT_FRAMES + 1
"uint counter;"
"uint frame_idx;"
"uint frame_num;"
"uint frame_max[%d];"
"uint frame_sum[%d];"
"uint total_max;"
"uint total_sum;",
PEAK_DETECT_FRAMES + 1,
PEAK_DETECT_FRAMES + 1
);
}
@ -3504,9 +3512,10 @@ static void check_gl_features(struct gl_video *p)
p->opts.deband = 0;
MP_WARN(p, "Disabling debanding (GLSL version too old).\n");
}
if ((!have_compute || !have_ssbo) && p->opts.compute_hdr_peak) {
p->opts.compute_hdr_peak = 0;
MP_WARN(p, "Disabling HDR peak computation (no compute shaders).\n");
if ((!have_compute || !have_ssbo) && p->opts.compute_hdr_peak >= 0) {
int msgl = p->opts.compute_hdr_peak == 1 ? MSGL_WARN : MSGL_V;
MP_MSG(p, msgl, "Disabling HDR peak computation (no compute shaders).\n");
p->opts.compute_hdr_peak = -1;
}
}

View File

@ -96,7 +96,7 @@ enum tone_mapping {
};
// How many frames to average over for HDR peak detection
#define PEAK_DETECT_FRAMES 100
#define PEAK_DETECT_FRAMES 20
struct gl_video_opts {
int dumb_mode;

View File

@ -553,13 +553,63 @@ void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, floa
default:
abort();
}
}
GLSLF("color.rgb *= vec3(1.0/%f);\n", peak);
// Average light level for SDR signals. This is equal to a signal level of 0.5
// under a typical presentation gamma of about 2.0.
static const float sdr_avg = 0.25;
// Emit the compute-shader GLSL that measures the current frame's peak and
// average signal level and maintains a running history of both in the
// PeakDetect SSBO. The GLSL*/GLSLH macros are defined elsewhere; presumably
// they append to the body/header of `sc` -- TODO confirm.
//
// The emitted code expects these names to already exist in the shader:
//   - `sig` (read), `sig_peak` and `sig_avg` (overwritten): locals declared by
//     pass_tone_map() before it calls this function.
//   - `counter`, `frame_idx`, `frame_num`, `frame_max[]`, `frame_sum[]`,
//     `total_max`, `total_sum`: the fields of the PeakDetect SSBO.
//
// All levels are stored as unsigned integers scaled by MP_REF_WHITE and are
// divided by MP_REF_WHITE again when converted back to float.
static void hdr_update_peak(struct gl_shader_cache *sc)
{
// For performance, we want to do as few atomic operations on global
// memory as possible, so use an atomic in shmem for the work group.
GLSLH(shared uint wg_sum;);
// Every invocation writes the same zero; the barrier below orders this
// reset before any invocation starts adding to the sum.
GLSL(wg_sum = 0;)
// Have each thread update the work group sum with the local value
GLSL(barrier();)
GLSLF("atomicAdd(wg_sum, uint(sig * %f));\n", MP_REF_WHITE);
// Have one thread per work group update the global atomics. We use the
// work group average even for the global sum, to make the values slightly
// more stable and smooth out tiny super-highlights.
GLSL(memoryBarrierShared();)
GLSL(barrier();)
GLSL(if (gl_LocalInvocationIndex == 0) {)
GLSL( uint wg_avg = wg_sum / (gl_WorkGroupSize.x * gl_WorkGroupSize.y);)
// frame_max keeps the largest work group average seen in this frame,
// frame_sum accumulates one work group average per work group.
GLSL( atomicMax(frame_max[frame_idx], wg_avg);)
GLSL( atomicAdd(frame_sum[frame_idx], wg_avg);)
GLSL(})
// Update the sig_peak/sig_avg from the old SSBO state
GLSL(uint num_wg = gl_NumWorkGroups.x * gl_NumWorkGroups.y;)
// With no recorded frames yet, keep the values the caller initialized
// sig_peak/sig_avg with (this also avoids dividing by frame_num == 0).
GLSL(if (frame_num > 0) {)
GLSLF(" float peak = float(total_max) / (%f * float(frame_num));\n", MP_REF_WHITE);
// total_sum holds num_wg contributions per recorded frame, hence the
// extra division by num_wg to get a per-work-group average.
GLSLF(" float avg = float(total_sum) / (%f * float(frame_num * num_wg));\n", MP_REF_WHITE);
// Lower bounds: the peak never drops below 1.0 and the average never
// drops below sdr_avg.
GLSLF(" sig_peak = max(1.0, peak);\n");
GLSLF(" sig_avg = max(%f, avg);\n", sdr_avg);
GLSL(});
// Finally, to update the global state, we increment a counter per dispatch
GLSL(memoryBarrierBuffer();)
GLSL(barrier();)
// atomicAdd returns the value before the increment, so the work group that
// observes num_wg - 1 is the last one to arrive; only it rotates the history.
GLSL(if (gl_LocalInvocationIndex == 0 && atomicAdd(counter, 1) == num_wg - 1) {)
GLSL( counter = 0;)
// Add the current frame, then subtract and reset the next frame
// (the history is a ring of PEAK_DETECT_FRAMES+1 slots)
GLSLF(" uint next = (frame_idx + 1) %% %d;\n", PEAK_DETECT_FRAMES+1);
GLSL( total_max += frame_max[frame_idx] - frame_max[next];)
GLSL( total_sum += frame_sum[frame_idx] - frame_sum[next];)
GLSL( frame_max[next] = frame_sum[next] = 0;)
// Update the index and count (the count saturates at PEAK_DETECT_FRAMES)
GLSL( frame_idx = next;)
GLSLF(" frame_num = min(frame_num + 1, %d);\n", PEAK_DETECT_FRAMES);
GLSL(})
}
// Tone map from a known peak brightness to the range [0,1]. If detect_peak
// is true, the peak and average are measured by hdr_update_peak() instead
static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
static void pass_tone_map(struct gl_shader_cache *sc, bool detect_peak,
float src_peak, float dst_range,
enum tone_mapping algo, float param, float desat)
{
GLSLF("// HDR tone mapping\n");
@ -568,6 +618,16 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
// sure to reduce the value range as far as necessary to keep the entire
// signal in range, so tone map based on the brightest component.
GLSL(float sig = max(max(color.r, color.g), color.b);)
GLSLF("float sig_peak = %f;\n", src_peak);
GLSLF("float sig_avg = %f;\n", sdr_avg);
// Rescale the variables in order to bring it into a representation where
// 1.0 represents the dst_peak. This is because all of the tone mapping
// algorithms are defined in such a way that they map to the range [0.0, 1.0].
if (dst_range > 1.0) {
GLSLF("sig *= %f;\n", 1.0 / dst_range);
GLSLF("sig_peak *= %f;\n", 1.0 / dst_range);
}
// Desaturate the color using a coefficient dependent on the signal
if (desat > 0) {
@ -578,41 +638,14 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
GLSL(sig = mix(sig, luma, coeff);) // also make sure to update `sig`
}
if (!ref_peak) {
// For performance, we want to do as few atomic operations on global
// memory as possible, so use an atomic in shmem for the work group.
// We also want slightly more stable values, so use the group average
// instead of the group max
GLSLHF("shared uint group_sum = 0;\n");
GLSLF("atomicAdd(group_sum, uint(sig * %f));\n", MP_REF_WHITE);
// Have one thread in each work group update the frame maximum
GLSL(memoryBarrierBuffer();)
GLSL(barrier();)
GLSL(if (gl_LocalInvocationIndex == 0))
GLSL(atomicMax(frame_max[index], group_sum /
(gl_WorkGroupSize.x * gl_WorkGroupSize.y));)
// Finally, have one thread per invocation update the total maximum
// and advance the index
GLSL(memoryBarrierBuffer();)
GLSL(barrier();)
GLSL(if (gl_GlobalInvocationID == ivec3(0)) {) // do this once per invocation
GLSLF("uint next = (index + 1) %% %d;\n", PEAK_DETECT_FRAMES+1);
GLSLF("sig_peak_raw = sig_peak_raw + frame_max[index] - frame_max[next];\n");
GLSLF("frame_max[next] = %d;\n", (int)MP_REF_WHITE);
GLSL(index = next;)
GLSL(})
GLSL(memoryBarrierBuffer();)
GLSL(barrier();)
GLSLF("float sig_peak = 1.0/%f * float(sig_peak_raw);\n",
MP_REF_WHITE * PEAK_DETECT_FRAMES);
} else {
GLSLHF("const float sig_peak = %f;\n", ref_peak);
}
if (detect_peak)
hdr_update_peak(sc);
GLSL(float sig_orig = sig;)
GLSLF("float slope = min(1.0, %f / sig_avg);\n", sdr_avg);
GLSL(sig *= slope;)
GLSL(sig_peak *= slope;)
switch (algo) {
case TONE_MAPPING_CLIP:
GLSLF("sig = %f * sig;\n", isnan(param) ? 1.0 : param);
@ -668,6 +701,7 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak,
// Apply the computed scale factor to the color, linearly to prevent
// discoloration
GLSL(sig = min(sig, 1.0);)
GLSL(color.rgb *= sig / sig_orig;)
}
@ -689,7 +723,6 @@ void pass_color_map(struct gl_shader_cache *sc,
// Compute the highest encodable level
float src_range = mp_trc_nom_peak(src.gamma),
dst_range = mp_trc_nom_peak(dst.gamma);
float ref_peak = src.sig_peak / dst_range;
// Some operations need access to the video's luma coefficients, so make
// them available
@ -709,20 +742,13 @@ void pass_color_map(struct gl_shader_cache *sc,
src.light != dst.light;
if (need_gamma && !is_linear) {
// We also pull it up so that 1.0 is the reference white
pass_linearize(sc, src.gamma);
is_linear= true;
is_linear = true;
}
if (src.light != dst.light)
pass_ootf(sc, src.light, mp_trc_nom_peak(src.gamma));
// Rescale the signal to compensate for differences in the encoding range
// and reference white level. This is necessary because of how mpv encodes
// brightness in textures.
if (src_range != dst_range) {
GLSLF("// rescale value range;\n");
GLSLF("color.rgb *= vec3(%f);\n", src_range / dst_range);
}
pass_ootf(sc, src.light, src_range);
// Adapt to the right colorspace if necessary
if (src.primaries != dst.primaries) {
@ -734,18 +760,20 @@ void pass_color_map(struct gl_shader_cache *sc,
GLSL(color.rgb = cms_matrix * color.rgb;)
// Since this can reduce the gamut, figure out by how much
for (int c = 0; c < 3; c++)
ref_peak = MPMAX(ref_peak, m[c][c]);
src.sig_peak = MPMAX(src.sig_peak, m[c][c]);
}
// Tone map to prevent clipping when the source signal peak exceeds the
// encodable range or we've reduced the gamut
if (ref_peak > 1) {
pass_tone_map(sc, detect_peak ? 0 : ref_peak, algo,
if (src.sig_peak > dst_range) {
GLSLF("color.rgb *= vec3(%f);\n", src_range);
pass_tone_map(sc, detect_peak, src.sig_peak, dst_range, algo,
tone_mapping_param, tone_mapping_desat);
GLSLF("color.rgb *= vec3(%f);\n", 1.0 / dst_range);
}
if (src.light != dst.light)
pass_inverse_ootf(sc, dst.light, mp_trc_nom_peak(dst.gamma));
pass_inverse_ootf(sc, dst.light, dst_range);
// Warn for remaining out-of-gamut colors if enabled
if (gamut_warning) {