diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst index e73ef6eee4..0f59392feb 100644 --- a/DOCS/man/options.rst +++ b/DOCS/man/options.rst @@ -4752,6 +4752,14 @@ The following video options are currently all specific to ``--vo=opengl`` and linear Specifies the scale factor to use while stretching. Defaults to 1.0. +``--hdr-compute-peak`` + Compute the HDR peak per-frame instead of relying on tagged metadata. These values + are averaged over local regions as well as over several frames to prevent + the value from jittering around too much. This option basically gives you + dynamic, per-scene tone mapping. Requires compute shaders, which are a + fairly recent OpenGL feature, and will probably also perform horribly on + some drivers, so enable at your own risk. + ``--tone-mapping-desaturate=`` Apply desaturation for highlights that exceed this level of brightness. The higher the parameter, the more color information will be preserved. This diff --git a/video/out/opengl/common.c b/video/out/opengl/common.c index b9536b6c59..9af21856ab 100644 --- a/video/out/opengl/common.c +++ b/video/out/opengl/common.c @@ -344,6 +344,11 @@ static const struct gl_functions gl_functions[] = { {0} }, }, + { + .ver_core = 430, + .extension = "GL_ARB_shader_storage_buffer_object", + .provides = MPGL_CAP_SSBO, + }, { .ver_core = 430, .extension = "GL_ARB_compute_shader", diff --git a/video/out/opengl/common.h b/video/out/opengl/common.h index 40208c45e5..eec7806624 100644 --- a/video/out/opengl/common.h +++ b/video/out/opengl/common.h @@ -54,6 +54,7 @@ enum { MPGL_CAP_EXT16 = (1 << 18), // GL_EXT_texture_norm16 MPGL_CAP_ARB_FLOAT = (1 << 19), // GL_ARB_texture_float MPGL_CAP_EXT_CR_HFLOAT = (1 << 20), // GL_EXT_color_buffer_half_float + MPGL_CAP_SSBO = (1 << 21), // GL_ARB_shader_storage_buffer_object MPGL_CAP_SW = (1 << 30), // indirect or sw renderer }; diff --git a/video/out/opengl/gl_headers.h b/video/out/opengl/gl_headers.h index 8f201bb64c..a55749cbb7 100644 --- 
a/video/out/opengl/gl_headers.h +++ b/video/out/opengl/gl_headers.h @@ -83,6 +83,11 @@ #define GL_COMPUTE_SHADER 0x91B9 +// --- GL 4.3 or GL_ARB_shader_storage_buffer_object + +#define GL_SHADER_STORAGE_BUFFER 0x90D2 +#define GL_SHADER_STORAGE_BARRIER_BIT 0x00002000 + // --- GL_NV_vdpau_interop #define GLvdpauSurfaceNV GLintptr diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c index f1e0081b10..afbd6f65af 100644 --- a/video/out/opengl/utils.c +++ b/video/out/opengl/utils.c @@ -473,6 +473,13 @@ struct sc_uniform { GLenum img_iformat; }; +struct sc_buffer { + char *name; /* GLSL buffer block name; heap copy released in gl_sc_reset() */ + char *format; /* member declarations emitted between the block's braces */ + GLuint binding; /* binding index used for layout(binding=N) and BindBufferBase */ + GLuint ssbo; /* GL buffer object attached to that binding index */ +}; + struct sc_cached_uniform { GLint loc; union uniform_val v; @@ -503,6 +510,7 @@ struct gl_shader_cache { bstr text; int next_texture_unit; int next_image_unit; + int next_buffer_binding; struct gl_vao *vao; // deprecated struct sc_entry *entries; @@ -512,6 +520,8 @@ struct gl_shader_cache { struct sc_uniform *uniforms; int num_uniforms; + struct sc_buffer *buffers; + int num_buffers; const struct gl_vao_entry *vertex_entries; size_t vertex_size; @@ -562,6 +572,11 @@ void gl_sc_reset(struct gl_shader_cache *sc) } } gl->ActiveTexture(GL_TEXTURE0); + + for (int n = 0; n < sc->num_buffers; n++) { + struct sc_buffer *b = &sc->buffers[n]; + gl->BindBufferBase(GL_SHADER_STORAGE_BUFFER, b->binding, 0); + } } sc->prelude_text.len = 0; @@ -570,8 +585,14 @@ void gl_sc_reset(struct gl_shader_cache *sc) for (int n = 0; n < sc->num_uniforms; n++) talloc_free(sc->uniforms[n].name); sc->num_uniforms = 0; + for (int n = 0; n < sc->num_buffers; n++) { + talloc_free(sc->buffers[n].name); + talloc_free(sc->buffers[n].format); + } + sc->num_buffers = 0; sc->next_texture_unit = 1; // not 0, as 0 is "free for use" sc->next_image_unit = 1; + sc->next_buffer_binding = 1; sc->vertex_entries = NULL; sc->vertex_size = 0; sc->current_shader = NULL; @@ -680,6 +701,21 @@ static struct sc_uniform *find_uniform(struct gl_shader_cache *sc, return 
&sc->uniforms[sc->num_uniforms - 1]; } +static struct sc_buffer *find_buffer(struct gl_shader_cache *sc, + const char *name) +{ + for (int n = 0; n < sc->num_buffers; n++) { + if (strcmp(sc->buffers[n].name, name) == 0) + return &sc->buffers[n]; + } + // not found -> add it + struct sc_buffer new = { + .name = talloc_strdup(NULL, name), + }; + MP_TARRAY_APPEND(sc, sc->buffers, sc->num_buffers, new); + return &sc->buffers[sc->num_buffers - 1]; +} + const char *mp_sampler_type(GLenum texture_target) { switch (texture_target) { @@ -738,6 +774,20 @@ void gl_sc_uniform_image2D(struct gl_shader_cache *sc, char *name, GLuint textur u->img_iformat = iformat; } +void gl_sc_ssbo(struct gl_shader_cache *sc, char *name, GLuint ssbo, + char *format, ...) +{ + struct sc_buffer *b = find_buffer(sc, name); + b->binding = sc->next_buffer_binding++; + b->ssbo = ssbo; + talloc_free(b->format); /* drop the old layout string if this name was already registered; NULL (new entry) is a no-op */ + + va_list ap; + va_start(ap, format); + b->format = ta_vasprintf(sc, format, ap); + va_end(ap); +} + void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, GLfloat f) { struct sc_uniform *u = find_uniform(sc, name); @@ -1217,6 +1267,12 @@ struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc, GLenum type) ADD(comp, "uniform %s %s;\n", u->glsl_type, u->name); } + for (int n = 0; n < sc->num_buffers; n++) { + struct sc_buffer *b = &sc->buffers[n]; + ADD(comp, "layout(std430, binding=%d) buffer %s { %s };\n", + b->binding, b->name, b->format); + } + ADD_BSTR(comp, sc->prelude_text); ADD_BSTR(comp, sc->header_text); @@ -1271,6 +1327,10 @@ struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc, GLenum type) for (int n = 0; n < sc->num_uniforms; n++) update_uniform(gl, entry, &sc->uniforms[n], n); + for (int n = 0; n < sc->num_buffers; n++) { + struct sc_buffer *b = &sc->buffers[n]; + gl->BindBufferBase(GL_SHADER_STORAGE_BUFFER, b->binding, b->ssbo); + } gl->ActiveTexture(GL_TEXTURE0); diff --git a/video/out/opengl/utils.h b/video/out/opengl/utils.h index 3dc7e5d72d..f2c405fa9a 
100644 --- a/video/out/opengl/utils.h +++ b/video/out/opengl/utils.h @@ -149,6 +149,8 @@ void gl_sc_uniform_tex(struct gl_shader_cache *sc, char *name, GLenum target, void gl_sc_uniform_tex_ui(struct gl_shader_cache *sc, char *name, GLuint texture); void gl_sc_uniform_image2D(struct gl_shader_cache *sc, char *name, GLuint texture, GLuint iformat, GLenum access); +void gl_sc_ssbo(struct gl_shader_cache *sc, char *name, GLuint ssbo, + char *format, ...); void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, GLfloat f); void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, GLint f); void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, GLfloat f[2]); diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c index ab8f311191..76b9d829ab 100644 --- a/video/out/opengl/video.c +++ b/video/out/opengl/video.c @@ -236,9 +236,11 @@ struct gl_video { struct fbotex integer_fbo[4]; struct fbotex indirect_fbo; struct fbotex blend_subs_fbo; + struct fbotex screen_fbo; struct fbotex output_fbo; struct fbosurface surfaces[FBOSURFACES_MAX]; struct fbotex vdpau_deinterleave_fbo[2]; + GLuint hdr_peak_ssbo; int surface_idx; int surface_now; @@ -368,6 +370,7 @@ const struct m_sub_options gl_video_conf = { {"hable", TONE_MAPPING_HABLE}, {"gamma", TONE_MAPPING_GAMMA}, {"linear", TONE_MAPPING_LINEAR})), + OPT_FLAG("hdr-compute-peak", compute_hdr_peak, 0), OPT_FLOAT("tone-mapping-param", tone_mapping_param, 0), OPT_FLOAT("tone-mapping-desaturate", tone_mapping_desat, 0), OPT_FLAG("opengl-pbo", pbo, 0), @@ -541,6 +544,7 @@ static void uninit_rendering(struct gl_video *p) fbotex_uninit(&p->indirect_fbo); fbotex_uninit(&p->blend_subs_fbo); + fbotex_uninit(&p->screen_fbo); for (int n = 0; n < FBOSURFACES_MAX; n++) fbotex_uninit(&p->surfaces[n].fbotex); @@ -2358,6 +2362,8 @@ static void pass_scale_main(struct gl_video *p) // by previous passes (i.e. 
linear scaling) static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool osd) { + GL *gl = p->gl; + // Figure out the target color space from the options, or auto-guess if // none were set struct mp_colorspace dst = { @@ -2417,10 +2423,42 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool dst.gamma = MP_CSP_TRC_GAMMA22; } + bool detect_peak = p->opts.compute_hdr_peak && mp_trc_is_hdr(src.gamma); + if (detect_peak) { + pass_describe(p, "detect HDR peak"); + compute_size_minimum(p, 8, 8); // 8x8 is good for performance + + if (!p->hdr_peak_ssbo) { + struct { + GLuint sig_peak_raw; + GLuint index; + GLuint frame_max[PEAK_DETECT_FRAMES+1]; + } peak_ssbo = {0}; + + // Prefill with safe values + int safe = MP_REF_WHITE * mp_trc_nom_peak(p->image_params.color.gamma); + peak_ssbo.sig_peak_raw = PEAK_DETECT_FRAMES * safe; + for (int i = 0; i < PEAK_DETECT_FRAMES+1; i++) + peak_ssbo.frame_max[i] = safe; + + gl->GenBuffers(1, &p->hdr_peak_ssbo); + gl->BindBuffer(GL_SHADER_STORAGE_BUFFER, p->hdr_peak_ssbo); + gl->BufferData(GL_SHADER_STORAGE_BUFFER, sizeof(peak_ssbo), + &peak_ssbo, GL_STREAM_COPY); + gl->BindBuffer(GL_SHADER_STORAGE_BUFFER, 0); + } + + gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo, + "uint sig_peak_raw;" + "uint index;" + "uint frame_max[%d];", PEAK_DETECT_FRAMES + 1 + ); + } + // Adapt from src to dst as necessary pass_color_map(p->sc, src, dst, p->opts.hdr_tone_mapping, p->opts.tone_mapping_param, p->opts.tone_mapping_desat, - p->use_linear && !osd); + detect_peak, p->use_linear && !osd); if (p->use_lut_3d) { gl_sc_uniform_tex(p->sc, "lut_3d", GL_TEXTURE_3D, p->lut_3d_texture); @@ -2710,6 +2748,17 @@ static void pass_draw_to_screen(struct gl_video *p, int fbo) pass_colormanage(p, p->image_params.color, false); + // Since finish_pass_direct doesn't work with compute shaders, and neither + // does the checkerboard/dither code, we may need an indirection via + // p->screen_fbo here. 
+ if (p->compute_w > 0 && p->compute_h > 0) { + int o_w = p->dst_rect.x1 - p->dst_rect.x0, + o_h = p->dst_rect.y1 - p->dst_rect.y0; + finish_pass_fbo(p, &p->screen_fbo, o_w, o_h, FBOTEX_FUZZY); + struct img_tex tmp = img_tex_fbo(&p->screen_fbo, PLANE_RGB, p->components); + copy_img_tex(p, &(int){0}, tmp); + } + if (p->has_alpha){ if (p->opts.alpha_mode == ALPHA_BLEND_TILES) { // Draw checkerboard pattern to indicate transparency @@ -3326,6 +3375,7 @@ static void check_gl_features(struct gl_video *p) bool have_mglsl = gl->glsl_version >= 130; // modern GLSL (1st class arrays etc.) bool have_texrg = gl->mpgl_caps & MPGL_CAP_TEX_RG; bool have_tex16 = !gl->es || (gl->mpgl_caps & MPGL_CAP_EXT16); + bool have_compute = gl->glsl_version >= 430; // easiest way to ensure all required features (compute shaders, SSBOs) are available const GLint auto_fbo_fmts[] = {GL_RGBA16, GL_RGBA16F, GL_RGB10_A2, GL_RGBA8, 0}; @@ -3436,6 +3486,10 @@ static void check_gl_features(struct gl_video *p) p->opts.deband = 0; MP_WARN(p, "Disabling debanding (GLSL version too old).\n"); } + if (!have_compute && p->opts.compute_hdr_peak) { + p->opts.compute_hdr_peak = 0; + MP_WARN(p, "Disabling HDR peak computation (no compute shaders).\n"); + } } static void init_gl(struct gl_video *p) @@ -3471,6 +3525,7 @@ void gl_video_uninit(struct gl_video *p) gl_sc_destroy(p->sc); gl->DeleteTextures(1, &p->lut_3d_texture); + gl->DeleteBuffers(1, &p->hdr_peak_ssbo); gl_timer_free(p->upload_timer); gl_timer_free(p->blit_timer); diff --git a/video/out/opengl/video.h b/video/out/opengl/video.h index f3608626e4..b19f6e099d 100644 --- a/video/out/opengl/video.h +++ b/video/out/opengl/video.h @@ -99,6 +99,9 @@ enum tone_mapping { TONE_MAPPING_LINEAR, }; +// How many frames to average over for HDR peak detection +#define PEAK_DETECT_FRAMES 100 + struct gl_video_opts { int dumb_mode; struct scaler_config scaler[4]; @@ -109,6 +112,7 @@ struct gl_video_opts { int target_trc; int target_brightness; int hdr_tone_mapping; + int compute_hdr_peak; float tone_mapping_param; float 
tone_mapping_desat; int linear_scaling; diff --git a/video/out/opengl/video_shaders.c b/video/out/opengl/video_shaders.c index 3381d532b6..a7ecf1a448 100644 --- a/video/out/opengl/video_shaders.c +++ b/video/out/opengl/video_shaders.c @@ -521,7 +521,8 @@ void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, floa GLSLF("color.rgb *= vec3(1.0/%f);\n", peak); } -// Tone map from a known peak brightness to the range [0,1] +// Tone map from a known peak brightness to the range [0,1]. If ref_peak +// is 0, we will use peak detection instead static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak, enum tone_mapping algo, float param, float desat) { @@ -531,8 +532,42 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak, GLSL(float luma = dot(src_luma, color.rgb);) GLSL(float luma_orig = luma;) + if (!ref_peak) { + // For performance, we want to do as few atomic operations on global + // memory as possible, so use an atomic in shmem for the work group. 
+ // We also want slightly more stable values, so use the group average + // instead of the group max + GLSLHF("shared uint group_sum;\n"); /* GLSL forbids initializers on shared variables, so it is zeroed at run time below */ + GLSLF("group_sum = 0; barrier(); atomicAdd(group_sum, uint(luma * %f));\n", MP_REF_WHITE); /* barrier: every invocation sees the zero before any add */ + + // Have one thread in each work group update the frame maximum + GLSL(memoryBarrierBuffer();) + GLSL(barrier();) + GLSL(if (gl_LocalInvocationIndex == 0)) + GLSL(atomicMax(frame_max[index], group_sum / + (gl_WorkGroupSize.x * gl_WorkGroupSize.y));) + + // Finally, have one thread per invocation update the total maximum + // and advance the index + GLSL(memoryBarrierBuffer();) + GLSL(barrier();) + GLSL(if (gl_GlobalInvocationID == ivec3(0)) {) // do this once per invocation + GLSLF("uint next = (index + 1) %% %d;\n", PEAK_DETECT_FRAMES+1); + GLSLF("sig_peak_raw = sig_peak_raw + frame_max[index] - frame_max[next];\n"); + GLSLF("frame_max[next] = %d;\n", (int)MP_REF_WHITE); + GLSL(index = next;) + GLSL(}) + + GLSL(memoryBarrierBuffer();) + GLSL(barrier();) + GLSLF("const float sig_peak = 1.0/%f * float(sig_peak_raw);\n", + MP_REF_WHITE * PEAK_DETECT_FRAMES); + } else { + GLSLHF("const float sig_peak = %f;\n", ref_peak); + } + // Desaturate the color using a coefficient dependent on the brightness - if (desat > 0 && ref_peak > desat) { + if (desat > 0) { GLSLF("float overbright = max(luma - %f, 1e-6) / max(luma, 1e-6);\n", desat); GLSL(color.rgb = mix(color.rgb, vec3(luma), overbright);) } @@ -542,23 +577,23 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak, GLSLF("luma = clamp(%f * luma, 0.0, 1.0);\n", isnan(param) ? 1.0 : param); break; - case TONE_MAPPING_MOBIUS: { - float j = isnan(param) ? 0.3 : param; - // solve for M(j) = j; M(ref_peak) = 1.0; M'(j) = 1.0 + case TONE_MAPPING_MOBIUS: + GLSLF("const float j = %f;\n", isnan(param) ? 
0.3 : param); + // solve for M(j) = j; M(sig_peak) = 1.0; M'(j) = 1.0 // where M(x) = scale * (x+a)/(x+b) - float a = -j*j * (ref_peak - 1) / (j*j - 2*j + ref_peak), - b = (j*j - 2*j*ref_peak + ref_peak) / (ref_peak - 1); - - GLSLF("luma = mix(%f * (luma + %f) / (luma + %f), luma, luma <= %f);\n", - (b*b + 2*b*j + j*j) / (b - a), a, b, j); + GLSLF("const float a = -j*j * (sig_peak - 1) / (j*j - 2*j + sig_peak);\n"); + GLSLF("const float b = (j*j - 2*j*sig_peak + sig_peak) / " + "max(1e-6, sig_peak - 1);\n"); + GLSLF("const float scale = (b*b + 2*b*j + j*j) / (b-a);\n"); + GLSL(luma = mix(luma, scale * (luma + a) / (luma + b), luma > j);) break; - } case TONE_MAPPING_REINHARD: { float contrast = isnan(param) ? 0.5 : param, offset = (1.0 - contrast) / contrast; GLSLF("luma = luma / (luma + %f);\n", offset); - GLSLF("luma *= %f;\n", (ref_peak + offset) / ref_peak); + GLSLF("const float lumascale = (sig_peak + %f) / sig_peak;\n", offset); + GLSL(luma *= lumascale;) break; } @@ -568,20 +603,19 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak, GLSLHF("return ((x * (%f*x + %f)+%f)/(x * (%f*x + %f) + %f)) - %f;\n", A, C*B, D*E, A, B, D*F, E/F); GLSLHF("}\n"); - - GLSLF("luma = hable(luma) / hable(%f);\n", ref_peak); + GLSL(luma = hable(luma) / hable(sig_peak);) break; } case TONE_MAPPING_GAMMA: { float gamma = isnan(param) ? 1.8 : param; - GLSLF("luma = pow(luma * 1.0/%f, %f);\n", ref_peak, 1.0/gamma); + GLSLF("luma = pow(luma / sig_peak, %f);\n", 1.0/gamma); break; } case TONE_MAPPING_LINEAR: { float coeff = isnan(param) ? 1.0 : param; - GLSLF("luma = %f * luma;\n", coeff / ref_peak); + GLSLF("luma = %f / sig_peak * luma;\n", coeff); break; } @@ -596,11 +630,15 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak, // Map colors from one source space to another. These source spaces must be // known (i.e. not MP_CSP_*_AUTO), as this function won't perform any // auto-guessing. 
If is_linear is true, we assume the input has already been -// linearized (e.g. for linear-scaling) +// linearized (e.g. for linear-scaling). If `detect_peak` is true, we will +// detect the peak instead of relying on metadata. Note that this requires +// the caller to have already bound the appropriate SSBO and set up the +// compute shader metadata void pass_color_map(struct gl_shader_cache *sc, struct mp_colorspace src, struct mp_colorspace dst, enum tone_mapping algo, float tone_mapping_param, - float tone_mapping_desat, bool is_linear) + float tone_mapping_desat, bool detect_peak, + bool is_linear) { GLSLF("// color mapping\n"); @@ -643,8 +681,8 @@ void pass_color_map(struct gl_shader_cache *sc, // Tone map to prevent clipping when the source signal peak exceeds the // encodable range if (src.sig_peak > dst_range) { - pass_tone_map(sc, src.sig_peak / dst_range, algo, tone_mapping_param, - tone_mapping_desat); + float ref_peak = detect_peak ? 0 : src.sig_peak / dst_range; + pass_tone_map(sc, ref_peak, algo, tone_mapping_param, tone_mapping_desat); } // Adapt to the right colorspace if necessary diff --git a/video/out/opengl/video_shaders.h b/video/out/opengl/video_shaders.h index b0b8b4214e..e0594f28f3 100644 --- a/video/out/opengl/video_shaders.h +++ b/video/out/opengl/video_shaders.h @@ -44,7 +44,8 @@ void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, floa void pass_color_map(struct gl_shader_cache *sc, struct mp_colorspace src, struct mp_colorspace dst, enum tone_mapping algo, float tone_mapping_param, - float tone_mapping_desat, bool is_linear); + float tone_mapping_desat, bool detect_peak, + bool is_linear); void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts, AVLFG *lfg);