diff --git a/DOCS/man/vo.rst b/DOCS/man/vo.rst index e73f1d578a..82611e5a19 100644 --- a/DOCS/man/vo.rst +++ b/DOCS/man/vo.rst @@ -352,6 +352,10 @@ Available video output drivers are: blurrier. Defaults to 1. Note that setting this too low (eg. 0.5) leads to bad results. It's recommended to stay between 0.9 and 1.1. + ``sharpen3``, ``sharpen5`` + Sharpening strength. Increasing this makes the image sharper but + adds more ringing and aliasing. Defaults to 0.5. + ``scale-radius=`` Set radius for filters listed below, must be a float number between 1.0 and 16.0. Defaults to be 3.0 if not specified. @@ -377,21 +381,6 @@ Available video output drivers are: will reproduce the source image perfectly if no scaling is performed. Note that this option never affects ``cscale``. - ``srgb`` - Convert and color correct the output to sRGB before displaying it on - the screen. This option enables ``linear-scaling``. - - This option is equivalent to using ``icc-profile`` with an sRGB ICC - profile, but it is implemented without a 3DLUT and does not require - LittleCMS 2. If both ``srgb`` and ``icc-profile`` are present, the - latter takes precedence, as they are somewhat redundant. - - Note: When playing back BT.2020 content with this option enabled, out - of gamut colors will be numerically clipped, which can potentially - change the hue and/or luminance. If this is not desired, it is - recommended to use ``icc-profile`` with an sRGB ICC profile instead, - when playing back wide-gamut BT.2020 content. - ``pbo`` Enable use of PBOs. This is slightly faster, but can sometimes lead to sporadic and temporary image corruption (in theory, because reupload @@ -460,9 +449,10 @@ Available video output drivers are: ``scale-antiring``. ``linear-scaling`` - Scale in linear light. This is automatically enabled if ``srgb``, - ``icc-profile`` or ``sigmoid-upscaling`` is set. It should only - be used with a ``fbo-format`` that has at least 16 bit precision. + Scale in linear light. This is automatically enabled if + ``target-prim``, ``target-trc``, ``icc-profile`` or + ``sigmoid-upscaling`` is set. It should only be used with a + ``fbo-format`` that has at least 16 bit precision. ``fancy-downscaling`` When using convolution based filters, extend the filter size @@ -553,13 +543,44 @@ Available video output drivers are: NOTE: Only implemented on OS X. + ``target-prim=`` + Specifies the primaries of the display. Video colors will be adapted + to this colorspace if necessary. Valid values are: + + auto + Disable any adaptation (default) + bt470m + ITU-R BT.470 M + bt601-525 + ITU-R BT.601 (525-line SD systems, eg. NTSC), SMPTE 170M/240M + bt601-625 + ITU-R BT.601 (625-line SD systems, eg. PAL/SECAM), ITU-R BT.470 B/G + bt709 + ITU-R BT.709 (HD), IEC 61966-2-4 (sRGB), SMPTE RP177 Annex B + bt2020 + ITU-R BT.2020 (UHD) + + ``target-trc=`` + Specifies the transfer characteristics (gamma) of the display. Video + colors will be adjusted to this curve. Valid values are: + + auto + Disable any adaptation (default) + bt1886 + ITU-R BT.1886 curve, without the brightness drop (approx. 1.961) + srgb + IEC 61966-2-4 (sRGB) + linear + Linear light output + gamma22 + Pure power curve (gamma 2.2) + ``icc-profile=`` Load an ICC profile and use it to transform linear RGB to screen output. - Needs LittleCMS 2 support compiled in. This option overrides the ``srgb`` - property, as using both is somewhat redundant. It also enables + Needs LittleCMS 2 support compiled in. This option overrides the + ``target-prim`` and ``target-trc`` options. 
It also enables ``linear-scaling``. - ``icc-profile-auto`` Automatically select the ICC display profile currently specified by the display settings of the operating system. @@ -573,9 +594,8 @@ Available video output drivers are: Its size depends on the ``3dlut-size``, and can be very big. ``icc-intent=`` - Specifies the ICC Intent used for transformations between color spaces. - This affects the rendering when using ``icc-profile`` or ``srgb`` and - also affects the way DCP XYZ content gets converted to RGB. + Specifies the ICC intent used for the color transformation (when using + ``icc-profile``). 0 perceptual diff --git a/video/csputils.c b/video/csputils.c index cee33dbba9..06de4bb9e8 100644 --- a/video/csputils.c +++ b/video/csputils.c @@ -70,6 +70,7 @@ const char *const mp_csp_trc_names[MP_CSP_TRC_COUNT] = { "BT.1886 (SD, HD, UHD)", "sRGB (IEC 61966-2-1)", "Linear light", + "Pure power (gamma 2.2)", }; const char *const mp_csp_equalizer_names[MP_CSP_EQ_COUNT] = { @@ -156,6 +157,7 @@ enum mp_csp_trc avcol_trc_to_mp_csp_trc(int avtrc) case AVCOL_TRC_BT2020_12: return MP_CSP_TRC_BT_1886; case AVCOL_TRC_IEC61966_2_1: return MP_CSP_TRC_SRGB; case AVCOL_TRC_LINEAR: return MP_CSP_TRC_LINEAR; + case AVCOL_TRC_GAMMA22: return MP_CSP_TRC_GAMMA22; default: return MP_CSP_TRC_AUTO; } } @@ -202,6 +204,7 @@ int mp_csp_trc_to_avcol_trc(enum mp_csp_trc trc) case MP_CSP_TRC_BT_1886: return AVCOL_TRC_BT709; case MP_CSP_TRC_SRGB: return AVCOL_TRC_IEC61966_2_1; case MP_CSP_TRC_LINEAR: return AVCOL_TRC_LINEAR; + case MP_CSP_TRC_GAMMA22: return AVCOL_TRC_GAMMA22; default: return AVCOL_TRC_UNSPECIFIED; } } diff --git a/video/csputils.h b/video/csputils.h index a082682e43..a68c106549 100644 --- a/video/csputils.h +++ b/video/csputils.h @@ -76,6 +76,7 @@ enum mp_csp_trc { MP_CSP_TRC_BT_1886, MP_CSP_TRC_SRGB, MP_CSP_TRC_LINEAR, + MP_CSP_TRC_GAMMA22, MP_CSP_TRC_COUNT }; diff --git a/video/out/gl_osd.c b/video/out/gl_osd.c index 0ab85f59c4..7a9532d416 100644 --- a/video/out/gl_osd.c +++ b/video/out/gl_osd.c @@ -294,7 +294,7 @@ static void gen_osd_cb(void *pctx, struct sub_bitmaps *imgs) osd->num_subparts * sizeof(osd->subparts[0])); } -static void write_quad(struct vertex *va, float matrix[3][3], +static void write_quad(struct vertex *va, float matrix[3][2], float x0, float y0, float x1, float y1, float tx0, float ty0, float tx1, float ty1, float tex_w, float tex_h, const uint8_t color[4]) @@ -312,7 +312,7 @@ static void write_quad(struct vertex *va, float matrix[3][3], #undef COLOR_INIT } -static int generate_verts(struct mpgl_osd_part *part, float matrix[3][3]) +static int generate_verts(struct mpgl_osd_part *part, float matrix[3][2]) { int num_vertices = part->num_subparts * 6; MP_TARRAY_GROW(part, part->vertices, num_vertices); @@ -337,7 +337,7 @@ static int generate_verts(struct mpgl_osd_part *part, float matrix[3][3]) return num_vertices; } -static void draw_part(struct mpgl_osd *ctx, int index, float matrix[3][3]) +static void draw_part(struct mpgl_osd *ctx, int index, float matrix[3][2]) { GL *gl = ctx->gl; struct mpgl_osd_part *part = ctx->parts[index]; @@ -377,7 +377,7 @@ void mpgl_osd_draw_part(struct mpgl_osd *ctx, int vp_w, int vp_h, int index) for (int x = 0; x < div[0]; x++) { for (int y = 0; y < div[1]; y++) { - float matrix[3][3]; + float matrix[3][2]; gl_matrix_ortho2d(matrix, 0, vp_w, 0, vp_h); diff --git a/video/out/gl_utils.c b/video/out/gl_utils.c index ca2fef10bf..7881a6cf1f 100644 --- a/video/out/gl_utils.c +++ b/video/out/gl_utils.c @@ -418,7 +418,7 @@ void fbotex_uninit(struct fbotex 
*fbo) // Standard parallel 2D projection, except y1 < y0 means that the coordinate // system is flipped, not the projection. -void gl_matrix_ortho2d(float m[3][3], float x0, float x1, float y0, float y1) +void gl_matrix_ortho2d(float m[3][2], float x0, float x1, float y0, float y1) { if (y1 < y0) { float t = y0; @@ -426,12 +426,12 @@ void gl_matrix_ortho2d(float m[3][3], float x0, float x1, float y0, float y1) y1 = t; } - memset(m, 0, 9 * sizeof(float)); m[0][0] = 2.0f / (x1 - x0); + m[0][1] = 0.0f; + m[1][0] = 0.0f; m[1][1] = 2.0f / (y1 - y0); m[2][0] = -(x1 + x0) / (x1 - x0); m[2][1] = -(y1 + y0) / (y1 - y0); - m[2][2] = 1.0f; } static void GLAPIENTRY gl_debug_cb(GLenum source, GLenum type, GLuint id, diff --git a/video/out/gl_utils.h b/video/out/gl_utils.h index a1bb2ecafb..b4f5650ea6 100644 --- a/video/out/gl_utils.h +++ b/video/out/gl_utils.h @@ -86,15 +86,27 @@ bool fbotex_change(struct fbotex *fbo, GL *gl, struct mp_log *log, int w, int h, #define FBOTEX_FUZZY_H 2 void fbotex_set_filter(struct fbotex *fbo, GLenum gl_filter); -void gl_matrix_ortho2d(float m[3][3], float x0, float x1, float y0, float y1); +void gl_matrix_ortho2d(float m[3][2], float x0, float x1, float y0, float y1); -static inline void gl_matrix_mul_vec(float m[3][3], float *x, float *y) +// This treats m as an affine transformation, in other words m[2][n] gets +// added to the output. +static inline void gl_matrix_mul_vec(float m[3][2], float *x, float *y) { float vx = *x, vy = *y; *x = vx * m[0][0] + vy * m[1][0] + m[2][0]; *y = vx * m[0][1] + vy * m[1][1] + m[2][1]; } +struct mp_rect_f { + float x0, y0, x1, y1; +}; + +static inline void gl_matrix_mul_rect(float m[3][2], struct mp_rect_f *r) +{ + gl_matrix_mul_vec(m, &r->x0, &r->y0); + gl_matrix_mul_vec(m, &r->x1, &r->y1); +} + void gl_set_debug_logger(GL *gl, struct mp_log *log); struct gl_shader_cache; diff --git a/video/out/gl_video.c b/video/out/gl_video.c index a52bd82020..5f64dcb1d6 100644 --- a/video/out/gl_video.c +++ b/video/out/gl_video.c @@ -44,7 +44,7 @@ // Pixel width of 1D lookup textures. #define LOOKUP_TEXTURE_SIZE 256 -// Texture units 0-3 are used by the video, with unit 0 for free use. +// Texture units 0-3 are used by the video, and for free use by the passes // Units 4-5 are used for scaler LUTs. 
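
The float[3][3] -> float[3][2] change in gl_utils.h above treats the matrix as a plain affine 2D transform: rows 0 and 1 hold the linear part and row 2 is the translation that gl_matrix_mul_vec() adds to the output (judging by the unchanged function body, the third column was never read anyway). A minimal standalone sketch of the same arithmetic, not part of the patch, with the viewport size and the local helper name invented for illustration:

    #include <stdio.h>

    /* same arithmetic as gl_matrix_mul_vec(): linear part plus translation */
    static void mul_vec(float m[3][2], float *x, float *y)
    {
        float vx = *x, vy = *y;
        *x = vx * m[0][0] + vy * m[1][0] + m[2][0];
        *y = vx * m[0][1] + vy * m[1][1] + m[2][1];
    }

    int main(void)
    {
        /* what gl_matrix_ortho2d(m, 0, 1280, 0, 720) builds: scale pixel
         * coordinates to [0,2], then shift them into NDC [-1,1] */
        float m[3][2] = {
            {2.0f / 1280, 0          },
            {0,           2.0f / 720 },
            {-1,          -1         },
        };
        float x = 640, y = 360;             /* viewport center */
        mul_vec(m, &x, &y);
        printf("(%.1f, %.1f)\n", x, y);     /* prints (0.0, 0.0) */
        return 0;
    }
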
#define TEXUNIT_SCALERS 4 #define TEXUNIT_3DLUT 6 @@ -123,16 +123,15 @@ struct scaler { struct fbosurface { struct fbotex fbotex; int64_t pts; - bool valid; }; -#define FBOSURFACES_MAX 2 +#define FBOSURFACES_MAX 4 struct src_tex { GLuint gl_tex; GLenum gl_target; int tex_w, tex_h; - struct mp_rect src; + struct mp_rect_f src; }; struct gl_video { @@ -171,10 +170,7 @@ struct gl_video { bool has_alpha; char color_swizzle[5]; - float input_gamma, conv_gamma; - float user_gamma; - bool user_gamma_enabled; // shader handles user_gamma - bool sigmoid_enabled; + bool user_gamma_enabled; struct video_image image; @@ -183,20 +179,14 @@ struct gl_video { struct fbosurface surfaces[FBOSURFACES_MAX]; size_t surface_idx; + size_t surface_now; + bool is_interpolated; // state for luma (0) and chroma (1) scalers struct scaler scalers[2]; - // true if scaler is currently upscaling - bool upscaling; - - bool is_interpolated; - struct mp_csp_equalizer video_eq; - // Source and destination color spaces for the CMS matrix - struct mp_csp_primaries csp_src, csp_dest; - struct mp_rect src_rect; // displayed part of the source video struct mp_rect dst_rect; // video rectangle on output window struct mp_osd_res osd_rect; // OSD size/margins @@ -366,7 +356,19 @@ const struct m_sub_options gl_video_conf = { .opts = (const m_option_t[]) { OPT_FLOATRANGE("gamma", gamma, 0, 0.1, 2.0), OPT_FLAG("gamma-auto", gamma_auto, 0), - OPT_FLAG("srgb", srgb, 0), + OPT_CHOICE("target-prim", target_prim, 0, + ({"auto", MP_CSP_PRIM_AUTO}, + {"bt601-525", MP_CSP_PRIM_BT_601_525}, + {"bt601-625", MP_CSP_PRIM_BT_601_625}, + {"bt709", MP_CSP_PRIM_BT_709}, + {"bt2020", MP_CSP_PRIM_BT_2020}, + {"bt470m", MP_CSP_PRIM_BT_470M})), + OPT_CHOICE("target-trc", target_trc, 0, + ({"auto", MP_CSP_TRC_AUTO}, + {"bt1886", MP_CSP_TRC_BT_1886}, + {"srgb", MP_CSP_TRC_SRGB}, + {"linear", MP_CSP_TRC_LINEAR}, + {"gamma22", MP_CSP_TRC_GAMMA22})), OPT_FLAG("npot", npot, 0), OPT_FLAG("pbo", pbo, 0), OPT_STRING_VALIDATE("scale", scalers[0], 0, validate_scaler_opt), @@ -433,6 +435,7 @@ const struct m_sub_options gl_video_conf = { OPT_REPLACED("cparam2", "cscale-param2"), OPT_REPLACED("cradius", "cscale-radius"), OPT_REPLACED("cantiring", "cscale-antiring"), + OPT_REPLACED("srgb", "target-prim=srgb:target-trc=srgb"), {0} }, @@ -479,6 +482,19 @@ void gl_video_set_debug(struct gl_video *p, bool enable) gl_set_debug_logger(gl, enable ? 
p->log : NULL); } +static void gl_video_reset_surfaces(struct gl_video *p) +{ + for (int i = 0; i < FBOSURFACES_MAX; i++) + p->surfaces[i].pts = 0; + p->surface_idx = 0; + p->surface_now = 0; +} + +static size_t fbosurface_next(size_t id) +{ + return (id+1) % FBOSURFACES_MAX; +} + static void recreate_osd(struct gl_video *p) { if (p->osd) @@ -507,6 +523,8 @@ static void uninit_rendering(struct gl_video *p) gl->DeleteTextures(1, &p->dither_texture); p->dither_texture = 0; + + gl_video_reset_surfaces(p); } void gl_video_set_lut3d(struct gl_video *p, struct lut3d *lut3d) @@ -546,13 +564,28 @@ void gl_video_set_lut3d(struct gl_video *p, struct lut3d *lut3d) reinit_rendering(p); } -static void pass_set_image_textures(struct gl_video *p, struct video_image *vimg) +static void pass_load_fbotex(struct gl_video *p, struct fbotex *src_fbo, int id, + int w, int h) +{ + p->pass_tex[id] = (struct src_tex){ + .gl_tex = src_fbo->texture, + .gl_target = GL_TEXTURE_2D, + .tex_w = src_fbo->tex_w, + .tex_h = src_fbo->tex_h, + .src = {0, 0, w, h}, + }; +} + +static void pass_set_image_textures(struct gl_video *p, struct video_image *vimg, + float chroma[3][2]) { GLuint imgtex[4] = {0}; assert(vimg->mpi); - float offset[2] = {0}; + float ls_w = 1.0 / (1 << p->image_desc.chroma_xs); + float ls_h = 1.0 / (1 << p->image_desc.chroma_ys); + int chroma_loc = p->opts.chroma_location; if (!chroma_loc) chroma_loc = p->image_params.chroma_location; @@ -564,13 +597,21 @@ static void pass_set_image_textures(struct gl_video *p, struct video_image *vimg // so that the luma and chroma sample line up exactly. // For 4:4:4, setting chroma location should have no effect at all. // luma sample size (in chroma coord. space) - float ls_w = 1.0 / (1 << p->image_desc.chroma_xs); - float ls_h = 1.0 / (1 << p->image_desc.chroma_ys); - // move chroma center to luma center (in chroma coord. space) - offset[0] = ls_w < 1 ? ls_w * -cx / 2 : 0; - offset[1] = ls_h < 1 ? ls_h * -cy / 2 : 0; + chroma[2][0] = ls_w < 1 ? ls_w * -cx / 2 : 0; + chroma[2][1] = ls_h < 1 ? ls_h * -cy / 2 : 0; + } else { + chroma[2][0] = chroma[2][1] = 0.0; } + // Make sure luma/chroma sizes are aligned. + // Example: For 4:2:0 with size 3x3, the subsampled chroma plane is 2x2 + // so luma (3,3) has to align with chroma (2,2). + chroma[0][0] = ls_w * (float)vimg->planes[0].tex_w + / vimg->planes[1].tex_w; + chroma[1][1] = ls_h * (float)vimg->planes[0].tex_h + / vimg->planes[1].tex_h; + chroma[0][1] = chroma[1][0] = 0.0; // No rotation etc. 
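
The alignment comment above is easiest to follow with concrete numbers. A throwaway sketch, not part of the patch, for the 3x3-luma / 2x2-chroma case the comment mentions (all variable names are local to the example):

    #include <stdio.h>

    int main(void)
    {
        int luma_w = 3, chroma_w = 2;       /* 4:2:0 with an odd width */
        int chroma_xs = 1;                  /* x shift: chroma is half size */
        float ls_w = 1.0f / (1 << chroma_xs);
        /* this is chroma[0][0] from pass_set_image_textures() */
        float scale_x = ls_w * (float)luma_w / chroma_w;   /* 0.75 */
        /* the right edge of the luma plane (texcoord 1.0) must land on
         * 1.5 of the 2 chroma texels, i.e. chroma texcoord 0.75 */
        printf("scale_x = %.2f, right edge -> %.2f\n", scale_x, 1.0f * scale_x);
        return 0;
    }
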
+ if (p->hwdec_active) { p->hwdec->driver->map_image(p->hwdec, vimg->mpi, imgtex); } else { @@ -585,17 +626,7 @@ static void pass_set_image_textures(struct gl_video *p, struct video_image *vimg .gl_target = t->gl_target, .tex_w = t->tex_w, .tex_h = t->tex_h, - //.src = {0, 0, t->w, t->h}, - .src = { - // xxx this is wrong; we want to crop the source when sampling - // from indirect_fbo, but not when rendering to indirect_fbo - // also, this should apply offset, and take care of odd video - // dimensions properly; and it should use floats instead - .x0 = p->src_rect.x0 >> p->image_desc.xs[n], - .y0 = p->src_rect.y0 >> p->image_desc.ys[n], - .x1 = p->src_rect.x1 >> p->image_desc.xs[n], - .y1 = p->src_rect.y1 >> p->image_desc.ys[n], - }, + .src = {0, 0, t->w, t->h}, }; } } @@ -712,7 +743,7 @@ static void pass_prepare_src_tex(struct gl_video *p) GL *gl = p->gl; struct gl_shader_cache *sc = p->sc; - for (int n = 0; n < p->plane_count; n++) { + for (int n = 0; n < 4; n++) { struct src_tex *s = &p->pass_tex[n]; if (!s->gl_tex) continue; @@ -722,9 +753,9 @@ static void pass_prepare_src_tex(struct gl_video *p) snprintf(texture_name, sizeof(texture_name), "texture%d", n); snprintf(texture_size, sizeof(texture_size), "texture_size%d", n); - gl_sc_uniform_sampler(sc, texture_name, p->gl_target, n); + gl_sc_uniform_sampler(sc, texture_name, s->gl_target, n); float f[2] = {1, 1}; - if (p->gl_target != GL_TEXTURE_RECTANGLE) { + if (s->gl_target != GL_TEXTURE_RECTANGLE) { f[0] = s->tex_w; f[1] = s->tex_h; } @@ -736,12 +767,13 @@ static void pass_prepare_src_tex(struct gl_video *p) gl->ActiveTexture(GL_TEXTURE0); } +// flags = bits 0-1: rotate, bit 2: flip vertically static void render_pass_quad(struct gl_video *p, int vp_w, int vp_h, - const struct mp_rect *dst) + const struct mp_rect *dst, int flags) { struct vertex va[4]; - float matrix[3][3]; + float matrix[3][2]; gl_matrix_ortho2d(matrix, 0, vp_w, 0, vp_h); float x[2] = {dst->x0, dst->x1}; @@ -758,6 +790,8 @@ static void render_pass_quad(struct gl_video *p, int vp_w, int vp_h, if (s->gl_tex) { float tx[2] = {s->src.x0, s->src.x1}; float ty[2] = {s->src.y0, s->src.y1}; + if (flags & 4) + MPSWAP(float, ty[0], ty[1]); bool rect = s->gl_target == GL_TEXTURE_RECTANGLE; v->texcoord[i].x = tx[n / 2] / (rect ? 1 : s->tex_w); v->texcoord[i].y = ty[n % 2] / (rect ? 1 : s->tex_h); @@ -765,20 +799,31 @@ static void render_pass_quad(struct gl_video *p, int vp_w, int vp_h, } } + int rot = flags & 3; + while (rot--) { + static const int perm[4] = {1, 3, 0, 2}; + struct vertex vb[4]; + memcpy(vb, va, sizeof(vb)); + for (int n = 0; n < 4; n++) + memcpy(va[n].texcoord, vb[perm[n]].texcoord, + sizeof(struct vertex_pt[4])); + } + gl_vao_draw_data(&p->vao, GL_TRIANGLE_STRIP, va, 4); debug_check_gl(p, "after rendering"); } +// flags: see render_pass_quad static void finish_pass_direct(struct gl_video *p, GLint fbo, int vp_w, int vp_h, - const struct mp_rect *dst) + const struct mp_rect *dst, int flags) { GL *gl = p->gl; pass_prepare_src_tex(p); gl->BindFramebuffer(GL_FRAMEBUFFER, fbo); gl->Viewport(0, 0, vp_w, vp_h < 0 ? 
-vp_h : vp_h); gl_sc_gen_shader_and_reset(p->sc); - render_pass_quad(p, vp_w, vp_h, dst); + render_pass_quad(p, vp_w, vp_h, dst, flags); gl->BindFramebuffer(GL_FRAMEBUFFER, 0); memset(&p->pass_tex, 0, sizeof(p->pass_tex)); } @@ -787,22 +832,17 @@ static void finish_pass_direct(struct gl_video *p, GLint fbo, int vp_w, int vp_h // FBO, if the required parameters have changed // w, h: required FBO target dimension, and also defines the target rectangle // used for rasterization +// tex: the texture ID to load the result back into // flags: 0 or combination of FBOTEX_FUZZY_W/FBOTEX_FUZZY_H (setting the fuzzy // flags allows the FBO to be larger than the target) static void finish_pass_fbo(struct gl_video *p, struct fbotex *dst_fbo, - int w, int h, int flags) + int w, int h, int tex, int flags) { fbotex_change(dst_fbo, p->gl, p->log, w, h, p->opts.fbo_format, flags); finish_pass_direct(p, dst_fbo->fbo, dst_fbo->tex_w, dst_fbo->tex_h, - &(struct mp_rect){0, 0, w, h}); - p->pass_tex[0] = (struct src_tex){ - .gl_tex = dst_fbo->texture, - .gl_target = GL_TEXTURE_2D, - .tex_w = dst_fbo->tex_w, - .tex_h = dst_fbo->tex_h, - .src = {0, 0, w, h}, - }; + &(struct mp_rect){0, 0, w, h}, 0); + pass_load_fbotex(p, dst_fbo, tex, w, h); } static void uninit_scaler(struct gl_video *p, int scaler_unit) @@ -834,6 +874,9 @@ static void reinit_scaler(struct gl_video *p, int scaler_unit, const char *name, scaler->insufficient = false; scaler->initialized = true; + for (int n = 0; n < 2; n++) + scaler->params[n] = p->opts.scaler_params[scaler->index][n]; + const struct filter_kernel *t_kernel = mp_find_filter_kernel(scaler->name); if (!t_kernel) return; @@ -842,8 +885,8 @@ static void reinit_scaler(struct gl_video *p, int scaler_unit, const char *name, scaler->kernel = &scaler->kernel_storage; for (int n = 0; n < 2; n++) { - if (!isnan(p->opts.scaler_params[scaler->index][n])) - scaler->kernel->params[n] = p->opts.scaler_params[scaler->index][n]; + if (!isnan(scaler->params[n])) + scaler->kernel->params[n] = scaler->params[n]; } scaler->antiring = p->opts.scaler_antiring[scaler->index]; @@ -920,14 +963,15 @@ static void pass_sample_separated_get_weights(struct gl_video *p, GLSL(vec4 c2 = texture(lut, vec2(0.75, fcoord));) GLSL(float weights[6] = float[](c1.r, c1.g, c1.b, c2.r, c2.g, c2.b);) } else { - GLSL(float weights[N];) - GLSL(for (int n = 0; n < N / 4; n++) {) - GLSL( vec4 c = texture(lut, vec2(1.0 / (N / 2) + n / float(N / 4), fcoord));) - GLSL( weights[n * 4 + 0] = c.r;) - GLSL( weights[n * 4 + 1] = c.g;) - GLSL( weights[n * 4 + 2] = c.b;) - GLSL( weights[n * 4 + 3] = c.a;) - GLSL(}) + GLSLF("float weights[%d];\n", N); + for (int n = 0; n < N / 4; n++) { + GLSLF("c = texture(lut, vec2(1.0 / %d + %d / float(%d), fcoord));\n", + N / 2, n, N / 4); + GLSLF("weights[%d] = c.r;\n", n * 4 + 0); + GLSLF("weights[%d] = c.g;\n", n * 4 + 1); + GLSLF("weights[%d] = c.b;\n", n * 4 + 2); + GLSLF("weights[%d] = c.a;\n", n * 4 + 3); + } } } @@ -937,117 +981,294 @@ static void pass_sample_separated_gen(struct gl_video *p, struct scaler *scaler, int d_x, int d_y) { int N = scaler->kernel->size; + bool use_ar = scaler->antiring > 0; + GLSL(vec4 color = vec4(0.0);) + GLSLF("{\n"); GLSLF("vec2 dir = vec2(%d, %d);\n", d_x, d_y); - GLSLF("#define N %d\n", N); - GLSLF("#define ANTIRING %f\n", scaler->antiring); - GLSL(vec2 pt = (vec2(1.0) / texture_size0) * dir;) - GLSL(float fcoord = dot(fract(texcoord0 * texture_size0 - vec2(0.5)), dir);) - GLSL(vec2 base = texcoord0 - fcoord * pt - pt * vec2(N / 2 - 1);) + GLSL(vec2 pt = 
(vec2(1.0) / sample_size) * dir;) + GLSL(float fcoord = dot(fract(sample_pos * sample_size - vec2(0.5)), dir);) + GLSLF("vec2 base = sample_pos - fcoord * pt - pt * vec2(%d);\n", N / 2 - 1); + GLSL(vec4 c;) + if (use_ar) { + GLSL(vec4 hi = vec4(0.0);) + GLSL(vec4 lo = vec4(1.0);) + } pass_sample_separated_get_weights(p, scaler); - GLSL(vec4 color = vec4(0);) - GLSL(vec4 hi = vec4(0);) - GLSL(vec4 lo = vec4(1);) - GLSL(for (int n = 0; n < N; n++) {) - GLSL( vec4 c = texture(texture0, base + pt * vec2(n));) - GLSL( color += vec4(weights[n]) * c;) - GLSL( if (n == N/2-1 || n == N/2) {) - GLSL( lo = min(lo, c);) - GLSL( hi = max(hi, c);) - GLSL( }) - GLSL(}) - GLSL(color = mix(color, clamp(color, lo, hi), ANTIRING);) + GLSLF("// scaler samples\n"); + for (int n = 0; n < N; n++) { + GLSLF("c = texture(texture0, base + pt * vec2(%d));\n", n); + GLSLF("color += vec4(weights[%d]) * c;\n", n); + if (use_ar && (n == N/2-1 || n == N/2)) { + GLSL(lo = min(lo, c);) + GLSL(hi = max(hi, c);) + } + } + if (use_ar) + GLSLF("color = mix(color, clamp(color, lo, hi), %f);\n", scaler->antiring); + GLSLF("}\n"); } -static void pass_sample_separated(struct gl_video *p, struct scaler *scaler, - int w, int h) +static void pass_sample_separated(struct gl_video *p, int src_tex, + struct scaler *scaler, int w, int h, + float transform[3][2]) { + // Keep the x components untouched for the first pass + struct mp_rect_f src_new = p->pass_tex[0].src; + gl_matrix_mul_rect(transform, &src_new); GLSLF("// pass 1\n"); + p->pass_tex[0].src.y0 = src_new.y0; + p->pass_tex[0].src.y1 = src_new.y1; pass_sample_separated_gen(p, scaler, 0, 1); int src_w = p->pass_tex[0].src.x1 - p->pass_tex[0].src.x0; - finish_pass_fbo(p, &scaler->sep_fbo, src_w, h, 0); + finish_pass_fbo(p, &scaler->sep_fbo, src_w, h, src_tex, FBOTEX_FUZZY_H); + // Restore the sample source for the second pass + GLSLF("#define sample_tex texture%d\n", src_tex); + GLSLF("#define sample_pos texcoord%d\n", src_tex); + GLSLF("#define sample_size texture_size%d\n", src_tex); GLSLF("// pass 2\n"); + p->pass_tex[0].src.x0 = src_new.x0; + p->pass_tex[0].src.x1 = src_new.x1; pass_sample_separated_gen(p, scaler, 1, 0); } -// Scale. This uses the p->pass_tex[0] texture as source. It's hardcoded to -// use all variables and values associated with p->pass_tex[0] (which includes -// texture0/texcoord0/texture_size0). -// The src rectangle is implicit in p->pass_tex. +static void pass_sample_polar(struct gl_video *p, struct scaler *scaler) +{ + double radius = scaler->kernel->radius; + int bound = (int)ceil(radius); + bool use_ar = scaler->antiring > 0; + GLSL(vec4 color = vec4(0.0);) + GLSLF("{\n"); + GLSL(vec2 pt = vec2(1.0) / sample_size;) + GLSL(vec2 fcoord = fract(sample_pos * sample_size - vec2(0.5));) + GLSL(vec2 base = sample_pos - fcoord * pt;) + GLSL(vec4 c;) + GLSLF("float w, d, wsum = 0.0;\n"); + if (use_ar) { + GLSL(vec4 lo = vec4(1.0);) + GLSL(vec4 hi = vec4(0.0);) + } + gl_sc_uniform_sampler(p->sc, "lut", scaler->gl_target, + TEXUNIT_SCALERS + scaler->index); + GLSLF("// scaler samples\n"); + for (int y = 1-bound; y <= bound; y++) { + for (int x = 1-bound; x <= bound; x++) { + // Since we can't know the subpixel position in advance, assume a + // worst case scenario + int yy = y > 0 ? y-1 : y; + int xx = x > 0 ? 
x-1 : x; + double dmax = sqrt(xx*xx + yy*yy); + // Skip samples definitely outside the radius + if (dmax >= radius) + continue; + GLSLF("d = length(vec2(%d, %d) - fcoord)/%f;\n", x, y, radius); + // Check for samples that might be skippable + if (dmax >= radius - 1) + GLSLF("if (d < 1.0) {\n"); + GLSL(w = texture1D(lut, d).r;) + GLSL(wsum += w;) + GLSLF("c = texture(sample_tex, base + pt * vec2(%d, %d));\n", x, y); + GLSL(color += vec4(w) * c;) + if (use_ar && x >= 0 && y >= 0 && x <= 1 && y <= 1) { + GLSL(lo = min(lo, c);) + GLSL(hi = max(hi, c);) + } + if (dmax >= radius -1) + GLSLF("}\n"); + } + } + GLSL(color = color / vec4(wsum);) + if (use_ar) + GLSLF("color = mix(color, clamp(color, lo, hi), %f);\n", scaler->antiring); + GLSLF("}\n"); +} + +static void bicubic_calcweights(struct gl_video *p, const char *t, const char *s) +{ + // Explanation of how bicubic scaling with only 4 texel fetches is done: + // http://www.mate.tue.nl/mate/pdfs/10318.pdf + // 'Efficient GPU-Based Texture Interpolation using Uniform B-Splines' + // Explanation why this algorithm normally always blurs, even with unit + // scaling: + // http://bigwww.epfl.ch/preprints/ruijters1001p.pdf + // 'GPU Prefilter for Accurate Cubic B-spline Interpolation' + GLSLF("vec4 %s = vec4(-0.5, 0.1666, 0.3333, -0.3333) * %s" + " + vec4(1, 0, -0.5, 0.5);\n", t, s); + GLSLF("%s = %s * %s + vec4(0, 0, -0.5, 0.5);\n", t, t, s); + GLSLF("%s = %s * %s + vec4(-0.6666, 0, 0.8333, 0.1666);\n", t, t, s); + GLSLF("%s.xy *= vec2(1, 1) / vec2(%s.z, %s.w);\n", t, t, t); + GLSLF("%s.xy += vec2(1 + %s, 1 - %s);\n", t, s, s); +} + +static void pass_sample_bicubic_fast(struct gl_video *p) +{ + GLSL(vec4 color;) + GLSLF("{\n"); + GLSL(vec2 pt = 1.0 / sample_size;) + GLSL(vec2 fcoord = fract(sample_tex * sample_size + vec2(0.5, 0.5));) + bicubic_calcweights(p, "parmx", "fcoord.x"); + bicubic_calcweights(p, "parmy", "fcoord.y"); + GLSL(vec4 cdelta;) + GLSL(cdelta.xz = parmx.RG * vec2(-pt.x, pt.x);) + GLSL(cdelta.yw = parmy.RG * vec2(-pt.y, pt.y);) + // first y-interpolation + GLSL(vec4 ar = texture(sample_tex, sample_pos + cdelta.xy);) + GLSL(vec4 ag = texture(sample_tex, sample_pos + cdelta.xw);) + GLSL(vec4 ab = mix(ag, ar, parmy.b);) + // second y-interpolation + GLSL(vec4 br = texture(sample_tex, sample_pos + cdelta.zy);) + GLSL(vec4 bg = texture(sample_tex, sample_pos + cdelta.zw);) + GLSL(vec4 aa = mix(bg, br, parmy.b);) + // x-interpolation + GLSL(color = mix(aa, ab, parmx.b);) + GLSLF("}\n"); +} + +static void pass_sample_sharpen3(struct gl_video *p, struct scaler *scaler) +{ + GLSL(vec4 color;) + GLSLF("{\n"); + GLSL(vec2 pt = 1.0 / sample_size;) + GLSL(vec2 st = pt * 0.5;) + GLSL(vec4 p = texture(sample_tex, sample_pos);) + GLSL(vec4 sum = texture(sample_tex, sample_pos + st * vec2(+1, +1)) + + texture(sample_tex, sample_pos + st * vec2(+1, -1)) + + texture(sample_tex, sample_pos + st * vec2(-1, +1)) + + texture(sample_tex, sample_pos + st * vec2(-1, -1));) + double param = isnan(scaler->params[0]) ? 
0.5 : scaler->params[0]; + GLSLF("color = p + (p - 0.25 * sum) * %f;\n", param); + GLSLF("}\n"); +} + +static void pass_sample_sharpen5(struct gl_video *p, struct scaler *scaler) +{ + GLSL(vec4 color;) + GLSLF("{\n"); + GLSL(vec2 pt = 1.0 / sample_size;) + GLSL(vec2 st1 = pt * 1.2;) + GLSL(vec4 p = texture(sample_tex, sample_pos);) + GLSL(vec4 sum1 = texture(sample_tex, sample_pos + st1 * vec2(+1, +1)) + + texture(sample_tex, sample_pos + st1 * vec2(+1, -1)) + + texture(sample_tex, sample_pos + st1 * vec2(-1, +1)) + + texture(sample_tex, sample_pos + st1 * vec2(-1, -1));) + GLSL(vec2 st2 = pt * 1.5;) + GLSL(vec4 sum2 = texture(sample_tex, sample_pos + st2 * vec2(+1, 0)) + + texture(sample_tex, sample_pos + st2 * vec2( 0, +1)) + + texture(sample_tex, sample_pos + st2 * vec2(-1, 0)) + + texture(sample_tex, sample_pos + st2 * vec2( 0, -1));) + GLSL(vec4 t = p * 0.859375 + sum2 * -0.1171875 + sum1 * -0.09765625;) + double param = isnan(scaler->params[0]) ? 0.5 : scaler->params[0]; + GLSLF("color = p + t * %f;\n", param); + GLSLF("}\n"); + +} + +// Sample. This samples from the texture ID given by src_tex. It's hardcoded to +// use all variables and values associated with it (which includes textureN, +// texcoordN and texture_sizeN). +// The src rectangle is implicit in p->pass_tex + transform. // The dst rectangle is implicit by what the caller will do next, but w and h // must still be what is going to be used (to dimension FBOs correctly). // This will declare "vec4 color;", which contains the scaled contents. // The scaler unit is initialized by this function; in order to avoid cache // thrashing, the scaler unit should usually use the same parameters. -static void pass_scale(struct gl_video *p, int scaler_unit, const char *name, - double scale_factor, int w, int h) +static void pass_sample(struct gl_video *p, int src_tex, + int scaler_unit, const char *name, double scale_factor, + int w, int h, float transform[3][2]) { struct scaler *scaler = &p->scalers[scaler_unit]; reinit_scaler(p, scaler_unit, name, scale_factor); + // Set up the sample parameters appropriately + GLSLF("#define sample_tex texture%d\n", src_tex); + GLSLF("#define sample_pos texcoord%d\n", src_tex); + GLSLF("#define sample_size texture_size%d\n", src_tex); + + // Set up the transformation for everything other than separated scaling + if (!scaler->kernel || scaler->kernel->polar) + gl_matrix_mul_rect(transform, &p->pass_tex[src_tex].src); + // Dispatch the scaler. They're all wildly different. 
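
The sharpen3/sharpen5 samplers added above are plain unsharp masks: the centre sample is pushed away from a local average, scaled by the strength parameter (0.5 by default, matching the documentation change at the top). A scalar sketch of the same formula, not part of the patch and with invented values, showing why a flat area is left alone while an edge overshoots (the ringing and aliasing the docs warn about):

    #include <stdio.h>

    static float unsharp3(float center, const float diag[4], float strength)
    {
        float avg = 0.25f * (diag[0] + diag[1] + diag[2] + diag[3]);
        return center + (center - avg) * strength;
    }

    int main(void)
    {
        float flat[4] = {0.5f, 0.5f, 0.5f, 0.5f};
        float edge[4] = {0.2f, 0.2f, 0.8f, 0.8f};
        printf("flat: %f\n", unsharp3(0.5f, flat, 0.5f)); /* 0.50, unchanged */
        printf("edge: %f\n", unsharp3(0.8f, edge, 0.5f)); /* 0.95, overshoot */
        return 0;
    }
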
if (strcmp(scaler->name, "bilinear") == 0) { - GLSL(vec4 color = texture(texture0, texcoord0);) - } else if (scaler->kernel && !scaler->kernel->polar) { - pass_sample_separated(p, scaler, w, h); + GLSL(vec4 color = texture(sample_tex, sample_pos);) + } else if (strcmp(scaler->name, "bicubic_fast") == 0) { + pass_sample_bicubic_fast(p); + } else if (strcmp(scaler->name, "sharpen3") == 0) { + pass_sample_sharpen3(p, scaler); + } else if (strcmp(scaler->name, "sharpen5") == 0) { + pass_sample_sharpen5(p, scaler); + } else if (scaler->kernel && scaler->kernel->polar) { + pass_sample_polar(p, scaler); + } else if (scaler->kernel) { + pass_sample_separated(p, src_tex, scaler, w, h, transform); } else { - abort(); //not implemented yet + // Should never happen + abort(); } + + // Micro-optimization: Avoid scaling unneeded channels + if (!p->has_alpha || p->opts.alpha_mode != 1) + GLSL(color.a = 1.0;) } // sample from video textures, set "color" variable to yuv value -// (not sure how exactly this should involve the resamplers) -static void pass_read_video(struct gl_video *p, bool *use_indirect) +static void pass_read_video(struct gl_video *p) { - pass_set_image_textures(p, &p->image); + float chromafix[3][2]; + pass_set_image_textures(p, &p->image, chromafix); - if (p->plane_count > 1) { + if (p->plane_count == 1) { + GLSL(vec4 color = texture(texture0, texcoord0);) + goto fixalpha; + } + + const char *cscale = p->opts.scalers[1]; + if (p->image_desc.flags & MP_IMGFLAG_SUBSAMPLED && + strcmp(cscale, "bilinear") != 0) { + struct src_tex luma = p->pass_tex[0]; + if (p->plane_count > 2) { + // For simplicity and performance, we merge the chroma planes + // into a single texture before scaling, so the scaler doesn't + // need to run multiple times. + GLSLF("// chroma merging\n"); + GLSL(vec4 color = vec4(texture(texture1, texcoord0).r, + texture(texture2, texcoord2).r, + 0.0, 1.0);) + int c_w = p->pass_tex[1].src.x1 - p->pass_tex[1].src.x0; + int c_h = p->pass_tex[1].src.y1 - p->pass_tex[1].src.y0; + assert(c_w == p->pass_tex[2].src.x1 - p->pass_tex[2].src.x0); + assert(c_h == p->pass_tex[2].src.y1 - p->pass_tex[2].src.y0); + finish_pass_fbo(p, &p->chroma_merge_fbo, c_w, c_h, 1, 0); + } + GLSLF("// chroma scaling\n"); + pass_sample(p, 1, 1, cscale, 1.0, p->image_w, p->image_h, chromafix); + GLSL(vec2 chroma = color.rg;) + // Always force rendering to a FBO before main scaling, or we would + // scale chroma incorrectly. + p->use_indirect = true; + p->pass_tex[0] = luma; // Restore luma after scaling + } else { + GLSL(vec4 color;) if (p->plane_count == 2) { - GLSL(vec2 chroma = texture(texture1, texcoord1).RG;) // NV formats + gl_matrix_mul_rect(chromafix, &p->pass_tex[1].src); + GLSL(vec2 chroma = texture(texture1, texcoord0).rg;) // NV formats } else { + gl_matrix_mul_rect(chromafix, &p->pass_tex[1].src); + gl_matrix_mul_rect(chromafix, &p->pass_tex[2].src); GLSL(vec2 chroma = vec2(texture(texture1, texcoord1).r, texture(texture2, texcoord2).r);) } + } - const char *cscale = p->opts.scalers[1]; - if (p->image_desc.flags & MP_IMGFLAG_SUBSAMPLED && - strcmp(cscale, "bilinear") != 0) { - GLSLF("// chroma merging\n"); - GLSL(vec4 color = vec4(chroma.r, chroma.g, 0.0, 0.0);) - if (1) { //p->plane_count > 2) { - // For simplicity - and maybe also for performance - we merge - // the chroma planes into one texture before scaling. So the - // scaler doesn't need to deal with more than 1 source texture. 
- int c_w = p->pass_tex[1].src.x1 - p->pass_tex[1].src.x0; - int c_h = p->pass_tex[1].src.y1 - p->pass_tex[1].src.y0; - finish_pass_fbo(p, &p->chroma_merge_fbo, c_w, c_h, 0); - } - GLSLF("// chroma scaling\n"); - pass_scale(p, 1, cscale, 1.0, p->image_w, p->image_h); - GLSL(vec2 chroma = color.rg;) - // Always force rendering to a FBO before main scaling, or we would - // scale chroma incorrectly. - *use_indirect = true; + GLSL(color = vec4(texture(texture0, texcoord0).r, chroma, 1.0);) - // What we'd really like to do is putting the output of the chroma - // scaler on texture unit 1, and leave luma on unit 0 (alpha on 3). - // But this obviously doesn't work, so here's an extremely shitty - // hack. Keep in mind that the shader already uses tex unit 0, so - // it can't be changed. alpha is missing too. - struct src_tex prev = p->pass_tex[0]; - pass_set_image_textures(p, &p->image); - p->pass_tex[1] = p->pass_tex[0]; - p->pass_tex[0] = prev; - GLSL(color = vec4(texture(texture1, texcoord1).r, chroma, 0);) - } else { - GLSL(vec4 color = vec4(0.0, chroma, 0.0);) - // These always use bilinear; either because the scaler is bilinear, - // or because we use an indirect pass. - GLSL(color.r = texture(texture0, texcoord0).r;) - if (p->has_alpha && p->plane_count >= 4) - GLSL(color.a = texture(texture3, texcoord3).r;) - } - } else { - GLSL(vec4 color = texture(texture0, texcoord0);) +fixalpha: + if (p->has_alpha) { + if (p->plane_count >= 4) + GLSL(color.a = texture(texture3, texcoord3).r;) + if (p->opts.alpha_mode == 0) // none + GLSL(color.a = 1.0;) + if (p->opts.alpha_mode == 2) // blend + GLSL(color = vec4(color.rgb * color.a, 1.0);) } } @@ -1056,33 +1277,38 @@ static void pass_convert_yuv(struct gl_video *p) { struct gl_shader_cache *sc = p->sc; + struct mp_csp_params cparams = MP_CSP_PARAMS_DEFAULTS; + cparams.gray = p->is_yuv && !p->is_packed_yuv && p->plane_count == 1; + cparams.input_bits = p->image_desc.component_bits; + cparams.texture_bits = (cparams.input_bits + 7) & ~7; + mp_csp_set_image_params(&cparams, &p->image_params); + mp_csp_copy_equalizer_values(&cparams, &p->video_eq); + + float user_gamma = cparams.gamma * p->opts.gamma; + p->user_gamma_enabled |= user_gamma != 1.0; + GLSLF("// color conversion\n"); if (p->color_swizzle[0]) GLSLF("color = color.%s;\n", p->color_swizzle); - // Conversion from Y'CbCr or other spaces to RGB - if (!p->is_rgb) { - struct mp_csp_params cparams = MP_CSP_PARAMS_DEFAULTS; - cparams.gray = p->is_yuv && !p->is_packed_yuv && p->plane_count == 1; - cparams.input_bits = p->image_desc.component_bits; - cparams.texture_bits = (cparams.input_bits + 7) & ~7; - mp_csp_set_image_params(&cparams, &p->image_params); - mp_csp_copy_equalizer_values(&cparams, &p->video_eq); - if (p->image_desc.flags & MP_IMGFLAG_XYZ) { - cparams.colorspace = MP_CSP_XYZ; - cparams.input_bits = 8; - cparams.texture_bits = 8; - } + // Pre-colormatrix input gamma correction + if (p->image_desc.flags & MP_IMGFLAG_XYZ) { + cparams.colorspace = MP_CSP_XYZ; + cparams.input_bits = 8; + cparams.texture_bits = 8; + // Pre-colormatrix input gamma correction. 
Note that this results in + // linear light + GLSL(color.rgb *= vec3(2.6);) + } + + // Conversion from Y'CbCr or other linear spaces to RGB + if (!p->is_rgb) { struct mp_cmat m = {{{0}}}; if (p->image_desc.flags & MP_IMGFLAG_XYZ) { - // Hard-coded as relative colorimetric for now, since this transforms - // from the source file's D55 material to whatever color space our - // projector/display lives in, which should be D55 for a proper - // home cinema setup either way. - mp_get_xyz2rgb_coeffs(&cparams, p->csp_src, - MP_INTENT_RELATIVE_COLORIMETRIC, &m); + struct mp_csp_primaries csp = mp_get_csp_primaries(p->image_params.primaries); + mp_get_xyz2rgb_coeffs(&cparams, csp, MP_INTENT_RELATIVE_COLORIMETRIC, &m); } else { mp_get_yuv2rgb_coeffs(&cparams, &m); } @@ -1091,6 +1317,50 @@ static void pass_convert_yuv(struct gl_video *p) GLSL(color.rgb = mat3(colormatrix) * color.rgb + colormatrix_c;) } + + if (p->image_params.colorspace == MP_CSP_BT_2020_C) { + p->use_indirect = true; + // Conversion for C'rcY'cC'bc via the BT.2020 CL system: + // C'bc = (B'-Y'c) / 1.9404 | C'bc <= 0 + // = (B'-Y'c) / 1.5816 | C'bc > 0 + // + // C'rc = (R'-Y'c) / 1.7184 | C'rc <= 0 + // = (R'-Y'c) / 0.9936 | C'rc > 0 + // + // as per the BT.2020 specification, table 4. This is a non-linear + // transformation because (constant) luminance receives non-equal + // contributions from the three different channels. + GLSLF("// constant luminance conversion\n"); + GLSL(color.br = color.br * mix(vec2(1.5816, 0.9936), + vec2(1.9404, 1.7184), + lessThanEqual(color.br, vec2(0))) + + color.gg;) + // Expand channels to camera-linear light. This shader currently just + // assumes everything uses the BT.2020 12-bit gamma function, since the + // difference between 10 and 12-bit is negligible for anything other + // than 12-bit content. + GLSL(color.rgb = mix(color.rgb / vec3(4.5), + pow((color.rgb + vec3(0.0993))/vec3(1.0993), vec3(1.0/0.45)), + lessThanEqual(vec3(0.08145), color.rgb));) + // Calculate the green channel from the expanded RYcB + // The BT.2020 specification says Yc = 0.2627*R + 0.6780*G + 0.0593*B + GLSL(color.g = (color.g - 0.2627*color.r - 0.0593*color.b)/0.6780;) + // Re-compand to receive the R'G'B' result, same as other systems + GLSL(color.rgb = mix(color.rgb * vec3(4.5), + vec3(1.0993) * pow(color.rgb, vec3(0.45)) - vec3(0.0993), + lessThanEqual(vec3(0.0181), color.rgb));) + } + + GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);) + + if (p->user_gamma_enabled) { + p->use_indirect = true; + gl_sc_uniform_f(sc, "user_gamma", user_gamma); + GLSL(color.rgb = pow(color.rgb, vec3(1.0 / user_gamma));) + } + + if (!p->has_alpha) + GLSL(color.a = 1.0;) } static void get_scale_factors(struct gl_video *p, double xy[2]) @@ -1101,7 +1371,9 @@ static void get_scale_factors(struct gl_video *p, double xy[2]) (double)(p->src_rect.y1 - p->src_rect.y0); } -static void pass_scale_main(struct gl_video *p, bool use_indirect) +// Takes care of the main scaling and post-conversions such as gamut/gamma +// mapping or color management. +static void pass_render_main(struct gl_video *p) { // Figure out the main scaler. 
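
The constant-luminance branch above expands the samples to linear light and then re-compands them, and the two shader snippets are meant to be exact inverses of the BT.2020 transfer function (using the 12-bit constants, as the comment notes). A quick standalone check of that round trip, not part of the patch; the helper names are invented and it needs -lm:

    #include <math.h>
    #include <stdio.h>

    static double bt2020_expand(double v)   /* V' -> linear light */
    {
        return v <= 0.08145 ? v / 4.5
                            : pow((v + 0.0993) / 1.0993, 1.0 / 0.45);
    }

    static double bt2020_compand(double e)  /* linear light -> V' */
    {
        return e <= 0.0181 ? e * 4.5
                           : 1.0993 * pow(e, 0.45) - 0.0993;
    }

    int main(void)
    {
        /* both branches round-trip: above and below the 0.0181 knee */
        printf("%f\n", bt2020_expand(bt2020_compand(0.5)));   /* ~0.5  */
        printf("%f\n", bt2020_expand(bt2020_compand(0.01)));  /* ~0.01 */
        return 0;
    }
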
double xy[2]; @@ -1123,15 +1395,150 @@ static void pass_scale_main(struct gl_video *p, bool use_indirect) scale_factor = FFMAX(1.0, 1.0 / f); } - GLSLF("// main scaling\n"); - if (!use_indirect && strcmp(scaler, "bilinear") == 0) { - // implicitly scale in pass_video_to_screen - } else { - finish_pass_fbo(p, &p->indirect_fbo, p->image_w, p->image_h, 0); + bool use_cms = p->use_lut_3d || p->opts.target_prim != MP_CSP_PRIM_AUTO + || p->opts.target_trc != MP_CSP_TRC_AUTO; - int w = p->dst_rect.x1 - p->dst_rect.x0; - int h = p->dst_rect.y1 - p->dst_rect.y0; - pass_scale(p, 0, scaler, scale_factor, w, h); + // Pre-conversion, like linear light/sigmoidization + GLSLF("// scaler pre-conversion\n"); + bool use_linear = p->opts.linear_scaling || p->opts.sigmoid_upscaling + || use_cms || p->image_params.gamma == MP_CSP_TRC_LINEAR; + if (use_linear) { + p->use_indirect = true; + switch (p->image_params.gamma) { + case MP_CSP_TRC_SRGB: + GLSL(color.rgb = mix(color.rgb / vec3(12.92), + pow((color.rgb + vec3(0.055))/vec3(1.055), + vec3(2.4)), + lessThanEqual(vec3(0.04045), color.rgb));) + break; + case MP_CSP_TRC_BT_1886: + GLSL(color.rgb = pow(color.rgb, vec3(1.961));) + break; + case MP_CSP_TRC_GAMMA22: + GLSL(color.rgb = pow(color.rgb, vec3(2.2));) + break; + } + } + + bool use_sigmoid = use_linear && p->opts.sigmoid_upscaling && upscaling; + float sig_center, sig_slope, sig_offset, sig_scale; + if (use_sigmoid) { + p->use_indirect = true; + // Coefficients for the sigmoidal transform are taken from the + // formula here: http://www.imagemagick.org/Usage/color_mods/#sigmoidal + sig_center = p->opts.sigmoid_center; + sig_slope = p->opts.sigmoid_slope; + // This function needs to go through (0,0) and (1,1) so we compute the + // values at 1 and 0, and then scale/shift them, respectively. + sig_offset = 1.0/(1+expf(sig_slope * sig_center)); + sig_scale = 1.0/(1+expf(sig_slope * (sig_center-1))) - sig_offset; + GLSLF("color.rgb = %f - log(1.0/(color.rgb * %f + %f) - 1.0)/%f;\n", + sig_center, sig_scale, sig_offset, sig_slope); + } + + // Compute the cropped and rotated transformation + float sx = (p->src_rect.x1 - p->src_rect.x0) / (float)p->image_w, + sy = (p->src_rect.y1 - p->src_rect.y0) / (float)p->image_h, + ox = p->src_rect.x0, + oy = p->src_rect.y0; + float transform[3][2] = {{sx,0.0}, {0.0,sy}, {ox,oy}}; + + int xc = 0, yc = 1, + vp_w = p->dst_rect.x1 - p->dst_rect.x0, + vp_h = p->dst_rect.y1 - p->dst_rect.y0; + + if ((p->image_params.rotate % 180) == 90) { + for (int n = 0; n < 3; n++) + MPSWAP(float, transform[n][xc], transform[n][yc]); + MPSWAP(int, xc, yc); + MPSWAP(int, vp_w, vp_h); + } + + GLSLF("// main scaling\n"); + if (!p->use_indirect && strcmp(scaler, "bilinear") == 0) { + // implicitly scale in pass_video_to_screen, but set up the textures + // manually (for cropping etc.). Special care has to be taken for the + // chroma planes (everything except luma=tex0), to make sure the offset + // is scaled to the correct reference frame (in the case of subsampled + // input) + float tchroma[3][2]; + memcpy(tchroma, transform, sizeof(float[3][2])); + tchroma[2][xc] /= 1 << p->image_desc.chroma_xs; + tchroma[2][yc] /= 1 << p->image_desc.chroma_ys; + + for (int n = 0; n < p->plane_count; n++) + gl_matrix_mul_rect(n>0 ? 
tchroma : transform, &p->pass_tex[n].src); + } else { + finish_pass_fbo(p, &p->indirect_fbo, p->image_w, p->image_h, 0, 0); + pass_sample(p, 0, 0, scaler, scale_factor, vp_w, vp_h, transform); + } + + GLSLF("// scaler post-conversion\n"); + if (use_sigmoid) { + // Inverse of the transformation above + GLSLF("color.rgb = (1.0/(1.0 + exp(%f * (%f - color.rgb))) - %f) / %f;\n", + sig_slope, sig_center, sig_offset, sig_scale); + } + + GLSLF("// color management\n"); + enum mp_csp_trc trc_dst = p->opts.target_trc; + enum mp_csp_prim prim_src = p->image_params.primaries, + prim_dst = p->opts.target_prim; + + if (p->use_lut_3d) { + // The 3DLUT is hard-coded against BT.2020's gamut during creation, and + // we never want to adjust its output (so treat it as linear) + prim_dst = MP_CSP_PRIM_BT_2020; + trc_dst = MP_CSP_TRC_LINEAR; + } + + if (prim_dst == MP_CSP_PRIM_AUTO) + prim_dst = prim_src; + if (trc_dst == MP_CSP_TRC_AUTO) { + trc_dst = p->image_params.gamma; + // Pick something more reasonable for linear light inputs + if (p->image_params.gamma == MP_CSP_TRC_LINEAR) + trc_dst = MP_CSP_TRC_GAMMA22; + } + + // Adapt to the right colorspace if necessary + if (prim_src != prim_dst) { + struct mp_csp_primaries csp_src = mp_get_csp_primaries(prim_src), + csp_dst = mp_get_csp_primaries(prim_dst); + float m[3][3] = {{0}}; + mp_get_cms_matrix(csp_src, csp_dst, MP_INTENT_RELATIVE_COLORIMETRIC, m); + gl_sc_uniform_mat3(p->sc, "cms_matrix", true, &m[0][0]); + GLSL(color.rgb = cms_matrix * color.rgb;) + } + + if (p->use_lut_3d) { + gl_sc_uniform_sampler(p->sc, "lut_3d", GL_TEXTURE_3D, TEXUNIT_3DLUT); + // For the 3DLUT we are arbitrarily using 2.4 as input gamma to reduce + // the severity of quantization errors. + GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);) + GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));) + GLSL(color.rgb = texture3D(lut_3d, color.rgb).rgb;) + } + + // Don't perform any gamut mapping unless linear light input is present to + // begin with + if (use_linear && trc_dst != MP_CSP_TRC_LINEAR) { + GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);) + switch (trc_dst) { + case MP_CSP_TRC_SRGB: + GLSL(color.rgb = mix(color.rgb * vec3(12.92), + vec3(1.055) * pow(color.rgb, + vec3(1.0/2.4)) + - vec3(0.055), + lessThanEqual(vec3(0.0031308), color.rgb));) + break; + case MP_CSP_TRC_BT_1886: + GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.961));) + break; + case MP_CSP_TRC_GAMMA22: + GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.2));) + break; + } } } @@ -1236,10 +1643,89 @@ static void pass_dither(struct gl_video *p) dither_quantization;) } -static void pass_video_to_screen(struct gl_video *p, int fbo) +// The main rendering function, takes care of everything up to and including +// color management +static void pass_draw_frame(struct gl_video *p) +{ + p->use_indirect = false; // set to true as needed by pass_* + pass_read_video(p); + pass_convert_yuv(p); + pass_render_main(p); +} + +static void pass_draw_to_screen(struct gl_video *p, int fbo) { pass_dither(p); - finish_pass_direct(p, fbo, p->vp_w, p->vp_h, &p->dst_rect); + int flags = (p->image_params.rotate % 90 ? 0 : p->image_params.rotate / 90) + | (p->image.image_flipped ? 
4 : 0); + finish_pass_direct(p, fbo, p->vp_w, p->vp_h, &p->dst_rect, flags); +} + +// Draws an interpolate frame to fbo, based on the frame timing in t +static void gl_video_interpolate_frame(struct gl_video *p, int fbo, + struct frame_timing *t) +{ + int vp_w = p->dst_rect.x1 - p->dst_rect.x0, + vp_h = p->dst_rect.y1 - p->dst_rect.y0, + fuzz = FBOTEX_FUZZY_W | FBOTEX_FUZZY_H; + size_t surface_nxt = fbosurface_next(p->surface_now); + + // First of all, figure out if we have a frame availble at all, and draw + // it manually + reset the queue if not + if (!p->surfaces[p->surface_now].pts) { + pass_draw_frame(p); + finish_pass_fbo(p, &p->surfaces[p->surface_now].fbotex, + vp_w, vp_h, 0, fuzz); + p->surfaces[p->surface_now].pts = t ? t->pts : 0; + p->surface_idx = p->surface_now; + } + + // Render a new frame if it came in and there's room in the queue + size_t surface_dst = fbosurface_next(p->surface_idx); + if (t && surface_dst != p->surface_now && + p->surfaces[p->surface_idx].pts < t->pts) { + MP_STATS(p, "new-pts"); + pass_draw_frame(p); + finish_pass_fbo(p, &p->surfaces[surface_dst].fbotex, + vp_w, vp_h, 0, fuzz); + p->surfaces[surface_dst].pts = t->pts; + p->surface_idx = surface_dst; + } + + // Finally, draw the right mix of frames to the screen. + pass_load_fbotex(p, &p->surfaces[p->surface_now].fbotex, 0, vp_w, vp_h); + if (!t || p->surfaces[surface_nxt].pts < p->surfaces[p->surface_now].pts) { + // No next frame available (eg. start of playback, after reconfigure + // or end of file, so just draw the current frame instead of blending. + // Also occurs when no timing information is available (eg. paused) + GLSL(vec4 color = texture(texture0, texcoord0);) + p->is_interpolated = false; + } else { + int64_t next_pts = p->surfaces[surface_nxt].pts, + vsync_interval = t->next_vsync - t->prev_vsync; + double inter_coeff = (double)(next_pts - t->next_vsync) / vsync_interval, + threshold = p->opts.smoothmotion_threshold; + inter_coeff = inter_coeff <= 0.0 + threshold ? 0.0 : inter_coeff; + inter_coeff = inter_coeff >= 1.0 - threshold ? 
1.0 : inter_coeff; + inter_coeff = 1.0 - inter_coeff; + gl_sc_uniform_f(p->sc, "inter_coeff", inter_coeff); + p->is_interpolated = inter_coeff > 0; + + MP_STATS(p, "frame-mix"); + MP_DBG(p, "inter frame ppts: %lld, pts: %lld, vsync: %lld, mix: %f\n", + (long long)p->surfaces[p->surface_now].pts, + (long long)p->surfaces[surface_nxt].pts, + (long long)t->next_vsync, inter_coeff); + + pass_load_fbotex(p, &p->surfaces[surface_nxt].fbotex, 1, vp_w, vp_h); + GLSL(vec4 color = mix(texture(texture0, texcoord0), + texture(texture1, texcoord1), + inter_coeff);) + // Dequeue the current frame if it's no longer needed + if (t->next_vsync + vsync_interval > p->surfaces[surface_nxt].pts) + p->surface_now = surface_nxt; + } + pass_draw_to_screen(p, fbo); } // (fbo==0 makes BindFramebuffer select the screen backbuffer) @@ -1263,11 +1749,12 @@ void gl_video_render_frame(struct gl_video *p, int fbo, struct frame_timing *t) gl_sc_set_vao(p->sc, &p->vao); - bool indirect = false; - pass_read_video(p, &indirect); - pass_convert_yuv(p); - pass_scale_main(p, indirect); - pass_video_to_screen(p, fbo); + if (p->opts.smoothmotion) { + gl_video_interpolate_frame(p, fbo, t); + } else { + pass_draw_frame(p); + pass_draw_to_screen(p, fbo); + } debug_check_gl(p, "after video rendering"); @@ -1325,6 +1812,8 @@ void gl_video_resize(struct gl_video *p, int vp_w, int vp_h, p->osd_rect = *osd; p->vp_w = vp_w; p->vp_h = vp_h; + + gl_video_reset_surfaces(p); } static bool get_image(struct gl_video *p, struct mp_image *mpi) @@ -1479,15 +1968,24 @@ static void check_gl_features(struct gl_video *p) disabled[n_disabled++] = "dithering (GLES unsupported)"; } - int use_cms = p->opts.srgb || p->use_lut_3d; + int use_cms = p->opts.target_prim != MP_CSP_PRIM_AUTO || + p->opts.target_trc != MP_CSP_TRC_AUTO || p->use_lut_3d; - // srgb_compand() not available - if (!have_mix && p->opts.srgb) { - p->opts.srgb = false; - disabled[n_disabled++] = "sRGB output (GLSL version)"; + // mix() is needed for some gamma functions + if (!have_mix && (p->opts.linear_scaling || p->opts.sigmoid_upscaling)) { + p->opts.linear_scaling = false; + p->opts.sigmoid_upscaling = false; + disabled[n_disabled++] = "linear/sigmoid scaling (GLSL version)"; + } + if (!have_mix && use_cms) { + p->opts.target_prim = MP_CSP_PRIM_AUTO; + p->opts.target_trc = MP_CSP_TRC_AUTO; + p->use_lut_3d = false; + disabled[n_disabled++] = "color management (GLSL version)"; } if (use_cms && !test_fbo(p, &have_fbo)) { - p->opts.srgb = false; + p->opts.target_prim = MP_CSP_PRIM_AUTO; + p->opts.target_trc = MP_CSP_TRC_AUTO; p->use_lut_3d = false; disabled[n_disabled++] = "color management (FBO)"; } @@ -1495,11 +1993,6 @@ static void check_gl_features(struct gl_video *p) p->opts.smoothmotion = false; disabled[n_disabled++] = "smoothmotion (FBO)"; } - // because of bt709_expand() - if (!have_mix && p->use_lut_3d) { - p->use_lut_3d = false; - disabled[n_disabled++] = "color management (GLSL version)"; - } if (gl->es && p->opts.pbo) { p->opts.pbo = 0; disabled[n_disabled++] = "PBOs (GLES unsupported)"; @@ -1600,9 +2093,7 @@ void gl_video_unset_gl_state(struct gl_video *p) void gl_video_reset(struct gl_video *p) { - for (int i = 0; i < FBOSURFACES_MAX; i++) - p->surfaces[i].pts = 0; - p->surface_idx = 0; + gl_video_reset_surfaces(p); } bool gl_video_showing_interpolated_frame(struct gl_video *p) @@ -1771,7 +2262,7 @@ void gl_video_config(struct gl_video *p, struct mp_image_params *params) init_video(p); } - //check_resize(p); + gl_video_reset_surfaces(p); } void 
gl_video_set_output_depth(struct gl_video *p, int r, int g, int b) @@ -1795,7 +2286,6 @@ struct gl_video *gl_video_init(GL *gl, struct mp_log *log, struct osd_state *osd .opts = gl_video_opts_def, .gl_target = GL_TEXTURE_2D, .texture_16bit_depth = 16, - .user_gamma = 1.0f, .scalers = { { .index = 0, .name = "bilinear" }, { .index = 1, .name = "bilinear" }, diff --git a/video/out/gl_video.h b/video/out/gl_video.h index 1de619870f..9c70e9ba82 100644 --- a/video/out/gl_video.h +++ b/video/out/gl_video.h @@ -31,12 +31,13 @@ struct lut3d { struct gl_video_opts { char *scalers[2]; char *dscaler; + float gamma; + int gamma_auto; + int target_prim; + int target_trc; float scaler_params[2][2]; float scaler_radius[2]; float scaler_antiring[2]; - float gamma; - int gamma_auto; - int srgb; int linear_scaling; int fancy_downscaling; int sigmoid_upscaling;
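
As a closing illustration of the smoothmotion changes: the mix factor computed in gl_video_interpolate_frame() is just the position of the next frame's pts relative to the upcoming vsync, clamped by the threshold and then inverted so it becomes the weight of the next frame. A small numeric sketch, not part of the patch, with an invented helper and timestamps (in the same units as t->pts):

    #include <stdio.h>

    static double mix_coeff(long long next_pts, long long prev_vsync,
                            long long next_vsync, double threshold)
    {
        long long vsync_interval = next_vsync - prev_vsync;
        double c = (double)(next_pts - next_vsync) / vsync_interval;
        c = c <= 0.0 + threshold ? 0.0 : c;
        c = c >= 1.0 - threshold ? 1.0 : c;
        return 1.0 - c;   /* weight of the *next* frame in the mix() */
    }

    int main(void)
    {
        /* 60 Hz display (vsync every 16667 units); the next frame's pts
         * lands about half a vsync past the upcoming flip, so it already
         * contributes roughly 0.5 to this refresh */
        printf("%f\n", mix_coeff(25000, 0, 16667, 0.0));
        return 0;
    }
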