vo_opengl: implement debanding (and remove source-shader)

The removal of source-shader is a side effect, since this effectively
replaces it - and the video-reading code has been significantly
restructured to make more sense and be more readable.

This means users no longer have to constantly download and maintain a
separate deband.glsl installation alongside mpv, which was the only real
use case for source-shader that we found either way.
This commit is contained in:
Niklas Haas 2015-09-05 17:39:27 +02:00 committed by wm4
parent 95d5bee832
commit 97363e176d
7 changed files with 227 additions and 97 deletions

View File

@ -550,17 +550,9 @@ Available video output drivers are:
feature doesn't work correctly with different scale factors in
different directions.
``source-shader=<file>``, ``scale-shader=<file>``, ``pre-shaders=<files>``, ``post-shaders=<files>``
``pre-shaders=<files>``, ``post-shaders=<files>``, ``scale-shader=<file>``
Custom GLSL fragment shaders.
source-shader
This gets applied directly onto the source planes, before
any sort of upscaling or conversion whatsoever. For YCbCr content,
this means it gets applied on the luma and chroma planes
separately. In general, this shader shouldn't be making any
assumptions about the colorspace. It could be RGB, YCbCr, XYZ or
something else entirely. It's used purely for fixing numerical
quirks of the input, eg. debanding or deblocking.
pre-shaders (list)
These get applied after conversion to RGB and before linearization
and upscaling. Operates on non-linear RGB (same as input). This is
@ -601,10 +593,6 @@ Available video output drivers are:
never resets (regardless of seeks).
vec2 image_size
The size in pixels of the input image.
float cmul (source-shader only)
The multiplier needed to pull colors up to the right bit depth. The
source-shader must multiply any sampled colors by this, in order
to normalize them to the full scale.
For example, a shader that inverts the colors could look like this::
@ -614,6 +602,37 @@ Available video output drivers are:
return vec4(1.0 - color.rgb, color.a);
}
``deband``
Enable the debanding algorithm. This greatly reduces the amount of
visible banding, blocking and other quantization artifacts, at the
expense of very slightly blurring some of the finest details. In
practice, it's virtually always an improvement - the only reason to
disable it would be for performance.
``deband-iterations=<1..16>``
The number of debanding steps to perform per sample. Each step reduces
a bit more banding, but takes time to compute. Note that the strength
of each step falls off very quickly, so high numbers are practically
useless. (Default 4)
If the performance hit of debanding is too great, you can reduce this
to 2 or 1 with marginal visual quality loss.
``deband-threshold=<0..4096>``
The debanding filter's cut-off threshold. Higher numbers increase the
debanding strength dramatically but progressively diminish image
details. (Default 64)
``deband-range=<1..64>``
The debanding filter's initial radius. The radius increases linearly
for each iteration. A higher radius will find more gradients, but
a lower radius will smooth more aggressively. (Default 8)
``deband-grain=<0..4096>``
Add some extra noise to the image. This significantly helps cover up
remaining quantization artifacts. Higher numbers add more noise.
(Default 48)
``sigmoid-upscaling``
When upscaling, use a sigmoidal color transform to avoid emphasizing
ringing artifacts. This also implies ``linear-scaling``.
@ -840,7 +859,7 @@ Available video output drivers are:
This is equivalent to::
--vo=opengl:scale=spline36:cscale=spline36:dscale=mitchell:dither-depth=auto:fancy-downscaling:sigmoid-upscaling:pbo
--vo=opengl:scale=spline36:cscale=spline36:dscale=mitchell:dither-depth=auto:fancy-downscaling:sigmoid-upscaling:pbo:deband
Note that some cheaper LCDs do dithering that gravely interferes with
``opengl``'s dithering. Disabling dithering with ``dither-depth=no`` helps.

View File

@ -578,6 +578,14 @@ void gl_sc_hadd(struct gl_shader_cache *sc, const char *text)
sc->header_text = talloc_strdup_append(sc->header_text, text);
}
// printf-style counterpart of gl_sc_hadd(): expands textf with the variadic
// arguments and appends the result to the shader cache's header text.
void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...)
{
    va_list args;

    va_start(args, textf);
    ta_xvasprintf_append(&sc->header_text, textf, args);
    va_end(args);
}
const char *gl_sc_loadfile(struct gl_shader_cache *sc, const char *path)
{
if (!path || !path[0] || !sc->global)

View File

@ -125,6 +125,7 @@ void gl_sc_destroy(struct gl_shader_cache *sc);
void gl_sc_add(struct gl_shader_cache *sc, const char *text);
void gl_sc_addf(struct gl_shader_cache *sc, const char *textf, ...);
void gl_sc_hadd(struct gl_shader_cache *sc, const char *text);
void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...);
const char *gl_sc_loadfile(struct gl_shader_cache *sc, const char *path);
void gl_sc_uniform_sampler(struct gl_shader_cache *sc, char *name, GLenum target,
int unit);

View File

@ -157,7 +157,7 @@ struct gl_video {
struct video_image image;
struct fbotex chroma_merge_fbo;
struct fbotex source_fbo;
struct fbotex chroma_deband_fbo;
struct fbotex indirect_fbo;
struct fbotex blend_subs_fbo;
struct fbosurface surfaces[FBOSURFACES_MAX];
@ -341,6 +341,7 @@ const struct gl_video_opts gl_video_opts_hq_def = {
.gamma = 1.0f,
.blend_subs = 0,
.pbo = 1,
.deband = 1,
};
static int validate_scaler_opt(struct mp_log *log, const m_option_t *opt,
@ -411,16 +412,18 @@ const struct m_sub_options gl_video_conf = {
({"no", 0},
{"yes", 1},
{"video", 2})),
OPT_STRING("source-shader", source_shader, 0),
OPT_STRING("scale-shader", scale_shader, 0),
OPT_STRINGLIST("pre-shaders", pre_shaders, 0),
OPT_STRINGLIST("post-shaders", post_shaders, 0),
OPT_FLAG("deband", deband, 0),
OPT_SUBSTRUCT("deband", deband_opts, deband_conf, 0),
OPT_REMOVED("approx-gamma", "this is always enabled now"),
OPT_REMOVED("cscale-down", "chroma is never downscaled"),
OPT_REMOVED("scale-sep", "this is set automatically whenever sane"),
OPT_REMOVED("indirect", "this is set automatically whenever sane"),
OPT_REMOVED("srgb", "use target-prim=bt709:target-trc=srgb instead"),
OPT_REMOVED("source-shader", "use :deband to enable debanding"),
OPT_REPLACED("lscale", "scale"),
OPT_REPLACED("lscale-down", "scale-down"),
@ -531,7 +534,7 @@ static void uninit_rendering(struct gl_video *p)
p->dither_texture = 0;
fbotex_uninit(&p->chroma_merge_fbo);
fbotex_uninit(&p->source_fbo);
fbotex_uninit(&p->chroma_deband_fbo);
fbotex_uninit(&p->indirect_fbo);
fbotex_uninit(&p->blend_subs_fbo);
@ -1091,102 +1094,93 @@ static void pass_read_video(struct gl_video *p)
struct gl_transform chromafix;
pass_set_image_textures(p, &p->image, &chromafix);
// The custom shader logic is a bit tricky, but there are basically three
// different places it can occur: RGB, or chroma *and* luma (which are
// treated separately even for 4:4:4 content, but the minor speed loss
// is not worth the complexity it would require).
const char *shader = gl_sc_loadfile(p->sc, p->opts.source_shader);
// Since this is before normalization, we have to take into account
// the bit depth. Specifically, we want the shader to perform normalization
// to 16 bit because otherwise it results in bad quantization, especially
// with 8-bit FBOs (where it just destroys the image completely)
int in_bits = p->image_desc.component_bits,
tx_bits = (in_bits + 7) & ~7;
float cmul = ((1 << tx_bits) - 1.0) / ((1 << in_bits) - 1.0);
// Custom source shaders are required to output at range [0.0, 1.0]
p->use_normalized_range = shader != NULL;
float tex_mul = ((1 << tx_bits) - 1.0) / ((1 << in_bits) - 1.0);
if (p->image_desc.flags & MP_IMGFLAG_XYZ) {
cmul = 1.0;
p->use_normalized_range = true;
}
bool color_defined = false;
if (p->plane_count > 1) {
// Chroma processing (merging -> debanding -> scaling)
struct src_tex luma = p->pass_tex[0];
struct src_tex alpha = p->pass_tex[3];
int c_w = p->pass_tex[1].src.x1 - p->pass_tex[1].src.x0;
int c_h = p->pass_tex[1].src.y1 - p->pass_tex[1].src.y0;
const struct scaler_config *cscale = &p->opts.scaler[2];
// Special case for non-planar content
if (p->plane_count == 1) {
if (shader) {
load_shader(p, shader);
GLSLF("// custom source-shader (RGB)\n");
gl_sc_uniform_f(p->sc, "cmul", cmul);
GLSL(vec4 color = sample(texture0, texcoord0, texture_size0);)
} else {
GLSL(vec4 color = texture(texture0, texcoord0);)
bool merged = false;
if (p->plane_count > 2) {
// For simplicity and performance, we merge the chroma planes
// into a single texture before scaling or debanding, so the shader
// doesn't need to run multiple times.
GLSLF("// chroma merging\n");
GLSL(vec4 color = vec4(texture(texture1, texcoord1).x,
texture(texture2, texcoord2).x,
0.0, 1.0);)
// We also pull up to the full dynamic range of the texture to avoid
// heavy clipping when using low-bit-depth FBOs
GLSLF("color.xy *= %f;\n", tex_mul);
assert(c_w == p->pass_tex[2].src.x1 - p->pass_tex[2].src.x0);
assert(c_h == p->pass_tex[2].src.y1 - p->pass_tex[2].src.y0);
finish_pass_fbo(p, &p->chroma_merge_fbo, c_w, c_h, 1, 0);
p->use_normalized_range = true;
merged = true;
}
return;
if (p->opts.deband) {
pass_sample_deband(p->sc, p->opts.deband_opts, 1, merged ? 1.0 : tex_mul,
p->image_w, p->image_h, &p->lfg);
GLSL(color.zw = vec2(0.0, 1.0);) // skip unused
finish_pass_fbo(p, &p->chroma_deband_fbo, c_w, c_h, 1, 0);
p->use_normalized_range = true;
}
// Sample either directly or by upscaling
if (p->image_desc.flags & MP_IMGFLAG_SUBSAMPLED) {
GLSLF("// chroma scaling\n");
pass_sample(p, 1, &p->scaler[2], cscale, 1.0,
p->image_w, p->image_h, chromafix);
GLSL(vec2 chroma = color.xy;)
color_defined = true; // pass_sample defines vec4 color
} else {
GLSL(vec2 chroma = texture(texture1, texcoord1).xy;)
}
p->pass_tex[0] = luma; // Restore the luma and alpha planes
p->pass_tex[3] = alpha;
}
// Chroma preprocessing (merging -> shaders -> scaling)
struct src_tex luma = p->pass_tex[0];
struct src_tex alpha = p->pass_tex[3];
int c_w = p->pass_tex[1].src.x1 - p->pass_tex[1].src.x0;
int c_h = p->pass_tex[1].src.y1 - p->pass_tex[1].src.y0;
const struct scaler_config *cscale = &p->opts.scaler[2];
// As an unfortunate side-effect of re-using the vec4 color constant in
// both the luma and chroma stages, vec4 color may or may not be defined
// at this point. If it's missing, define it since the code from here on
// relies on it.
if (!color_defined)
GLSL(vec4 color;)
bool merged = false;
if (p->plane_count > 2) {
// For simplicity and performance, we merge the chroma planes
// into a single texture before scaling or shading, so the shader
// doesn't need to run multiple times.
GLSLF("// chroma merging\n");
GLSL(vec4 color = vec4(texture(texture1, texcoord1).r,
texture(texture2, texcoord2).r,
0.0, 1.0);)
// We also pull up here in this case to avoid the issues described
// above.
GLSLF("color.rg *= %f;\n", cmul);
// Sample the main (luma/RGB) plane. This is inside a sub-block to avoid
// colliding with the vec4 color that may be left over from the chroma
// stuff
GLSL(vec4 main;)
GLSLF("{\n");
if (p->opts.deband) {
pass_sample_deband(p->sc, p->opts.deband_opts, 0, tex_mul,
p->image_w, p->image_h, &p->lfg);
p->use_normalized_range = true;
merged = true;
assert(c_w == p->pass_tex[2].src.x1 - p->pass_tex[2].src.x0);
assert(c_h == p->pass_tex[2].src.y1 - p->pass_tex[2].src.y0);
finish_pass_fbo(p, &p->chroma_merge_fbo, c_w, c_h, 1, 0);
}
if (shader) {
// Chroma plane shader logic
load_shader(p, shader);
gl_sc_uniform_f(p->sc, "cmul", merged ? 1.0 : cmul);
GLSLF("// custom source-shader (chroma)\n");
GLSL(vec4 color = sample(texture1, texcoord1, texture_size1);)
GLSL(color.ba = vec2(0.0, 1.0);) // skip unused
finish_pass_fbo(p, &p->source_fbo, c_w, c_h, 1, 0);
}
if (p->image_desc.flags & MP_IMGFLAG_SUBSAMPLED) {
GLSLF("// chroma scaling\n");
pass_sample(p, 1, &p->scaler[2], cscale, 1.0, p->image_w, p->image_h,
chromafix);
GLSL(vec2 chroma = color.rg;)
}
p->pass_tex[0] = luma; // Restore the luma plane
if (shader) {
load_shader(p, shader);
gl_sc_uniform_f(p->sc, "cmul", cmul);
GLSLF("// custom source-shader (luma)\n");
GLSL(float luma = sample(texture0, texcoord0, texture_size0).r;)
} else {
GLSL(float luma = texture(texture0, texcoord0).r;)
GLSL(vec4 color = texture(texture0, texcoord0);)
if (p->use_normalized_range)
GLSLF("luma *= %f;\n", cmul);
GLSLF("color *= %f;\n", tex_mul);
}
GLSL(main = color;)
GLSLF("}\n");
GLSL(color = vec4(luma, chroma, 1.0);)
p->pass_tex[3] = alpha; // Restore the alpha plane (if set)
// Set up the right combination of planes
GLSL(color = main;)
if (p->plane_count > 1)
GLSL(color.yz = chroma;)
if (p->has_alpha && p->plane_count >= 4) {
GLSL(color.a = texture(texture3, texcoord3).r;)
if (p->use_normalized_range)
GLSLF("color.a *= %f;\n", cmul);
GLSLF("color.a *= %f;\n", tex_mul);
}
}

View File

@ -67,6 +67,14 @@ struct scaler {
struct filter_kernel kernel_storage;
};
// Tunable parameters of the debanding pass.
struct deband_opts {
// NOTE(review): no use of this field is visible in the debanding code shown
// with this change - confirm whether it is still needed.
int enabled;
// Number of averaging steps emitted per sample.
int iterations;
// Cut-off threshold: an averaged sample replaces the original only where
// their difference does not exceed a value derived from this.
float threshold;
// Base sampling radius; it is multiplied by the step number on each step.
float range;
// Strength of the random noise added to the result.
float grain;
};
struct gl_video_opts {
int dumb_mode;
struct scaler_config scaler[4];
@ -97,6 +105,8 @@ struct gl_video_opts {
char *scale_shader;
char **pre_shaders;
char **post_shaders;
int deband;
struct deband_opts *deband_opts;
};
extern const struct m_sub_options gl_video_conf;

View File

@ -27,10 +27,13 @@
#define GLSL(x) gl_sc_add(sc, #x "\n");
#define GLSLF(...) gl_sc_addf(sc, __VA_ARGS__)
#define GLSLH(x) gl_sc_hadd(sc, #x "\n");
#define GLSLHF(...) gl_sc_haddf(sc, __VA_ARGS__)
// Set up shared/commonly used variables
void sampler_prelude(struct gl_shader_cache *sc, int tex_num)
{
GLSLF("#undef tex\n");
GLSLF("#define tex texture%d\n", tex_num);
GLSLF("vec2 pos = texcoord%d;\n", tex_num);
GLSLF("vec2 size = texture_size%d;\n", tex_num);
@ -337,3 +340,89 @@ void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
break;
}
}
// Emits a small, widely compatible GLSL PRNG (borrowed from a GLSL tricks
// forum post). Shader code draws a number with rand(h) and then advances the
// state with h = permute(h).
static void prng_init(struct gl_shader_cache *sc, AVLFG *lfg)
{
    // LFG output normalized by UINT32_MAX; handed to the shader as "random"
    double seed = (double)av_lfg_get(lfg) / UINT32_MAX;

    // Helper functions go into the shader header
    GLSLH(float mod289(float x) { return x - floor(x / 289.0) * 289.0; })
    GLSLH(float permute(float x) { return mod289((34.0*x + 1.0) * x); })
    GLSLH(float rand(float x) { return fract(x / 41.0); })

    // Seed the state h by hashing the sample position with the uniform
    GLSL(vec3 _m = vec3(pos, random) + vec3(1.0);)
    GLSL(float h = permute(permute(permute(_m.x)+_m.y)+_m.z);)
    gl_sc_uniform_f(sc, "random", seed);
}
// Built-in defaults for the debanding parameters. Fields that are not listed
// here are zero-initialized.
const struct deband_opts deband_opts_def = {
    .iterations = 4,
    .threshold  = 64.0f,
    .range      = 8.0f,
    .grain      = 48.0f,
};
// The option macros below name members of struct deband_opts.
#define OPT_BASE_STRUCT struct deband_opts
// Sub-option table for the debanding parameters, with the accepted range of
// each value. Defaults come from deband_opts_def.
const struct m_sub_options deband_conf = {
.opts = (const m_option_t[]) {
// Number of averaging steps: 1..16
OPT_INTRANGE("iterations", iterations, 0, 1, 16),
// Cut-off threshold: 0..4096
OPT_FLOATRANGE("threshold", threshold, 0, 0.0, 4096.0),
// Initial sampling radius: 1..64
OPT_FLOATRANGE("range", range, 0, 1.0, 64.0),
// Added noise strength: 0..4096
OPT_FLOATRANGE("grain", grain, 0, 0.0, 4096.0),
// Zeroed entry closing the list
{0}
},
.size = sizeof(struct deband_opts),
.defaults = &deband_opts_def,
};
// Emit GLSL that stochastically samples a debanded result, stored in the
// shader variable "color", from texture number tex_num.
//   opts         - debanding parameters (iterations, threshold, range, grain)
//   tex_mul      - factor applied to every sample fetched from the texture
//   img_w, img_h - divisors turning the sampling distance into a coordinate
//                  offset
//   lfg          - random source used to seed the shader-side PRNG
void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts,
                        int tex_num, float tex_mul, float img_w, float img_h,
                        AVLFG *lfg)
{
    const int steps = opts->iterations;

    // Shared sampler variables first, then the PRNG state
    GLSLF("// debanding (tex %d)\n", tex_num);
    sampler_prelude(sc, tex_num);
    prng_init(sc, lfg);

    // Header helper: stochastic approximation of the average color around a
    // pixel
    GLSLH(vec4 average(sampler2D tex, vec2 pos, float range, inout float h) {)
        // Pick a random distance and angle
        GLSLH(float dist = rand(h) * range; h = permute(h);)
        GLSLH(float dir = rand(h) * 6.2831853; h = permute(h);)

        GLSLHF("vec2 pt = dist / vec2(%f, %f);\n", img_w, img_h);
        GLSLH(vec2 o = vec2(cos(dir), sin(dir));)

        // Four taps, a quarter turn apart, around the source pixel
        GLSLH(vec4 ref[4];)
        GLSLH(ref[0] = texture(tex, pos + pt * vec2( o.x, o.y));)
        GLSLH(ref[1] = texture(tex, pos + pt * vec2(-o.y, o.x));)
        GLSLH(ref[2] = texture(tex, pos + pt * vec2(-o.x, -o.y));)
        GLSLH(ref[3] = texture(tex, pos + pt * vec2( o.y, -o.x));)

        // Hand back the (normalized) mean of the four taps
        GLSLHF("return %f * (ref[0] + ref[1] + ref[2] + ref[3])/4.0;\n", tex_mul);
    GLSLH(})

    // Start from the source pixel itself
    GLSLF("vec4 color = %f * texture(tex, pos);\n", tex_mul);
    GLSLF("vec4 avg, diff;\n");

    for (int step = 1; step <= steps; step++) {
        // Each step widens the radius and tightens the threshold; the
        // average replaces the current color only where the two differ by
        // no more than that threshold.
        GLSLF("avg = average(tex, pos, %f, h);\n", step * opts->range);
        GLSL(diff = abs(color - avg);)
        GLSLF("color = mix(avg, color, greaterThan(diff, vec4(%f)));\n",
              opts->threshold / (step * 16384.0));
    }

    // Finish with a little random noise to hide residual differences
    GLSL(vec3 noise;)
    GLSL(noise.x = rand(h); h = permute(h);)
    GLSL(noise.y = rand(h); h = permute(h);)
    GLSL(noise.z = rand(h); h = permute(h);)
    GLSLF("color.xyz += %f * (noise - vec3(0.5));\n", opts->grain/8192.0);
}

View File

@ -23,10 +23,15 @@
#ifndef MP_GL_VIDEO_SHADERS_H
#define MP_GL_VIDEO_SHADERS_H
#include <libavutil/lfg.h>
#include "common.h"
#include "utils.h"
#include "video.h"
// Default debanding parameters.
extern const struct deband_opts deband_opts_def;
// Sub-option table describing the debanding parameters.
extern const struct m_sub_options deband_conf;
void sampler_prelude(struct gl_shader_cache *sc, int tex_num);
void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler,
int d_x, int d_y);
@ -40,4 +45,8 @@ void pass_sample_oversample(struct gl_shader_cache *sc, struct scaler *scaler,
void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc);
void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc);
// Emit shader code that stochastically samples a debanded color from texture
// tex_num. tex_mul scales the sampled values, img_w/img_h scale the sampling
// distance, and lfg supplies the seed for the shader-side random numbers.
void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts,
int tex_num, float tex_mul, float img_w, float img_h,
AVLFG *lfg);
#endif