gl_video: improve dithering

Use a different algorithm to generate the dithering matrix. This
looks much better than the previous ordered dither matrix with its
cross-hatch artifacts.

The matrix generation algorithm as well as its implementation was
contributed by Wessel Dankers aka Fruit. The code in dither.c is
his implementation, reformatted and with static global variables
removed by me.

The new matrix is uploaded as float texture - before this commit, it
was a normal integer fixed point matrix. This means dithering will
be disabled on systems without float textures.

The size of the dithering matrix can be configured, as the matrix is
generated at runtime. The generation of the matrix can take rather
long, and is already unacceptable with size 8. The default is at 6,
which takes about 100 ms on a Core2 Duo system with dither.c compiled
at -O2, which I consider just about acceptable.

The old ordered dithering is still available and can be selected by
putting the dither=ordered sub-option. The ordered dither matrix
generation code was moved to dither.c. This function was originally
written by Uoti Urpala.
This commit is contained in:
wm4 2013-05-26 01:48:39 +02:00
parent 39225ed196
commit 58a7d81dc5
9 changed files with 349 additions and 24 deletions

View File

@ -342,6 +342,24 @@ opengl
detected. Often, LCD panels will do dithering on their own, which
conflicts with vo_opengl's dithering, and leads to ugly output.
dither-size-fruit=<2-8>
Set the size of the dither matrix (default: 6). The actual size of
the matrix is ``(2^N) x (2^N)`` for an option value of ``N``, so a
value of 6 gives a size of 64x64. The matrix is generated at startup
time, and a large matrix can take rather long to compute (seconds).
Used for ``fruit`` dithering only.
dither=<fruit|ordered|no>
Select dithering algorithm (default: fruit).
temporal-dither
Enable temporal dithering. (Only active if dithering is enabled in
general.) This changes between 8 different dithering patterns on each
frame by changing the orientation of the tiled dithering matrix.
Unfortunately, this can lead to flicker on LCD displays, since these
have a high reaction time.
debug
Check for OpenGL errors, i.e. call glGetError(). Also request a
debug OpenGL context (which does nothing with current graphics drivers

View File

@ -90,7 +90,7 @@ SOURCES-$(DIRECT3D) += video/out/vo_direct3d.c \
SOURCES-$(DSOUND) += audio/out/ao_dsound.c
SOURCES-$(GL) += video/out/gl_common.c video/out/gl_osd.c \
video/out/vo_opengl.c video/out/gl_lcms.c \
video/out/gl_video.c \
video/out/gl_video.c video/out/dither.c \
video/out/vo_opengl_old.c \
video/out/pnm_loader.c

239
video/out/dither.c Normal file
View File

@ -0,0 +1,239 @@
/******************************************************************************
dither.c - generate a dithering matrix for downsampling images
Copyright © 2013 Wessel Dankers <wsl@fruit.je>
This file is part of mpv.
mpv is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
mpv is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with mpv. If not, see <http://www.gnu.org/licenses/>.
You can alternatively redistribute this file and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
******************************************************************************/
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdlib.h>
#include <inttypes.h>
#include <string.h>
#include <assert.h>
#include <math.h>
#include <libavutil/lfg.h>
#include "talloc.h"
#include "dither.h"
// Upper bound for the log2 of the matrix side length (checked in makegauss()).
#define MAX_SIZEB 8
// Side length and cell count that correspond to MAX_SIZEB; the arrays in
// struct ctx are dimensioned with MAX_SIZE2.
#define MAX_SIZE (1 << MAX_SIZEB)
#define MAX_SIZE2 (MAX_SIZE * MAX_SIZE)
// Integer type used for linear cell indices and for the per-cell ranks.
typedef uint_fast32_t index_t;
// Reduce x modulo k->size2 with a bit mask (size2 is a power of two, since it
// is the square of 1 << sizeb).
#define WRAP_SIZE2(k, x) ((index_t)((index_t)(x) & ((k)->size2 - 1)))
// Linear index of cell (x, y): x occupies the low k->sizeb bits, y the bits
// above them. Equals x + y * size as long as x < size.
#define XY(k, x, y) ((index_t)(((x) | ((y) << (k)->sizeb))))
// Working state for generating one dither matrix. Set up by makegauss().
struct ctx {
// sizeb: log2 of the side length; size = 1 << sizeb; size2 = size * size.
unsigned int sizeb, size, size2;
// Kernel radius (size / 2 - 1) and the linear index of the kernel centre,
// i.e. XY(gauss_radius, gauss_radius).
unsigned int gauss_radius;
unsigned int gauss_middle;
// Precomputed weighting kernel, indexed with XY(); filled by makegauss().
uint64_t gauss[MAX_SIZE2];
// Scratch list used by getmin() to collect the cells tied for the minimum.
index_t randomat[MAX_SIZE2];
// true for every cell that setbit() has already processed.
bool calcmat[MAX_SIZE2];
// Per-cell sum of the (shifted) kernels added so far by setbit().
uint64_t gaussmat[MAX_SIZE2];
// Result: for each cell, the iteration of makeuniform() in which it was picked.
index_t unimat[MAX_SIZE2];
// Pseudo random generator state used by getmin() to choose among ties.
AVLFG avlfg;
};
// Reset *k for a matrix with side length (1 << sizeb) and precompute the
// weighting kernel k->gauss that setbit() later accumulates.
//
// The kernel is symmetric around (gauss_radius, gauss_radius), covers
// gauss_size x gauss_size cells (gauss_size = 2 * gauss_radius + 1) and decays
// as exp(-distance * sigma). All other entries of k->gauss stay 0.
//
// sizeb must be in [2, MAX_SIZEB]. With sizeb == 1 the radius would be 0, the
// computation of sigma would divide by zero, and the kernel value would be the
// result of converting NaN to uint64_t, which is undefined behavior.
static void makegauss(struct ctx *k, unsigned int sizeb)
{
    assert(sizeb >= 2 && sizeb <= MAX_SIZEB);

    // Clears every field, including all matrices (so k->gauss starts zeroed).
    memset(k, 0, sizeof(*k));
    // Constant seed for the generator used by getmin().
    av_lfg_init(&k->avlfg, 123);

    k->sizeb = sizeb;
    k->size = 1 << k->sizeb;
    k->size2 = k->size * k->size;

    k->gauss_radius = k->size / 2 - 1;
    k->gauss_middle = XY(k, k->gauss_radius, k->gauss_radius);

    unsigned int gauss_size = k->gauss_radius * 2 + 1;
    unsigned int gauss_size2 = gauss_size * gauss_size;

    // Decay rate chosen such that a cell at distance gauss_radius from the
    // centre gets a weight of about 1.5 before truncation to an integer.
    long double sigma = -logl(1.5 / UINT64_MAX * gauss_size2) / k->gauss_radius;

    // Compute one octant (gx <= gy <= gauss_radius) and mirror each value
    // into the 8 symmetric positions of the kernel.
    for (index_t gy = 0; gy <= k->gauss_radius; gy++) {
        for (index_t gx = 0; gx <= gy; gx++) {
            // Offsets from the kernel centre; subtract in signed arithmetic.
            int cx = (int)gx - (int)k->gauss_radius;
            int cy = (int)gy - (int)k->gauss_radius;
            int sq = cx * cx + cy * cy;
            long double e = expl(-sqrtl(sq) * sigma);
            // Dividing by the kernel area keeps the sum of all weights
            // within uint64_t range (verified below).
            uint64_t v = e / gauss_size2 * UINT64_MAX;
            k->gauss[XY(k, gx, gy)] =
                k->gauss[XY(k, gy, gx)] =
                k->gauss[XY(k, gx, gauss_size - 1 - gy)] =
                k->gauss[XY(k, gy, gauss_size - 1 - gx)] =
                k->gauss[XY(k, gauss_size - 1 - gx, gy)] =
                k->gauss[XY(k, gauss_size - 1 - gy, gx)] =
                k->gauss[XY(k, gauss_size - 1 - gx, gauss_size - 1 - gy)] =
                k->gauss[XY(k, gauss_size - 1 - gy, gauss_size - 1 - gx)] = v;
        }
    }

    // Sanity check: summing the whole kernel must not wrap around.
    uint64_t total = 0;
    for (index_t c = 0; c < k->size2; c++) {
        uint64_t oldtotal = total;
        total += k->gauss[c];
        assert(total >= oldtotal);
    }
}
// Mark cell c as placed and add the kernel onto k->gaussmat, cyclically
// shifted over the linear index so that the kernel centre (gauss_middle)
// lands on c. Does nothing if c was already placed.
static void setbit(struct ctx *k, index_t c)
{
    if (k->calcmat[c])
        return;
    k->calcmat[c] = true;

    const index_t cells = k->size2;
    // Kernel entry that is added to gaussmat[0].
    const index_t shift = WRAP_SIZE2(k, k->gauss_middle + k->size2 - c);
    // Number of cells served before the kernel index wraps back to 0.
    const index_t head = cells - shift;

    uint64_t *acc = k->gaussmat;
    const uint64_t *kernel = k->gauss;

    for (index_t i = 0; i < head; i++)
        acc[i] += kernel[shift + i];
    for (index_t i = head; i < cells; i++)
        acc[i] += kernel[i - head];
}
// Return the index of a not yet placed cell whose accumulated weight in
// k->gaussmat is minimal. All cells tied for the minimum are collected in
// k->randomat; a unique minimum is returned directly, a tie across the whole
// matrix yields the fixed cell size2 / 2, and any other tie is resolved with
// the random generator.
static index_t getmin(struct ctx *k)
{
    const unsigned int cells = k->size2;
    uint64_t lowest = UINT64_MAX;
    index_t ncand = 0;

    for (index_t idx = 0; idx < cells; idx++) {
        if (k->calcmat[idx])
            continue;

        uint64_t weight = k->gaussmat[idx];
        if (weight > lowest)
            continue;
        if (weight < lowest) {
            // New minimum: discard the candidates gathered so far.
            lowest = weight;
            ncand = 0;
        }
        k->randomat[ncand++] = idx;
    }

    if (ncand == 1)
        return k->randomat[0];
    if (ncand == cells)
        return cells / 2;
    return k->randomat[av_lfg_get(&k->avlfg) % ncand];
}
// Assign a rank to every cell: in each round take the cell chosen by
// getmin(), register it with setbit(), and store the round number as that
// cell's entry in k->unimat.
static void makeuniform(struct ctx *k)
{
    const unsigned int cells = k->size2;
    index_t rank = 0;

    while (rank < cells) {
        index_t cell = getmin(k);
        setbit(k, cell);
        k->unimat[cell] = rank;
        rank++;
    }
}
// Generate a dither matrix and store it in out_matrix.
//
// size is the log2 of the side length: out_matrix must have room for
// tsize * tsize floats, where tsize = (1 << size), and is written row by row.
// Every entry is the cell's rank divided by tsize * tsize. makegauss()
// asserts that size lies within the supported range.
void mp_make_fruit_dither_matrix(float *out_matrix, int size)
{
    struct ctx *k = talloc(NULL, struct ctx);

    makegauss(k, size);
    makeuniform(k);

    float invscale = k->size2;
    // XY(k, x, y) equals x + y * k->size for x < k->size, so the matrix can
    // be copied as one flat array.
    for (index_t cell = 0; cell < k->size2; cell++)
        out_matrix[cell] = k->unimat[cell] / invscale;

    talloc_free(k);
}
// Fill m with a size x size ordered dither matrix, stored row by row.
// m must have room for size * size entries. The matrix is grown from a single
// cell by repeatedly doubling the side length, so size is presumably meant to
// be a power of two (the only caller in view passes 8).
void mp_make_ordered_dither_matrix(unsigned char *m, int size)
{
    m[0] = 0;
    for (int sz = 1; sz < size; sz *= 2) {
        // Start offsets of the four sz x sz quadrants to fill. The source
        // quadrant itself (offset 0) comes last because it is rewritten in
        // place after the other three have been derived from it.
        const int quadrant[4] = {sz * size, sz, sz * (size + 1), 0};
        for (int q = 0; q < 4; q++) {
            const int bias = (3 - q) * 256 / size / size;
            for (int row = 0; row < sz; row++) {
                for (int col = 0; col < sz; col++) {
                    const int src = row * size + col;
                    m[src + quadrant[q]] = m[src] * 4 + bias;
                }
            }
        }
    }
}
#if 0
// qsort() comparator that orders index_t values ascending.
static int index_cmp(const void *a, const void *b)
{
    const index_t *lhs = a;
    const index_t *rhs = b;

    if (*lhs < *rhs)
        return -1;
    return *lhs > *rhs;
}
// Consistency check: after sorting, k->unimat must contain each value
// 0 .. size2 - 1 exactly once. Sorts k->unimat in place, so the matrix is no
// longer usable afterwards.
static void fsck(struct ctx *k)
{
    qsort(k->unimat, k->size2, sizeof(k->unimat[0]), index_cmp);

    index_t expected = 0;
    while (expected < k->size2) {
        assert(k->unimat[expected] == expected);
        expected++;
    }
}
// Destination buffer for the copying variant of print() below.
uint16_t r[MAX_SIZE2];
// Debug helper with two variants, selected by the inner #if.
static void print(struct ctx *k)
{
#if 0
// Variant 1 (disabled): write the matrix to stdout as C source, i.e. the
// size constants followed by a uint16_t array with one text line per row.
puts("#include <stdint.h>");
printf("static const int mp_dither_size = %d;\n", k->size);
printf("static const int mp_dither_size2 = %d;\n", k->size2);
printf("static const uint16_t mp_dither_matrix[] = {\n");
for(index_t y = 0; y < k->size; y++) {
printf("\t");
for(index_t x = 0; x < k->size; x++)
printf("%4"PRIuFAST32", ", k->unimat[XY(k, x, y)]);
printf("\n");
}
puts("};");
#else
// Variant 2: copy every cell of k->unimat into the global r, converting the
// values to uint16_t.
for(index_t y = 0; y < k->size; y++) {
for(index_t x = 0; x < k->size; x++)
r[XY(k, x, y)] = k->unimat[XY(k, x, y)];
}
#endif
}
#include "osdep/timer.h"
// Stand-alone timing harness: builds a matrix for size 6, runs print() and
// fsck() on it, and reports the elapsed time in milliseconds.
int main(void)
{
    mp_time_init();

    struct ctx *k = malloc(sizeof *k);

    int64_t started = mp_time_us();
    makegauss(k, 6);
    makeuniform(k);
    print(k);
    fsck(k);
    int64_t elapsed = mp_time_us() - started;

    printf("time: %f ms\n", elapsed / 1000.0);
    return 0;
}
#endif

2
video/out/dither.h Normal file
View File

@ -0,0 +1,2 @@
void mp_make_fruit_dither_matrix(float *out_matrix, int size);
void mp_make_ordered_dither_matrix(unsigned char *m, int size);

View File

@ -259,6 +259,7 @@ struct gl_functions gl_functions[] = {
DEF_FN(Uniform2f),
DEF_FN(Uniform3f),
DEF_FN(Uniform1i),
DEF_FN(UniformMatrix2fv),
DEF_FN(UniformMatrix3fv),
DEF_FN(TexImage3D),
{0},

View File

@ -305,6 +305,8 @@ struct GL {
void (GLAPIENTRY *Uniform3f)(GLint, GLfloat, GLfloat, GLfloat);
void (GLAPIENTRY *Uniform4f)(GLint, GLfloat, GLfloat, GLfloat, GLfloat);
void (GLAPIENTRY *Uniform1i)(GLint, GLint);
void (GLAPIENTRY *UniformMatrix2fv)(GLint, GLsizei, GLboolean,
const GLfloat *);
void (GLAPIENTRY *UniformMatrix3fv)(GLint, GLsizei, GLboolean,
const GLfloat *);
void (GLAPIENTRY *UniformMatrix4x3fv)(GLint, GLsizei, GLboolean,

View File

@ -37,6 +37,7 @@
#include "aspect.h"
#include "video/memcpy_pic.h"
#include "bitmap_packer.h"
#include "dither.h"
static const char vo_opengl_shaders[] =
// Generated from gl_video_shaders.glsl
@ -192,6 +193,10 @@ struct gl_video {
int frames_rendered;
// Cached because computing it can take relatively long
int last_dither_matrix_size;
float *last_dither_matrix;
void *scratch;
};
@ -229,6 +234,7 @@ static const char *osd_shaders[SUBBITMAP_COUNT] = {
static const struct gl_video_opts gl_video_opts_def = {
.npot = 1,
.dither_depth = -1,
.dither_size = 6,
.fbo_format = GL_RGB,
.scale_sep = 1,
.scalers = { "bilinear", "bilinear" },
@ -269,6 +275,10 @@ const struct m_sub_options gl_video_conf = {
{"rgba32f", GL_RGBA32F})),
OPT_CHOICE_OR_INT("dither-depth", dither_depth, 0, -1, 16,
({"no", -1}, {"auto", 0})),
OPT_CHOICE("dither", dither_algo, 0,
({"fruit", 0}, {"ordered", 1}, {"no", -1})),
OPT_INTRANGE("dither-size-fruit", dither_size, 0, 2, 8),
OPT_FLAG("temporal-dither", temporal_dither, 0),
OPT_FLAG("alpha", enable_alpha, 0),
{0}
},
@ -754,6 +764,7 @@ static void compile_shaders(struct gl_video *p)
shader_def_opt(&header_final, "USE_3DLUT", p->use_lut_3d);
shader_def_opt(&header_final, "USE_SRGB", p->opts.srgb);
shader_def_opt(&header_final, "USE_DITHER", p->dither_texture != 0);
shader_def_opt(&header_final, "USE_TEMPORAL_DITHER", p->opts.temporal_dither);
if (p->opts.scale_sep && p->scalers[0].kernel) {
header_sep = talloc_strdup(tmp, "");
@ -926,18 +937,6 @@ static void init_scaler(struct gl_video *p, struct scaler *scaler)
debug_check_gl(p, "after initializing scaler");
}
static void make_dither_matrix(unsigned char *m, int size)
{
m[0] = 0;
for (int sz = 1; sz < size; sz *= 2) {
int offset[] = {sz*size, sz, sz * (size+1), 0};
for (int i = 0; i < 4; i++)
for (int y = 0; y < sz * size; y += size)
for (int x = 0; x < sz; x++)
m[x+y+offset[i]] = m[x+y] * 4 + (3-i) * 256/size/size;
}
}
static void init_dither(struct gl_video *p)
{
GL *gl = p->gl;
@ -947,30 +946,54 @@ static void init_dither(struct gl_video *p)
if (p->opts.dither_depth > 0)
dst_depth = p->opts.dither_depth;
if (p->opts.dither_depth < 0)
if (p->opts.dither_depth < 0 || p->opts.dither_algo < 0)
return;
mp_msg(MSGT_VO, MSGL_V, "[gl] Dither to %d.\n", dst_depth);
int tex_size;
void *tex_data;
GLenum tex_type;
unsigned char temp[256];
if (p->opts.dither_algo == 0) {
int sizeb = p->opts.dither_size;
int size = 1 << sizeb;
if (p->last_dither_matrix_size != size) {
p->last_dither_matrix = talloc_realloc(p, p->last_dither_matrix,
float, size * size);
mp_make_fruit_dither_matrix(p->last_dither_matrix, sizeb);
p->last_dither_matrix_size = size;
}
tex_size = size;
tex_type = GL_FLOAT;
tex_data = p->last_dither_matrix;
} else {
assert(sizeof(temp) >= 8 * 8);
mp_make_ordered_dither_matrix(temp, 8);
tex_size = 8;
tex_type = GL_UNSIGNED_BYTE;
tex_data = temp;
}
// This defines how many bits are considered significant for output on
// screen. The superfluous bits will be used for rounded according to the
// screen. The superfluous bits will be used for rounding according to the
// dither matrix. The precision of the source implicitly decides how many
// dither patterns can be visible.
p->dither_quantization = (1 << dst_depth) - 1;
int size = 8;
p->dither_multiply = p->dither_quantization + 1.0 / (size*size);
unsigned char dither[256];
make_dither_matrix(dither, size);
p->dither_size = size;
p->dither_multiply = p->dither_quantization + 1.0 / (tex_size * tex_size);
p->dither_size = tex_size;
gl->ActiveTexture(GL_TEXTURE0 + TEXUNIT_DITHER);
gl->GenTextures(1, &p->dither_texture);
gl->BindTexture(GL_TEXTURE_2D, p->dither_texture);
gl->PixelStorei(GL_UNPACK_ALIGNMENT, 1);
gl->PixelStorei(GL_UNPACK_ROW_LENGTH, 0);
gl->TexImage2D(GL_TEXTURE_2D, 0, GL_RED, size, size, 0, GL_RED,
GL_UNSIGNED_BYTE, dither);
gl->TexImage2D(GL_TEXTURE_2D, 0, GL_RED, tex_size, tex_size, 0, GL_RED,
tex_type, tex_data);
gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
@ -1164,6 +1187,25 @@ static void uninit_video(struct gl_video *p)
fbotex_uninit(p, &p->scale_sep_fbo);
}
// Upload the "dither_trafo" 2x2 matrix for the current frame to the final
// shader program. The frame counter modulo 8 selects the phase: a rotation by
// phase * 90 degrees, with the second matrix row negated for phases 4..7.
static void change_dither_trafo(struct gl_video *p)
{
    GL *gl = p->gl;
    int prog = p->final_program;

    int step = p->frames_rendered % 8u;
    float angle = step * (M_PI / 2);    // rotation
    float flip = 1;                     // mirroring
    if (step >= 4)
        flip = -1;

    gl->UseProgram(prog);

    float trafo[2][2] = {
        {cos(angle),        -sin(angle)},
        {sin(angle) * flip,  cos(angle) * flip},
    };
    gl->UniformMatrix2fv(gl->GetUniformLocation(prog, "dither_trafo"),
                         1, GL_TRUE, &trafo[0][0]);

    gl->UseProgram(0);
}
static void render_to_fbo(struct gl_video *p, struct fbotex *fbo, int w, int h,
int tex_w, int tex_h)
{
@ -1206,6 +1248,9 @@ void gl_video_render_frame(struct gl_video *p)
struct video_image *vimg = &p->image;
bool is_flipped = vimg->image_flipped;
if (p->opts.temporal_dither)
change_dither_trafo(p);
if (p->dst_rect.x0 > p->vp_x || p->dst_rect.y0 > p->vp_y
|| p->dst_rect.x1 < p->vp_x + p->vp_w
|| p->dst_rect.y1 < p->vp_y + p->vp_h)
@ -1274,6 +1319,8 @@ void gl_video_render_frame(struct gl_video *p)
gl->UseProgram(0);
p->frames_rendered++;
debug_check_gl(p, "after video rendering");
}
@ -1542,6 +1589,14 @@ static void check_gl_features(struct gl_video *p)
}
}
if (!have_float_tex && p->opts.dither_depth >= 0) {
// only fruit dithering uses float textures
if (p->opts.dither_algo == 0) {
p->opts.dither_depth = -1;
disabled[n_disabled++] = "dithering (float tex.)";
}
}
if (!have_srgb && p->opts.srgb) {
p->opts.srgb = false;
disabled[n_disabled++] = "sRGB";

View File

@ -39,6 +39,9 @@ struct gl_video_opts {
int npot;
int pbo;
int dither_depth;
int dither_algo;
int dither_size;
int temporal_dither;
int fbo_format;
int stereo_mode;
int enable_alpha;

View File

@ -121,6 +121,7 @@ uniform sampler2D lut_l_2d;
uniform sampler3D lut_3d;
uniform sampler2D dither;
uniform mat4x3 colormatrix;
uniform mat2 dither_trafo;
uniform vec3 inv_gamma;
uniform float input_gamma;
uniform float conv_gamma;
@ -376,7 +377,11 @@ void main() {
color.rgb = srgb_compand(color.rgb);
#endif
#ifdef USE_DITHER
float dither_value = texture(dither, gl_FragCoord.xy / dither_size).r;
vec2 dither_pos = gl_FragCoord.xy / dither_size;
#ifdef USE_TEMPORAL_DITHER
dither_pos = dither_trafo * dither_pos;
#endif
float dither_value = texture(dither, dither_pos).r;
color = floor(color * dither_multiply + dither_value ) / dither_quantization;
#endif
#ifdef USE_ALPHA