From 98052873dadf619536c3ab379d45a998f2cf0999 Mon Sep 17 00:00:00 2001 From: wm4 Date: Sat, 31 Mar 2012 01:13:38 +0200 Subject: [PATCH] libvo: add vo_gl3 This new vo is heavily based on vo_gl.c. It provides better scale filters, dithering, and optional color management with LittleCMS2. It requires OpenGL 3. Many features are enabled by default, so it will be slower than vo_gl. However, it can be tuned to behave almost as vo_gl. --- .gitignore | 2 + Makefile | 8 +- configure | 21 + libvo/filter_kernels.c | 279 +++++ libvo/filter_kernels.h | 45 + libvo/gl_common.c | 16 + libvo/gl_common.h | 4 + libvo/video_out.c | 4 + libvo/vo_gl3.c | 2418 +++++++++++++++++++++++++++++++++++++ libvo/vo_gl3_shaders.glsl | 316 +++++ 10 files changed, 3112 insertions(+), 1 deletion(-) create mode 100644 libvo/filter_kernels.c create mode 100644 libvo/filter_kernels.h create mode 100644 libvo/vo_gl3.c create mode 100644 libvo/vo_gl3_shaders.glsl diff --git a/.gitignore b/.gitignore index e90239351c..8c4cd6181a 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,5 @@ /TAGS /locale /po + +libvo/vo_gl3_shaders.h diff --git a/Makefile b/Makefile index f6b869e8a6..ae2267f5ec 100644 --- a/Makefile +++ b/Makefile @@ -451,7 +451,7 @@ SRCS_MPLAYER-$(ESD) += libao2/ao_esd.c SRCS_MPLAYER-$(FBDEV) += libvo/vo_fbdev.c libvo/vo_fbdev2.c SRCS_MPLAYER-$(GGI) += libvo/vo_ggi.c SRCS_MPLAYER-$(GIF) += libvo/vo_gif89a.c -SRCS_MPLAYER-$(GL) += libvo/gl_common.c libvo/vo_gl.c \ +SRCS_MPLAYER-$(GL) += libvo/gl_common.c libvo/vo_gl.c libvo/vo_gl3.c \ pnm_loader.c SRCS_MPLAYER-$(GL_COCOA) += libvo/cocoa_common.m SRCS_MPLAYER-$(GL_SDL) += libvo/sdl_common.c @@ -510,6 +510,7 @@ SRCS_MPLAYER = command.c \ libao2/audio_out.c \ libvo/aspect.c \ libvo/csputils.c \ + libvo/filter_kernels.c \ libvo/geometry.c \ libvo/old_vo_wrapper.c \ libvo/spuenc.c \ @@ -605,6 +606,11 @@ codec-cfg$(EXESUF): codec-cfg.c codec-cfg.h codecs.conf.h: codec-cfg$(EXESUF) etc/codecs.conf ./$^ > $@ +libvo/vo_gl3_shaders.h: libvo/vo_gl3_shaders.glsl + python ./bin_to_header.py $^ $@ + +libvo/vo_gl3.c: libvo/vo_gl3_shaders.h + # ./configure must be rerun if it changed config.mak: configure @echo "############################################################" diff --git a/configure b/configure index 53a9ead726..3731ba9458 100755 --- a/configure +++ b/configure @@ -338,6 +338,7 @@ Optional features: --enable-smb enable Samba (SMB) input [autodetect] --enable-live enable LIVE555 Streaming Media [disable] --enable-nemesi enable Nemesi Streaming Media [autodetect] + --enable-lcms2 enable LCMS2 support [autodetect] --disable-vcd disable VCD support [autodetect] --disable-bluray disable Blu-ray support [autodetect] --disable-dvdnav disable libdvdnav [autodetect] @@ -637,6 +638,7 @@ _xanim=auto _real=auto _live=no _nemesi=auto +_lcms2=auto _native_rtsp=yes _xinerama=auto _mga=auto @@ -990,6 +992,8 @@ for ac_option do --disable-live) _live=no ;; --enable-nemesi) _nemesi=yes ;; --disable-nemesi) _nemesi=no ;; + --enable-lcms2) _lcms2=yes ;; + --disable-lcms2) _lcms2=no ;; --enable-xinerama) _xinerama=yes ;; --disable-xinerama) _xinerama=no ;; --enable-mga) _mga=yes ;; @@ -5726,6 +5730,20 @@ else fi echores "$_qtx" +echocheck "LCMS2 support" +if test "$_lcms2" = auto ; then + _lcms2=no + if pkg_config_add lcms2 ; then + _lcms2=yes + fi +fi +if test "$_lcms2" = yes; then + def_lcms2="#define CONFIG_LCMS2 1" +else + def_lcms2="#undef CONFIG_LCMS2" +fi +echores "$_lcms2" + echocheck "Nemesi Streaming Media libraries" if test "$_nemesi" = auto && test "$networking" = yes ; then _nemesi=no @@ -6518,6 +6536,7 @@ LIBDV = $_libdv LIBDVDCSS_INTERNAL = $_libdvdcss_internal LIBMAD = $_mad LIBNEMESI = $_nemesi +LCMS2 = $_lcms2 LIBNUT = $_libnut LIBPOSTPROC = $libpostproc LIBSMBCLIENT = $_smb @@ -6874,6 +6893,8 @@ $def_smb $def_socklen_t $def_vstream +$def_lcms2 + /* libvo options */ $def_3dfx diff --git a/libvo/filter_kernels.c b/libvo/filter_kernels.c new file mode 100644 index 0000000000..2c2f56ee51 --- /dev/null +++ b/libvo/filter_kernels.c @@ -0,0 +1,279 @@ +/* + * This file is part of mplayer2. + * + * Most code for computing the weights is taken from Anti-Grain Geometry (AGG) + * (licensed under GPL 2 or later), with modifications. + * Copyright (C) 2002-2006 Maxim Shemanarev + * http://vector-agg.cvs.sourceforge.net/viewvc/vector-agg/agg-2.5/include/agg_image_filters.h?view=markup + * + * Also see glumpy (BSD licensed), contains the same code in Python: + * http://code.google.com/p/glumpy/source/browse/glumpy/image/filter.py + * + * Also see: Paul Heckbert's "zoom" + * + * Also see XBMC: ConvolutionKernels.cpp etc. + * + * mplayer2 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mplayer2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with mplayer2; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include +#include +#include +#include + +#include "filter_kernels.h" + +// NOTE: all filters are separable, symmetric, and are intended for use with +// a lookup table/texture. + +const struct filter_kernel *mp_find_filter_kernel(const char *name) +{ + for (const struct filter_kernel *k = mp_filter_kernels; k->name; k++) { + if (strcmp(k->name, name) == 0) + return k; + } + return NULL; +} + +// sizes = sorted list of available filter sizes, terminated with size 0 +// inv_scale = source_size / dest_size +bool mp_init_filter(struct filter_kernel *filter, const int *sizes, + double inv_scale) +{ + // only downscaling requires widening the filter + filter->inv_scale = inv_scale >= 1.0 ? inv_scale : 1.0; + double support = filter->radius * filter->inv_scale; + int size = ceil(2.0 * support); + // round up to smallest available size that's still large enough + if (size < sizes[0]) + size = sizes[0]; + const int *cursize = sizes; + while (size > *cursize && *cursize) + cursize++; + if (*cursize) { + filter->size = *cursize; + return true; + } else { + // The filter doesn't fit - instead of failing completely, use the + // largest filter available. This is incorrect, but better than refusing + // to do anything. + filter->size = cursize[-1]; + filter->inv_scale = filter->size / 2.0 / filter->radius; + return false; + } +} + +// Calculate the 1D filtering kernel for N sample points. +// N = number of samples, which is filter->size +// The weights will be stored in out_w[0] to out_w[N - 1] +// f = x0 - abs(x0), subpixel position in the range [0,1) or [0,1]. +void mp_compute_weights(struct filter_kernel *filter, double f, float *out_w) +{ + assert(filter->size > 0); + double sum = 0; + for (int n = 0; n < filter->size; n++) { + double x = f - (n - filter->size / 2 + 1); + double w = filter->weight(filter, fabs(x) / filter->inv_scale); + out_w[n] = w; + sum += w; + } + //normalize + for (int n = 0; n < filter->size; n++) + out_w[n] /= sum; +} + +// Fill the given array with weights for the range [0.0, 1.0]. The array is +// interpreted as rectangular array of count * filter->size items. +void mp_compute_lut(struct filter_kernel *filter, int count, float *out_array) +{ + for (int n = 0; n < count; n++) { + mp_compute_weights(filter, n / (double)(count - 1), + out_array + filter->size * n); + } +} + +typedef struct filter_kernel kernel; + +static double bilinear(kernel *k, double x) +{ + return 1.0 - x; +} + +static double hanning(kernel *k, double x) +{ + return 0.5 + 0.5 * cos(M_PI * x); +} + +static double hamming(kernel *k, double x) +{ + return 0.54 + 0.46 * cos(M_PI * x); +} + +static double hermite(kernel *k, double x) +{ + return (2.0 * x - 3.0) * x * x + 1.0; +} + +static double quadric(kernel *k, double x) +{ + // NOTE: glumpy uses 0.75, AGG uses 0.5 + if (x < 0.5) + return 0.75 - x * x; + if (x < 1.5) + return 0.5 * (x - 1.5) * (x - 1.5); + return 0; +} + +static double bc_pow3(double x) +{ + return (x <= 0) ? 0 : x * x * x; +} + +static double bicubic(kernel *k, double x) +{ + return (1.0/6.0) * ( bc_pow3(x + 2) + - 4 * bc_pow3(x + 1) + + 6 * bc_pow3(x) + - 4 * bc_pow3(x - 1)); +} + +static double bessel_i0(double epsilon, double x) +{ + double sum = 1; + double y = x * x / 4; + double t = y; + for (int i = 2; t > epsilon; i++) { + sum += t; + t *= y / (i * i); + } + return sum; +} + +static double kaiser(kernel *k, double x) +{ + double a = k->params[0]; + double b = k->params[1]; + double epsilon = 1e-12; + double i0a = 1 / bessel_i0(epsilon, b); + return bessel_i0(epsilon, a * sqrt(1 - x * x)) * i0a; +} + +static double catmull_rom(kernel *k, double x) +{ + if (x < 1.0) + return 0.5 * (2.0 + x * x * (-5.0 + x * 3.0)); + if (x < 2.0) + return 0.5 * (4.0 + x * (-8.0 + x * (5.0 - x))); + return 0; +} + +// Mitchell-Netravali +static double mitchell(kernel *k, double x) +{ + double b = k->params[0]; + double c = k->params[1]; + double + p0 = (6.0 - 2.0 * b) / 6.0, + p2 = (-18.0 + 12.0 * b + 6.0 * c) / 6.0, + p3 = (12.0 - 9.0 * b - 6.0 * c) / 6.0, + q0 = (8.0 * b + 24.0 * c) / 6.0, + q1 = (-12.0 * b - 48.0 * c) / 6.0, + q2 = (6.0 * b + 30.0 * c) / 6.0, + q3 = (-b - 6.0 * c) / 6.0; + if (x < 1.0) + return p0 + x * x * (p2 + x * p3); + if (x < 2.0) + return q0 + x * (q1 + x * (q2 + x * q3)); + return 0; +} + +static double spline16(kernel *k, double x) +{ + if (x < 1.0) + return ((x - 9.0/5.0 ) * x - 1.0/5.0 ) * x + 1.0; + return ((-1.0/3.0 * (x-1) + 4.0/5.0) * (x-1) - 7.0/15.0 ) * (x-1); +} + +static double spline36(kernel *k, double x) +{ + if(x < 1.0) + return ((13.0/11.0 * x - 453.0/209.0) * x - 3.0/209.0) * x + 1.0; + if(x < 2.0) + return ((-6.0/11.0 * (x - 1) + 270.0/209.0) * (x - 1) - 156.0/209.0) + * (x - 1); + return ((1.0/11.0 * (x - 2) - 45.0/209.0) * (x - 2) + 26.0/209.0) + * (x - 2); +} + +static double gaussian(kernel *k, double x) +{ + return exp(-2.0 * x * x) * sqrt(2.0 / M_PI); +} + +static double sinc(kernel *k, double x) +{ + if (x == 0.0) + return 1.0; + double pix = M_PI * x; + return sin(pix) / pix; +} + +static double lanczos(kernel *k, double x) +{ + double radius = k->size / 2; + if (x < -radius || x > radius) + return 0; + if (x == 0) + return 1; + double pix = M_PI * x; + return radius * sin(pix) * sin(pix / radius) / (pix * pix); +} + +static double blackman(kernel *k, double x) +{ + double radius = k->size / 2; + if (x == 0.0) + return 1.0; + if (x > radius) + return 0.0; + x *= M_PI; + double xr = x / radius; + return (sin(x) / x) * (0.42 + 0.5 * cos(xr) + 0.08 * cos(2 * xr)); +} + +const struct filter_kernel mp_filter_kernels[] = { + {"bilinear_slow", 1, bilinear}, + {"hanning", 1, hanning}, + {"hamming", 1, hamming}, + {"hermite", 1, hermite}, + {"quadric", 1.5, quadric}, + {"bicubic", 2, bicubic}, + {"kaiser", 1, kaiser, .params = {6.33, 6.33} }, + {"catmull_rom", 2, catmull_rom}, + {"mitchell", 2, mitchell, .params = {1.0/3.0, 1.0/3.0} }, + {"spline16", 2, spline16}, + {"spline36", 3, spline36}, + {"gaussian", 2, gaussian}, + {"sinc2", 2, sinc}, + {"sinc3", 3, sinc}, + {"sinc4", 4, sinc}, + {"lanczos2", 2, lanczos}, + {"lanczos3", 3, lanczos}, + {"lanczos4", 4, lanczos}, + {"blackman2", 2, blackman}, + {"blackman3", 3, blackman}, + {"blackman4", 4, blackman}, + {0} +}; diff --git a/libvo/filter_kernels.h b/libvo/filter_kernels.h new file mode 100644 index 0000000000..46a392c40a --- /dev/null +++ b/libvo/filter_kernels.h @@ -0,0 +1,45 @@ +/* + * This file is part of mplayer2. + * + * mplayer2 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mplayer2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with mplayer2; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef MPLAYER_FILTER_KERNELS_H +#define MPLAYER_FILTER_KERNELS_H + +#include + +struct filter_kernel { + const char *name; + double radius; + double (*weight)(struct filter_kernel *kernel, double x); + + // The filter params can be changed at runtime. Only used by some filters. + float params[2]; + // The following values are set by mp_init_filter() at runtime. + // Number of coefficients; equals the rounded up radius multiplied with 2. + int size; + double inv_scale; +}; + +extern const struct filter_kernel mp_filter_kernels[]; + +const struct filter_kernel *mp_find_filter_kernel(const char *name); +bool mp_init_filter(struct filter_kernel *filter, const int *sizes, + double scale); +void mp_compute_weights(struct filter_kernel *filter, double f, float *out_w); +void mp_compute_lut(struct filter_kernel *filter, int count, float *out_array); + +#endif /* MPLAYER_FILTER_KERNELS_H */ diff --git a/libvo/gl_common.c b/libvo/gl_common.c index 230e5dc2bd..b867af81b0 100644 --- a/libvo/gl_common.c +++ b/libvo/gl_common.c @@ -2587,3 +2587,19 @@ void uninit_mpglcontext(MPGLContext *ctx) } talloc_free(ctx); } + +void mp_log_source(int mod, int lev, const char *src) +{ + int line = 1; + if (!src) + return; + while (*src) { + const char *end = strchr(src, '\n'); + const char *next = end + 1; + if (!end) + next = end = src + strlen(src); + mp_msg(mod, lev, "[%3d] %.*s\n", line, (int)(end - src), src); + line++; + src = next; + } +} diff --git a/libvo/gl_common.h b/libvo/gl_common.h index 2f6b4b5083..08e21f28b4 100644 --- a/libvo/gl_common.h +++ b/libvo/gl_common.h @@ -431,6 +431,10 @@ void uninit_mpglcontext(MPGLContext *ctx); int create_mpglcontext(struct MPGLContext *ctx, int gl_flags, int gl_version, uint32_t d_width, uint32_t d_height, uint32_t flags); +// print a multi line string with line numbers (e.g. for shader sources) +// mod, lev: module and log level, as in mp_msg() +void mp_log_source(int mod, int lev, const char *src); + //function pointers loaded from the OpenGL library struct GL { void (GLAPIENTRY *Begin)(GLenum); diff --git a/libvo/video_out.c b/libvo/video_out.c index bbab3f0907..094d5b1a12 100644 --- a/libvo/video_out.c +++ b/libvo/video_out.c @@ -84,6 +84,7 @@ extern struct vo_driver video_out_vdpau; extern struct vo_driver video_out_xv; extern struct vo_driver video_out_gl_nosw; extern struct vo_driver video_out_gl; +extern struct vo_driver video_out_gl3; extern struct vo_driver video_out_dga; extern struct vo_driver video_out_sdl; extern struct vo_driver video_out_3dfx; @@ -169,6 +170,9 @@ const struct vo_driver *video_out_drivers[] = #ifdef CONFIG_XV &video_out_xv, #endif +#ifdef CONFIG_GL + &video_out_gl3, +#endif #ifdef CONFIG_X11 #ifdef CONFIG_GL &video_out_gl_nosw, diff --git a/libvo/vo_gl3.c b/libvo/vo_gl3.c new file mode 100644 index 0000000000..1947839816 --- /dev/null +++ b/libvo/vo_gl3.c @@ -0,0 +1,2418 @@ +/* + * This file is part of MPlayer. + * + * MPlayer is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * MPlayer is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with MPlayer; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * You can alternatively redistribute this file and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include "config.h" + +#include + +#ifdef CONFIG_LCMS2 +#include +#include "stream/stream.h" +#endif + +#include "talloc.h" +#include "bstr.h" +#include "mp_msg.h" +#include "subopt-helper.h" +#include "video_out.h" +#include "libmpcodecs/vfcap.h" +#include "libmpcodecs/mp_image.h" +#include "geometry.h" +#include "osd.h" +#include "sub/font_load.h" +#include "sub/sub.h" +#include "eosd_packer.h" + +#include "gl_common.h" +#include "filter_kernels.h" +#include "aspect.h" +#include "fastmemcpy.h" +#include "sub/ass_mp.h" + +// Generated from libvo/vo_gl3_shaders.glsl +#include "libvo/vo_gl3_shaders.h" + +// How many parts the OSD may consist of at most. +#define MAX_OSD_PARTS 20 + +// Pixel width of 1D lookup textures. +#define LOOKUP_TEXTURE_SIZE 256 + +// Texture units 0-2 are used by the video, with unit 0 for free use. +// Units 3-4 are used for scaler LUTs. +#define TEXUNIT_SCALERS 3 +#define TEXUNIT_3DLUT 5 +#define TEXUNIT_DITHER 6 + +// lscale/cscale arguments that map directly to shader filter routines. +// Note that the convolution filters are not included in this list. +static const char *fixed_scale_filters[] = { + "bilinear", + "bicubic_fast", + "sharpen3", + "sharpen5", + NULL +}; + +struct lut_tex_format { + int pixels; + GLint internal_format; + GLenum format; +}; + +// Indexed with filter_kernel->size. +// This must match the weightsN functions in the shader. +// Each entry uses (size+3)/4 pixels per LUT entry, and size/pixels components +// per pixel. +struct lut_tex_format lut_tex_formats[] = { + [2] = {1, GL_RG16F, GL_RG}, + [4] = {1, GL_RGBA16F, GL_RGBA}, + [6] = {2, GL_RGB16F, GL_RGB}, + [8] = {2, GL_RGBA16F, GL_RGBA}, + [12] = {3, GL_RGBA16F, GL_RGBA}, + [16] = {4, GL_RGBA16F, GL_RGBA}, +}; + +// must be sorted, and terminated with 0 +static const int filter_sizes[] = {2, 4, 6, 8, 12, 16, 0}; + +struct vertex { + float position[2]; + uint8_t color[4]; + float texcoord[2]; +}; + +#define VERTEX_ATTRIB_POSITION 0 +#define VERTEX_ATTRIB_COLOR 1 +#define VERTEX_ATTRIB_TEXCOORD 2 + +// 2 triangles primitives per quad = 6 vertices per quad +// (GL_QUAD is deprecated, strips can't be used with EOSD image lists) +#define VERTICES_PER_QUAD 6 + +struct texplane { + int shift_x, shift_y; + GLuint gl_texture; + int gl_buffer; + int buffer_size; + void *buffer_ptr; +}; + +struct scaler { + int index; + const char *name; + float params[2]; + struct filter_kernel *kernel; + GLuint gl_lut; + const char *lut_name; + + // kernel points here + struct filter_kernel kernel_storage; +}; + +struct fbotex { + GLuint fbo; + GLuint texture; + int tex_w, tex_h; // size of .texture + int vp_w, vp_h; // viewport of fbo / used part of the texture +}; + +struct gl_priv { + struct vo *vo; + MPGLContext *glctx; + GL *gl; + const char *shader_version; + + int use_indirect; + int use_gamma; + int use_srgb; + int use_scale_sep; + int use_fancy_downscaling; + int use_lut_3d; + int use_npot; + int use_pbo; + int use_glFinish; + int use_gl_debug; + int use_gl2; + + int dither_depth; + int swap_interval; + GLint fbo_format; + int stereo_mode; + int osd_color; + + GLuint vertex_buffer; + GLuint vao; + + GLuint osd_program, eosd_program; + GLuint indirect_program, scale_sep_program, final_program; + + GLuint osd_textures[MAX_OSD_PARTS]; + int osd_textures_count; + struct vertex osd_va[MAX_OSD_PARTS * VERTICES_PER_QUAD]; + + GLuint eosd_texture; + int eosd_texture_width, eosd_texture_height; + GLuint eosd_buffer; + struct vertex *eosd_va; + struct eosd_packer *eosd; + + GLuint lut_3d_texture; + int lut_3d_w, lut_3d_h, lut_3d_d; + void *lut_3d_data; + + GLuint dither_texture; + float dither_quantization; + float dither_multiply; + + uint32_t image_width; + uint32_t image_height; + uint32_t image_format; + int texture_width; + int texture_height; + + bool is_yuv; + bool is_linear_rgb; + + // per pixel (full pixel when packed, each component when planar) + int plane_bytes; + int plane_bits; + int component_bits; + + GLint gl_internal_format; + GLenum gl_format; + GLenum gl_type; + + int plane_count; + struct texplane planes[3]; + + struct fbotex indirect_fbo; // RGB target + struct fbotex scale_sep_fbo; // first pass when doing 2 pass scaling + + // state for luma (0) and chroma (1) scalers + struct scaler scalers[2]; + // luma scaler parameters (the same are used for chroma) + float scaler_params[2]; + + struct mp_csp_details colorspace; + struct mp_csp_equalizer video_eq; + + int mpi_flipped; + int vo_flipped; + + struct vo_rect src_rect; // displayed part of the source video + struct vo_rect dst_rect; // video rectangle on output window + int border_x, border_y; // OSD borders + int vp_x, vp_y, vp_w, vp_h; // GL viewport +}; + +struct fmt_entry { + int mp_format; + GLint internal_format; + GLenum format; + int component_bits; + GLenum type; +}; + +static const struct fmt_entry mp_to_gl_formats[] = { + {IMGFMT_RGB48NE, GL_RGB16, GL_RGB, 16, GL_UNSIGNED_SHORT}, + {IMGFMT_RGB24, GL_RGB, GL_RGB, 8, GL_UNSIGNED_BYTE}, + {IMGFMT_RGBA, GL_RGBA, GL_RGBA, 8, GL_UNSIGNED_BYTE}, + {IMGFMT_RGB15, GL_RGBA, GL_RGBA, 5, GL_UNSIGNED_SHORT_1_5_5_5_REV}, + {IMGFMT_RGB16, GL_RGB, GL_RGB, 6, GL_UNSIGNED_SHORT_5_6_5_REV}, + {IMGFMT_BGR15, GL_RGBA, GL_BGRA, 5, GL_UNSIGNED_SHORT_1_5_5_5_REV}, + {IMGFMT_BGR16, GL_RGB, GL_RGB, 6, GL_UNSIGNED_SHORT_5_6_5}, + {IMGFMT_BGR24, GL_RGB, GL_BGR, 8, GL_UNSIGNED_BYTE}, + {IMGFMT_BGRA, GL_RGBA, GL_BGRA, 8, GL_UNSIGNED_BYTE}, + {0}, +}; + + +static const char help_text[]; + +static void uninit_rendering(struct gl_priv *p); +static void delete_shaders(struct gl_priv *p); + + +static void default_tex_params(struct GL *gl, GLenum target, GLint filter) +{ + gl->TexParameteri(target, GL_TEXTURE_MIN_FILTER, filter); + gl->TexParameteri(target, GL_TEXTURE_MAG_FILTER, filter); + gl->TexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + gl->TexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); +} + +static void debug_check_gl(struct gl_priv *p, const char *msg) +{ + if (p->use_gl_debug) + glCheckError(p->gl, msg); +} + +static void tex_size(struct gl_priv *p, int w, int h, int *texw, int *texh) +{ + if (p->use_npot) { + *texw = w; + *texh = h; + } else { + *texw = 32; + while (*texw < w) + *texw *= 2; + *texh = 32; + while (*texh < h) + *texh *= 2; + } +} + +static void draw_triangles(struct gl_priv *p, struct vertex *vb, int vert_count) +{ + GL *gl = p->gl; + + assert(vert_count % 3 == 0); + + gl->BindBuffer(GL_ARRAY_BUFFER, p->vertex_buffer); + gl->BufferData(GL_ARRAY_BUFFER, vert_count * sizeof(struct vertex), vb, + GL_DYNAMIC_DRAW); + gl->BindBuffer(GL_ARRAY_BUFFER, 0); + + gl->BindVertexArray(p->vao); + gl->DrawArrays(GL_TRIANGLES, 0, vert_count); + gl->BindVertexArray(0); + + debug_check_gl(p, "after rendering"); +} + +// Write a textured quad to a vertex array. +// va = destination vertex array, VERTICES_PER_QUAD entries will be overwritten +// x0, y0, x1, y1 = destination coordinates of the quad +// tx0, ty0, tx1, ty1 = source texture coordinates (usually in pixels) +// texture_w, texture_h = size of the texture, or an inverse factor +// color = optional color for all vertices, NULL for opaque white +// flip = flip vertically +static void write_quad(struct vertex *va, + float x0, float y0, float x1, float y1, + float tx0, float ty0, float tx1, float ty1, + float texture_w, float texture_h, + const uint8_t color[4], bool flip) +{ + static const uint8_t white[4] = { 255, 255, 255, 255 }; + + if (!color) + color = white; + + tx0 /= texture_w; + ty0 /= texture_h; + tx1 /= texture_w; + ty1 /= texture_h; + + if (flip) { + float tmp = ty0; + ty0 = ty1; + ty1 = tmp; + } + +#define COLOR_INIT {color[0], color[1], color[2], color[3]} + va[0] = (struct vertex) { {x0, y0}, COLOR_INIT, {tx0, ty0} }; + va[1] = (struct vertex) { {x0, y1}, COLOR_INIT, {tx0, ty1} }; + va[2] = (struct vertex) { {x1, y0}, COLOR_INIT, {tx1, ty0} }; + va[3] = (struct vertex) { {x1, y1}, COLOR_INIT, {tx1, ty1} }; + va[4] = va[2]; + va[5] = va[1]; +#undef COLOR_INIT +} + +static void fbotex_init(struct gl_priv *p, struct fbotex *fbo, int w, int h) +{ + GL *gl = p->gl; + + assert(!fbo->fbo); + assert(!fbo->texture); + + tex_size(p, w, h, &fbo->tex_w, &fbo->tex_h); + + fbo->vp_w = w; + fbo->vp_h = h; + + mp_msg(MSGT_VO, MSGL_V, "[gl] Create FBO: %dx%d\n", fbo->tex_w, fbo->tex_h); + + gl->GenFramebuffers(1, &fbo->fbo); + gl->GenTextures(1, &fbo->texture); + gl->BindTexture(GL_TEXTURE_2D, fbo->texture); + gl->TexImage2D(GL_TEXTURE_2D, 0, p->fbo_format, fbo->tex_w, fbo->tex_h, 0, + GL_RGB, GL_UNSIGNED_BYTE, NULL); + default_tex_params(gl, GL_TEXTURE_2D, GL_LINEAR); + gl->BindFramebuffer(GL_FRAMEBUFFER, fbo->fbo); + gl->FramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_TEXTURE_2D, fbo->texture, 0); + + if (gl->CheckFramebufferStatus(GL_FRAMEBUFFER) + != GL_FRAMEBUFFER_COMPLETE) + { + mp_msg(MSGT_VO, MSGL_ERR, "[gl] Error: framebuffer completeness " + "check failed!\n"); + } + + gl->BindFramebuffer(GL_FRAMEBUFFER, 0); + + debug_check_gl(p, "after creating framebuffer & associated texture"); +} + +static void fbotex_uninit(struct gl_priv *p, struct fbotex *fbo) +{ + GL *gl = p->gl; + + gl->DeleteFramebuffers(1, &fbo->fbo); + gl->DeleteTextures(1, &fbo->texture); + *fbo = (struct fbotex) {0}; +} + +static void matrix_ortho2d(float m[3][3], float x0, float x1, + float y0, float y1) +{ + memset(m, 0, 9 * sizeof(float)); + m[0][0] = 2.0f / (x1 - x0); + m[1][1] = 2.0f / (y1 - y0); + m[2][0] = -(x1 + x0) / (x1 - x0); + m[2][1] = -(y1 + y0) / (y1 - y0); + m[2][2] = 1.0f; +} + +static void update_uniforms(struct gl_priv *p, GLuint program) +{ + GL *gl = p->gl; + GLint loc; + + if (program == 0) + return; + + gl->UseProgram(program); + + struct mp_csp_params cparams = { + .colorspace = p->colorspace, + .input_bits = p->plane_bits, + .texture_bits = (p->plane_bits + 7) & ~7, + }; + mp_csp_copy_equalizer_values(&cparams, &p->video_eq); + + loc = gl->GetUniformLocation(program, "transform"); + if (loc >= 0) { + float matrix[3][3]; + matrix_ortho2d(matrix, 0, p->vp_w, p->vp_h, 0); + gl->UniformMatrix3fv(loc, 1, GL_FALSE, &matrix[0][0]); + } + + loc = gl->GetUniformLocation(program, "colormatrix"); + if (loc >= 0) { + float yuv2rgb[3][4] = {{0}}; + if (p->is_yuv) + mp_get_yuv2rgb_coeffs(&cparams, yuv2rgb); + gl->UniformMatrix4x3fv(loc, 1, GL_TRUE, &yuv2rgb[0][0]); + } + + gl->Uniform3f(gl->GetUniformLocation(program, "inv_gamma"), + 1.0 / cparams.rgamma, + 1.0 / cparams.ggamma, + 1.0 / cparams.bgamma); + + gl->Uniform1i(gl->GetUniformLocation(program, "texture1"), 0); + gl->Uniform1i(gl->GetUniformLocation(program, "texture2"), 1); + gl->Uniform1i(gl->GetUniformLocation(program, "texture3"), 2); + + gl->Uniform1i(gl->GetUniformLocation(program, "lut_3d"), TEXUNIT_3DLUT); + + for (int n = 0; n < 2; n++) { + const char *lut = p->scalers[n].lut_name; + if (lut) + gl->Uniform1i(gl->GetUniformLocation(program, lut), + TEXUNIT_SCALERS + n); + } + + gl->Uniform1i(gl->GetUniformLocation(program, "dither"), TEXUNIT_DITHER); + gl->Uniform1f(gl->GetUniformLocation(program, "dither_quantization"), + p->dither_quantization); + gl->Uniform1f(gl->GetUniformLocation(program, "dither_multiply"), + p->dither_multiply); + + float sparam1 = p->scaler_params[0]; + gl->Uniform1f(gl->GetUniformLocation(program, "filter_param1"), + isnan(sparam1) ? 0.5f : sparam1); + + gl->UseProgram(0); + + debug_check_gl(p, "update_uniforms()"); +} + +static void update_all_uniforms(struct gl_priv *p) +{ + update_uniforms(p, p->osd_program); + update_uniforms(p, p->eosd_program); + update_uniforms(p, p->indirect_program); + update_uniforms(p, p->scale_sep_program); + update_uniforms(p, p->final_program); +} + +#define SECTION_HEADER "#!section " + +static char *get_section(void *talloc_ctx, struct bstr source, + const char *section) +{ + char *res = talloc_strdup(talloc_ctx, ""); + bool copy = false; + while (source.len) { + struct bstr line = bstr_getline(source, &source); + if (bstr_eatstart(&line, bstr(SECTION_HEADER))) { + copy = bstrcmp0(line, section) == 0; + } else if (copy) { + res = talloc_asprintf_append_buffer(res, "%.*s\n", BSTR_P(line)); + } + } + return res; +} + +static char *t_concat(void *talloc_ctx, const char *s1, const char *s2) +{ + return talloc_asprintf(talloc_ctx, "%s%s", s1, s2); +} + +static GLuint create_shader(GL *gl, GLenum type, const char *header, + const char *source) +{ + void *tmp = talloc_new(NULL); + const char *full_source = t_concat(tmp, header, source); + + GLuint shader = gl->CreateShader(type); + gl->ShaderSource(shader, 1, &full_source, NULL); + gl->CompileShader(shader); + GLint status; + gl->GetShaderiv(shader, GL_COMPILE_STATUS, &status); + GLint log_length; + gl->GetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length); + + int pri = status ? (log_length > 1 ? MSGL_V : MSGL_DBG2) : MSGL_ERR; + const char *typestr = type == GL_VERTEX_SHADER ? "vertex" : "fragment"; + if (mp_msg_test(MSGT_VO, pri)) { + mp_msg(MSGT_VO, pri, "[gl] %s shader source:\n", typestr); + mp_log_source(MSGT_VO, pri, full_source); + } + if (log_length > 1) { + GLchar *log = talloc_zero_size(tmp, log_length + 1); + gl->GetShaderInfoLog(shader, log_length, NULL, log); + mp_msg(MSGT_VO, pri, "[gl] %s shader compile log (status=%d):\n%s\n", + typestr, status, log); + } + + talloc_free(tmp); + + return shader; +} + +static void prog_create_shader(GL *gl, GLuint program, GLenum type, + const char *header, const char *source) +{ + GLuint shader = create_shader(gl, type, header, source); + gl->AttachShader(program, shader); + gl->DeleteShader(shader); +} + +static void link_shader(GL *gl, GLuint program) +{ + gl->LinkProgram(program); + GLint status; + gl->GetProgramiv(program, GL_LINK_STATUS, &status); + GLint log_length; + gl->GetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length); + + int pri = status ? (log_length > 1 ? MSGL_V : MSGL_DBG2) : MSGL_ERR; + if (mp_msg_test(MSGT_VO, pri)) { + GLchar *log = talloc_zero_size(NULL, log_length + 1); + gl->GetProgramInfoLog(program, log_length, NULL, log); + mp_msg(MSGT_VO, pri, "[gl] shader link log (status=%d): %s\n", + status, log); + talloc_free(log); + } +} + +static void bind_attrib_locs(GL *gl, GLuint program) +{ + gl->BindAttribLocation(program, VERTEX_ATTRIB_POSITION, "vertex_position"); + gl->BindAttribLocation(program, VERTEX_ATTRIB_COLOR, "vertex_color"); + gl->BindAttribLocation(program, VERTEX_ATTRIB_TEXCOORD, "vertex_texcoord"); +} + +static GLuint create_program(GL *gl, const char *name, const char *header, + const char *vertex, const char *frag) +{ + mp_msg(MSGT_VO, MSGL_V, "[gl] compiling shader program '%s'\n", name); + mp_msg(MSGT_VO, MSGL_V, "[gl] header:\n"); + mp_log_source(MSGT_VO, MSGL_V, header); + GLuint prog = gl->CreateProgram(); + prog_create_shader(gl, prog, GL_VERTEX_SHADER, header, vertex); + prog_create_shader(gl, prog, GL_FRAGMENT_SHADER, header, frag); + bind_attrib_locs(gl, prog); + link_shader(gl, prog); + return prog; +} + +static void shader_def(char **shader, const char *name, + const char *value) +{ + *shader = talloc_asprintf_append(*shader, "#define %s %s\n", name, value); +} + +static void shader_def_opt(char **shader, const char *name, bool b) +{ + if (b) + shader_def(shader, name, "1"); +} + +static void shader_setup_scaler(char **shader, struct scaler *scaler, int pass) +{ + const char *target = scaler->index == 0 ? "SAMPLE_L" : "SAMPLE_C"; + if (!scaler->kernel) { + *shader = talloc_asprintf_append(*shader, "#define %s sample_%s\n", + target, scaler->name); + } else { + int size = scaler->kernel->size; + if (pass != -1) { + // The direction/pass assignment is rather arbitrary, but fixed in + // other parts of the code (like FBO setup). + const char *direction = pass == 0 ? "0, 1" : "1, 0"; + *shader = talloc_asprintf_append(*shader, "#define %s(p0, p1) " + "sample_convolution_sep%d(vec2(%s), %s, p0, p1)\n", + target, size, direction, scaler->lut_name); + } else { + *shader = talloc_asprintf_append(*shader, "#define %s(p0, p1) " + "sample_convolution%d(%s, p0, p1)\n", + target, size, scaler->lut_name); + } + } +} + +// return false if RGB or 4:4:4 YUV +static bool input_is_subsampled(struct gl_priv *p) +{ + for (int i = 0; i < p->plane_count; i++) + if (p->planes[i].shift_x || p->planes[i].shift_y) + return true; + return false; +} + +static void compile_shaders(struct gl_priv *p) +{ + GL *gl = p->gl; + + delete_shaders(p); + + void *tmp = talloc_new(NULL); + + struct bstr src = { (char*)vo_gl3_shaders, sizeof(vo_gl3_shaders) }; + char *vertex_shader = get_section(tmp, src, "vertex_all"); + char *shader_prelude = get_section(tmp, src, "prelude"); + char *s_video = get_section(tmp, src, "frag_video"); + char *s_eosd = get_section(tmp, src, "frag_eosd"); + char *s_osd = get_section(tmp, src, "frag_osd"); + + char *header = talloc_asprintf(tmp, "#version %s\n%s", p->shader_version, + shader_prelude); + + char *header_eosd = talloc_strdup(tmp, header); + shader_def_opt(&header_eosd, "USE_3DLUT", p->use_lut_3d); + + p->eosd_program = + create_program(gl, "eosd", header_eosd, vertex_shader, s_eosd); + + p->osd_program = + create_program(gl, "osd", header, vertex_shader, s_osd); + + char *header_conv = talloc_strdup(tmp, ""); + char *header_final = talloc_strdup(tmp, ""); + char *header_sep = NULL; + + bool convert_input_to_linear = !p->is_linear_rgb + && (p->use_srgb || p->use_lut_3d); + + shader_def_opt(&header_conv, "USE_PLANAR", p->plane_count > 1); + shader_def_opt(&header_conv, "USE_GBRP", p->image_format == IMGFMT_GBRP); + shader_def_opt(&header_conv, "USE_YGRAY", p->is_yuv && p->plane_count == 1); + shader_def_opt(&header_conv, "USE_COLORMATRIX", p->is_yuv); + shader_def_opt(&header_conv, "USE_LINEAR_CONV", convert_input_to_linear); + + shader_def_opt(&header_final, "USE_LINEAR_CONV_INV", p->use_lut_3d); + shader_def_opt(&header_final, "USE_GAMMA_POW", p->use_gamma); + shader_def_opt(&header_final, "USE_3DLUT", p->use_lut_3d); + shader_def_opt(&header_final, "USE_DITHER", p->dither_texture != 0); + + if (p->use_scale_sep && p->scalers[0].kernel) { + header_sep = talloc_strdup(tmp, ""); + shader_def_opt(&header_sep, "FIXED_SCALE", true); + shader_setup_scaler(&header_sep, &p->scalers[0], 0); + shader_setup_scaler(&header_final, &p->scalers[0], 1); + } else { + shader_setup_scaler(&header_final, &p->scalers[0], -1); + } + + // We want to do scaling in linear light. Scaling is closely connected to + // texture sampling due to how the shader is structured (or if GL bilinear + // scaling is used). The purpose of the "indirect" pass is to convert the + // input video to linear RGB. + // Another purpose is reducing input to a single texture for scaling. + bool use_indirect = p->use_indirect; + + // Don't sample from input video textures before converting the input to + // linear light. (Unneeded when sRGB textures are used.) + if (convert_input_to_linear) + use_indirect = true; + + // It doesn't make sense to scale the chroma with cscale in the 1. scale + // step and with lscale in the 2. step. If the chroma is subsampled, a + // convolution filter wouldn't even work entirely correctly, because the + // luma scaler would sample two texels instead of one per tap for chroma. + // Also, even with 4:4:4 YUV or planar RGB, the indirection might be faster, + // because the shader can't use one scaler for sampling from 3 textures. It + // has to fetch the coefficients for each texture separately, even though + // they're the same (this is not an inherent restriction, but would require + // to restructure the shader). + if (header_sep && p->plane_count > 1) + use_indirect = true; + + if (input_is_subsampled(p)) { + shader_setup_scaler(&header_conv, &p->scalers[1], -1); + } else { + // Force using the luma scaler on chroma. If the "indirect" stage is + // used, the actual scaling will happen in the next stage. + shader_def(&header_conv, "SAMPLE_C", + use_indirect ? "sample_bilinear" : "SAMPLE_L"); + } + + if (use_indirect) { + // We don't use filtering for the Y-plane (luma), because it's never + // scaled in this scenario. + shader_def(&header_conv, "SAMPLE_L", "sample_bilinear"); + shader_def_opt(&header_conv, "FIXED_SCALE", true); + header_conv = t_concat(tmp, header, header_conv); + p->indirect_program = + create_program(gl, "indirect", header_conv, vertex_shader, s_video); + } else if (header_sep) { + header_sep = t_concat(tmp, header_sep, header_conv); + } else { + header_final = t_concat(tmp, header_final, header_conv); + } + + if (header_sep) { + header_sep = t_concat(tmp, header, header_sep); + p->scale_sep_program = + create_program(gl, "scale_sep", header_sep, vertex_shader, s_video); + } + + header_final = t_concat(tmp, header, header_final); + p->final_program = + create_program(gl, "final", header_final, vertex_shader, s_video); + + debug_check_gl(p, "shader compilation"); + + talloc_free(tmp); +} + +static void delete_program(GL *gl, GLuint *prog) +{ + gl->DeleteProgram(*prog); + *prog = 0; +} + +static void delete_shaders(struct gl_priv *p) +{ + GL *gl = p->gl; + + delete_program(gl, &p->osd_program); + delete_program(gl, &p->eosd_program); + delete_program(gl, &p->indirect_program); + delete_program(gl, &p->scale_sep_program); + delete_program(gl, &p->final_program); +} + +static double get_scale_factor(struct gl_priv *p) +{ + double sx = p->dst_rect.width / (double)p->src_rect.width; + double sy = p->dst_rect.height / (double)p->src_rect.height; + // xxx: actually we should use different scalers in X/Y directions if the + // scale factors are different due to anamorphic content + return FFMIN(sx, sy); +} + +static bool update_scale_factor(struct gl_priv *p, struct filter_kernel *kernel) +{ + double scale = get_scale_factor(p); + if (!p->use_fancy_downscaling && scale < 1.0) + scale = 1.0; + return mp_init_filter(kernel, filter_sizes, FFMAX(1.0, 1.0 / scale)); +} + +static void init_scaler(struct gl_priv *p, struct scaler *scaler) +{ + GL *gl = p->gl; + + assert(scaler->name); + + scaler->kernel = NULL; + + const struct filter_kernel *t_kernel = mp_find_filter_kernel(scaler->name); + if (!t_kernel) + return; + + scaler->kernel_storage = *t_kernel; + scaler->kernel = &scaler->kernel_storage; + + for (int n = 0; n < 2; n++) { + if (!isnan(p->scaler_params[n])) + scaler->kernel->params[n] = p->scaler_params[n]; + } + + update_scale_factor(p, scaler->kernel); + + int size = scaler->kernel->size; + assert(size < FF_ARRAY_ELEMS(lut_tex_formats)); + struct lut_tex_format *fmt = &lut_tex_formats[size]; + bool use_2d = fmt->pixels > 1; + bool is_luma = scaler->index == 0; + scaler->lut_name = use_2d + ? (is_luma ? "lut_l_2d" : "lut_c_2d") + : (is_luma ? "lut_l_1d" : "lut_c_1d"); + + gl->ActiveTexture(GL_TEXTURE0 + TEXUNIT_SCALERS + scaler->index); + GLenum target = use_2d ? GL_TEXTURE_2D : GL_TEXTURE_1D; + + if (!scaler->gl_lut) + gl->GenTextures(1, &scaler->gl_lut); + + gl->BindTexture(target, scaler->gl_lut); + gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4); + gl->PixelStorei(GL_UNPACK_ROW_LENGTH, 0); + + float *weights = talloc_array(NULL, float, LOOKUP_TEXTURE_SIZE * size); + mp_compute_lut(scaler->kernel, LOOKUP_TEXTURE_SIZE, weights); + if (use_2d) { + gl->TexImage2D(GL_TEXTURE_2D, 0, fmt->internal_format, fmt->pixels, + LOOKUP_TEXTURE_SIZE, 0, fmt->format, GL_FLOAT, + weights); + } else { + gl->TexImage1D(GL_TEXTURE_1D, 0, fmt->internal_format, + LOOKUP_TEXTURE_SIZE, 0, fmt->format, GL_FLOAT, + weights); + } + talloc_free(weights); + + gl->TexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + gl->TexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + gl->TexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + gl->TexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + + gl->ActiveTexture(GL_TEXTURE0); + + debug_check_gl(p, "after initializing scaler"); +} + +static void make_dither_matrix(unsigned char *m, int size) +{ + m[0] = 0; + for (int sz = 1; sz < size; sz *= 2) { + int offset[] = {sz*size, sz, sz * (size+1), 0}; + for (int i = 0; i < 4; i++) + for (int y = 0; y < sz * size; y += size) + for (int x = 0; x < sz; x++) + m[x+y+offset[i]] = m[x+y] * 4 + (3-i) * 256/size/size; + } +} + +static void init_dither(struct gl_priv *p) +{ + GL *gl = p->gl; + + // Assume 8 bits per component if unknown. + int dst_depth = p->glctx->depth_g ? p->glctx->depth_g : 8; + if (p->dither_depth > 0) + dst_depth = p->dither_depth; + + int src_depth = p->component_bits; + if (p->use_lut_3d) + src_depth = 16; + + if (dst_depth >= src_depth || p->dither_depth < 0 || src_depth < 0) + return; + + mp_msg(MSGT_VO, MSGL_V, "[gl] Dither %d->%d.\n", src_depth, dst_depth); + + // This defines how many bits are considered significant for output on + // screen. The superfluous bits will be used for rounded according to the + // dither matrix. The precision of the source implicitly decides how many + // dither patterns can be visible. + p->dither_quantization = (1 << dst_depth) - 1; + int size = 8; + p->dither_multiply = p->dither_quantization + 1.0 / (size*size); + unsigned char dither[256]; + make_dither_matrix(dither, size); + + gl->ActiveTexture(GL_TEXTURE0 + TEXUNIT_DITHER); + gl->GenTextures(1, &p->dither_texture); + gl->BindTexture(GL_TEXTURE_2D, p->dither_texture); + gl->PixelStorei(GL_UNPACK_ALIGNMENT, 1); + gl->PixelStorei(GL_UNPACK_ROW_LENGTH, 0); + gl->TexImage2D(GL_TEXTURE_2D, 0, GL_RED, size, size, 0, GL_RED, + GL_UNSIGNED_BYTE, dither); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); + gl->ActiveTexture(GL_TEXTURE0); +} + +static void reinit_rendering(struct gl_priv *p) +{ + mp_msg(MSGT_VO, MSGL_V, "[gl] Reinit rendering.\n"); + + debug_check_gl(p, "before scaler initialization"); + + uninit_rendering(p); + + init_dither(p); + + init_scaler(p, &p->scalers[0]); + init_scaler(p, &p->scalers[1]); + + compile_shaders(p); + + if (p->indirect_program && !p->indirect_fbo.fbo) + fbotex_init(p, &p->indirect_fbo, p->texture_width, p->texture_height); +} + +static void uninit_rendering(struct gl_priv *p) +{ + GL *gl = p->gl; + + delete_shaders(p); + + for (int n = 0; n < 2; n++) { + gl->DeleteTextures(1, &p->scalers->gl_lut); + p->scalers->gl_lut = 0; + p->scalers->lut_name = NULL; + p->scalers->kernel = NULL; + } + + gl->DeleteTextures(1, &p->dither_texture); + p->dither_texture = 0; +} + +static void init_lut_3d(struct gl_priv *p) +{ + GL *gl = p->gl; + + gl->GenTextures(1, &p->lut_3d_texture); + gl->ActiveTexture(GL_TEXTURE0 + TEXUNIT_3DLUT); + gl->BindTexture(GL_TEXTURE_3D, p->lut_3d_texture); + gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4); + gl->PixelStorei(GL_UNPACK_ROW_LENGTH, 0); + gl->TexImage3D(GL_TEXTURE_3D, 0, GL_RGB16, p->lut_3d_w, p->lut_3d_h, + p->lut_3d_d, 0, GL_RGB, GL_UNSIGNED_SHORT, p->lut_3d_data); + gl->TexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + gl->TexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + gl->TexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + gl->TexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + gl->TexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); + gl->ActiveTexture(GL_TEXTURE0); + + debug_check_gl(p, "after 3d lut creation"); +} + +static void init_video(struct gl_priv *p) +{ + GL *gl = p->gl; + + if (p->use_lut_3d && !p->lut_3d_texture) + init_lut_3d(p); + + if (!p->is_yuv && (p->use_srgb || p->use_lut_3d)) { + p->is_linear_rgb = true; + p->gl_internal_format = GL_SRGB; + } + + int eq_caps = MP_CSP_EQ_CAPS_GAMMA; + if (p->is_yuv) + eq_caps |= MP_CSP_EQ_CAPS_COLORMATRIX; + p->video_eq.capabilities = eq_caps; + + debug_check_gl(p, "before video texture creation"); + + tex_size(p, p->image_width, p->image_height, + &p->texture_width, &p->texture_height); + + for (int n = 0; n < p->plane_count; n++) { + struct texplane *plane = &p->planes[n]; + + int w = p->texture_width >> plane->shift_x; + int h = p->texture_height >> plane->shift_y; + + mp_msg(MSGT_VO, MSGL_V, "[gl] Texture for plane %d: %dx%d\n", n, w, h); + + gl->ActiveTexture(GL_TEXTURE0 + n); + gl->GenTextures(1, &plane->gl_texture); + gl->BindTexture(GL_TEXTURE_2D, plane->gl_texture); + + gl->TexImage2D(GL_TEXTURE_2D, 0, p->gl_internal_format, w, h, 0, + p->gl_format, p->gl_type, NULL); + default_tex_params(gl, GL_TEXTURE_2D, GL_LINEAR); + } + gl->ActiveTexture(GL_TEXTURE0); + + debug_check_gl(p, "after video texture creation"); + + reinit_rendering(p); +} + +static void uninit_video(struct gl_priv *p) +{ + GL *gl = p->gl; + + uninit_rendering(p); + + for (int n = 0; n < 3; n++) { + struct texplane *plane = &p->planes[n]; + + gl->DeleteTextures(1, &plane->gl_texture); + plane->gl_texture = 0; + gl->DeleteBuffers(1, &plane->gl_buffer); + plane->gl_buffer = 0; + plane->buffer_ptr = NULL; + plane->buffer_size = 0; + } + + fbotex_uninit(p, &p->indirect_fbo); + fbotex_uninit(p, &p->scale_sep_fbo); +} + +static void render_to_fbo(struct gl_priv *p, struct fbotex *fbo, int w, int h, + int tex_w, int tex_h) +{ + GL *gl = p->gl; + + gl->Viewport(0, 0, fbo->vp_w, fbo->vp_h); + gl->BindFramebuffer(GL_FRAMEBUFFER, fbo->fbo); + + struct vertex vb[VERTICES_PER_QUAD]; + write_quad(vb, -1, -1, 1, 1, + 0, 0, w, h, + tex_w, tex_h, + NULL, false); + draw_triangles(p, vb, VERTICES_PER_QUAD); + + gl->BindFramebuffer(GL_FRAMEBUFFER, 0); + gl->Viewport(p->vp_x, p->vp_y, p->vp_w, p->vp_h); + +} + +static void handle_pass(struct gl_priv *p, struct fbotex **source, + struct fbotex *fbo, GLuint program) +{ + GL *gl = p->gl; + + if (!program) + return; + + gl->BindTexture(GL_TEXTURE_2D, (*source)->texture); + gl->UseProgram(program); + render_to_fbo(p, fbo, (*source)->vp_w, (*source)->vp_h, + (*source)->tex_w, (*source)->tex_h); + *source = fbo; +} + +static void do_render(struct gl_priv *p) +{ + GL *gl = p->gl; + struct vertex vb[VERTICES_PER_QUAD]; + bool is_flipped = p->mpi_flipped ^ p->vo_flipped; + + // Order of processing: + // [indirect -> [scale_sep ->]] final + + struct fbotex dummy = { + .vp_w = p->image_width, .vp_h = p->image_height, + .tex_w = p->texture_width, .tex_h = p->texture_height, + .texture = p->planes[0].gl_texture, + }; + struct fbotex *source = &dummy; + + handle_pass(p, &source, &p->indirect_fbo, p->indirect_program); + handle_pass(p, &source, &p->scale_sep_fbo, p->scale_sep_program); + + gl->BindTexture(GL_TEXTURE_2D, source->texture); + gl->UseProgram(p->final_program); + + float final_texw = p->image_width * source->tex_w / (float)source->vp_w; + float final_texh = p->image_height * source->tex_h / (float)source->vp_h; + + if (p->use_srgb && !p->use_lut_3d) + gl->Enable(GL_FRAMEBUFFER_SRGB); + + if (p->stereo_mode) { + int w = p->src_rect.width; + int imgw = p->image_width; + + glEnable3DLeft(gl, p->stereo_mode); + + write_quad(vb, + p->dst_rect.left, p->dst_rect.top, + p->dst_rect.right, p->dst_rect.bottom, + p->src_rect.left / 2, p->src_rect.top, + p->src_rect.left / 2 + w / 2, p->src_rect.bottom, + final_texw, final_texh, + NULL, is_flipped); + draw_triangles(p, vb, VERTICES_PER_QUAD); + + glEnable3DRight(gl, p->stereo_mode); + + write_quad(vb, + p->dst_rect.left, p->dst_rect.top, + p->dst_rect.right, p->dst_rect.bottom, + p->src_rect.left / 2 + imgw / 2, p->src_rect.top, + p->src_rect.left / 2 + imgw / 2 + w / 2, p->src_rect.bottom, + final_texw, final_texh, + NULL, is_flipped); + draw_triangles(p, vb, VERTICES_PER_QUAD); + + glDisable3D(gl, p->stereo_mode); + } else { + write_quad(vb, + p->dst_rect.left, p->dst_rect.top, + p->dst_rect.right, p->dst_rect.bottom, + p->src_rect.left, p->src_rect.top, + p->src_rect.right, p->src_rect.bottom, + final_texw, final_texh, + NULL, is_flipped); + draw_triangles(p, vb, VERTICES_PER_QUAD); + } + + gl->Disable(GL_FRAMEBUFFER_SRGB); + + gl->UseProgram(0); + + debug_check_gl(p, "after video rendering"); +} + +static void update_window_sized_objects(struct gl_priv *p) +{ + if (p->scale_sep_program) { + if (p->dst_rect.height > p->scale_sep_fbo.tex_h) { + fbotex_uninit(p, &p->scale_sep_fbo); + // Round up to an arbitrary alignment to make window resizing or + // panscan controls smoother (less texture reallocations). + int height = FFALIGN(p->dst_rect.height, 256); + fbotex_init(p, &p->scale_sep_fbo, p->image_width, height); + } + p->scale_sep_fbo.vp_w = p->image_width; + p->scale_sep_fbo.vp_h = p->dst_rect.height; + } +} + +static void resize(struct gl_priv *p) +{ + GL *gl = p->gl; + struct vo *vo = p->vo; + + mp_msg(MSGT_VO, MSGL_V, "[gl] Resize: %dx%d\n", vo->dwidth, vo->dheight); + p->vp_x = 0, p->vp_y = 0; + if (WinID >= 0) { + int w = vo->dwidth, h = vo->dheight; + int old_y = vo->dheight; + geometry(&p->vp_x, &p->vp_y, &w, &h, vo->dwidth, vo->dheight); + p->vp_y = old_y - h - p->vp_y; + } + p->vp_w = vo->dwidth, p->vp_h = vo->dheight; + gl->Viewport(p->vp_x, p->vp_y, p->vp_w, p->vp_h); + + struct vo_rect borders; + calc_src_dst_rects(vo, p->image_width, p->image_height, &p->src_rect, + &p->dst_rect, &borders, NULL); + p->border_x = borders.left; + p->border_y = borders.top; + + bool need_scaler_reinit = false; // filter size change needed + bool need_scaler_update = false; // filter LUT change needed + bool too_small = false; + for (int n = 0; n < 2; n++) { + if (p->scalers[n].kernel) { + struct filter_kernel tkernel = *p->scalers[n].kernel; + struct filter_kernel old = tkernel; + bool ok = update_scale_factor(p, &tkernel); + too_small |= !ok; + need_scaler_reinit |= (tkernel.size != old.size); + need_scaler_update |= (tkernel.inv_scale != old.inv_scale); + } + } + if (need_scaler_reinit) { + reinit_rendering(p); + } else if (need_scaler_update) { + init_scaler(p, &p->scalers[0]); + init_scaler(p, &p->scalers[1]); + } + if (too_small) + mp_msg(MSGT_VO, MSGL_WARN, "[gl] Can't downscale that much, window " + "output may look suboptimal.\n"); + + update_window_sized_objects(p); + update_all_uniforms(p); + +#ifdef CONFIG_FREETYPE + // adjust font size to display size + force_load_font = 1; +#endif + vo_osd_changed(OSDTYPE_OSD); + + gl->Clear(GL_COLOR_BUFFER_BIT); + vo->want_redraw = true; +} + +static void flip_page(struct vo *vo) +{ + struct gl_priv *p = vo->priv; + GL *gl = p->gl; + + if (p->use_glFinish) + gl->Finish(); + + p->glctx->swapGlBuffers(p->glctx); + + if (p->dst_rect.left > p->vp_x || p->dst_rect.top > p->vp_y + || p->dst_rect.right < p->vp_x + p->vp_w + || p->dst_rect.bottom < p->vp_y + p->vp_h) + { + gl->Clear(GL_COLOR_BUFFER_BIT); + } +} + +static int draw_slice(struct vo *vo, uint8_t *src[], int stride[], int w, int h, + int x, int y) +{ + struct gl_priv *p = vo->priv; + GL *gl = p->gl; + + p->mpi_flipped = stride[0] < 0; + + for (int n = 0; n < p->plane_count; n++) { + gl->ActiveTexture(GL_TEXTURE0 + n); + gl->BindTexture(GL_TEXTURE_2D, p->planes[n].gl_texture); + int xs = p->planes[n].shift_x, ys = p->planes[n].shift_y; + glUploadTex(gl, GL_TEXTURE_2D, p->gl_format, p->gl_type, src[n], + stride[n], x >> xs, y >> ys, w >> xs, h >> ys, 0); + } + gl->ActiveTexture(GL_TEXTURE0); + + return 0; +} + +static uint32_t get_image(struct vo *vo, mp_image_t *mpi) +{ + struct gl_priv *p = vo->priv; + GL *gl = p->gl; + + if (!p->use_pbo) + return VO_FALSE; + + // We don't support alpha planes. (Disabling PBOs with normal draw calls is + // an undesired, but harmless side-effect.) + if (mpi->num_planes != p->plane_count) + return VO_FALSE; + + if (mpi->flags & MP_IMGFLAG_READABLE) + return VO_FALSE; + if (mpi->type != MP_IMGTYPE_STATIC && mpi->type != MP_IMGTYPE_TEMP && + (mpi->type != MP_IMGTYPE_NUMBERED || mpi->number)) + return VO_FALSE; + mpi->flags &= ~MP_IMGFLAG_COMMON_PLANE; + for (int n = 0; n < p->plane_count; n++) { + struct texplane *plane = &p->planes[n]; + mpi->stride[n] = (mpi->width >> plane->shift_x) * p->plane_bytes; + int needed_size = (mpi->height >> plane->shift_y) * mpi->stride[n]; + if (!plane->gl_buffer) + gl->GenBuffers(1, &plane->gl_buffer); + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, plane->gl_buffer); + if (needed_size > plane->buffer_size) { + plane->buffer_size = needed_size; + gl->BufferData(GL_PIXEL_UNPACK_BUFFER, plane->buffer_size, + NULL, GL_DYNAMIC_DRAW); + } + if (!plane->buffer_ptr) + plane->buffer_ptr = gl->MapBuffer(GL_PIXEL_UNPACK_BUFFER, + GL_WRITE_ONLY); + mpi->planes[n] = plane->buffer_ptr; + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + } + mpi->flags |= MP_IMGFLAG_DIRECT; + return VO_TRUE; +} + +static uint32_t draw_image(struct gl_priv *p, mp_image_t *mpi) +{ + GL *gl = p->gl; + int n; + + assert(mpi->num_planes >= p->plane_count); + + mp_image_t mpi2 = *mpi; + int w = mpi->w, h = mpi->h; + if (mpi->flags & MP_IMGFLAG_DRAW_CALLBACK) + goto skip_upload; + mpi2.flags = 0; + mpi2.type = MP_IMGTYPE_TEMP; + mpi2.width = mpi2.w; + mpi2.height = mpi2.h; + if (!(mpi->flags & MP_IMGFLAG_DIRECT) + && !p->planes[0].buffer_ptr + && get_image(p->vo, &mpi2) == VO_TRUE) + { + for (n = 0; n < p->plane_count; n++) { + struct texplane *plane = &p->planes[n]; + int xs = plane->shift_x, ys = plane->shift_y; + int line_bytes = (mpi->w >> xs) * p->plane_bytes; + memcpy_pic(mpi2.planes[n], mpi->planes[n], line_bytes, mpi->h >> ys, + mpi2.stride[n], mpi->stride[n]); + } + mpi = &mpi2; + } + p->mpi_flipped = mpi->stride[0] < 0; + for (n = 0; n < p->plane_count; n++) { + struct texplane *plane = &p->planes[n]; + int xs = plane->shift_x, ys = plane->shift_y; + void *plane_ptr = mpi->planes[n]; + if (mpi->flags & MP_IMGFLAG_DIRECT) { + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, plane->gl_buffer); + if (!gl->UnmapBuffer(GL_PIXEL_UNPACK_BUFFER)) + mp_msg(MSGT_VO, MSGL_FATAL, "[gl] Video PBO upload failed. " + "Remove the 'pbo' suboption.\n"); + plane->buffer_ptr = NULL; + plane_ptr = NULL; // PBO offset 0 + } + gl->ActiveTexture(GL_TEXTURE0 + n); + gl->BindTexture(GL_TEXTURE_2D, plane->gl_texture); + glUploadTex(gl, GL_TEXTURE_2D, p->gl_format, p->gl_type, plane_ptr, + mpi->stride[n], 0, 0, w >> xs, h >> ys, 0); + } + gl->ActiveTexture(GL_TEXTURE0); + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); +skip_upload: + do_render(p); + return VO_TRUE; +} + +static mp_image_t *get_screenshot(struct gl_priv *p) +{ + GL *gl = p->gl; + + mp_image_t *image = alloc_mpi(p->texture_width, p->texture_height, + p->image_format); + + // NOTE about image formats with alpha plane: we don't even have the alpha + // anymore. We never upload it to any texture, as it would be a waste of + // time. On the other hand, we can't find a "similar", non-alpha image + // format easily. So we just leave the alpha plane of the newly allocated + // image as-is, and hope that the alpha is ignored by the receiver of the + // screenshot. (If not, code should be added to make it fully opaque.) + + for (int n = 0; n < p->plane_count; n++) { + gl->ActiveTexture(GL_TEXTURE0 + n); + gl->BindTexture(GL_TEXTURE_2D, p->planes[n].gl_texture); + glDownloadTex(gl, GL_TEXTURE_2D, p->gl_format, p->gl_type, + image->planes[n], image->stride[n]); + } + gl->ActiveTexture(GL_TEXTURE0); + + image->width = p->image_width; + image->height = p->image_height; + + image->w = p->vo->aspdat.prew; + image->h = p->vo->aspdat.preh; + + return image; +} + +static mp_image_t *get_window_screenshot(struct gl_priv *p) +{ + GL *gl = p->gl; + + mp_image_t *image = alloc_mpi(p->vp_w, p->vp_h, IMGFMT_RGB24); + gl->PixelStorei(GL_PACK_ALIGNMENT, 4); + gl->PixelStorei(GL_PACK_ROW_LENGTH, 0); + gl->ReadBuffer(GL_FRONT); + // flip image while reading + for (int y = 0; y < p->vp_h; y++) { + gl->ReadPixels(p->vp_x, p->vp_y + p->vp_h - y - 1, p->vp_w, 1, + GL_RGB, GL_UNSIGNED_BYTE, + image->planes[0] + y * image->stride[0]); + } + return image; +} + +static void clear_osd(struct gl_priv *p) +{ + GL *gl = p->gl; + + if (!p->osd_textures_count) + return; + gl->DeleteTextures(p->osd_textures_count, p->osd_textures); + p->osd_textures_count = 0; +} + +static void create_osd_texture(void *ctx, int x0, int y0, int w, int h, + unsigned char *src, unsigned char *srca, + int stride) +{ + struct gl_priv *p = ctx; + GL *gl = p->gl; + + if (w <= 0 || h <= 0 || stride < w) { + mp_msg(MSGT_VO, MSGL_V, "Invalid dimensions OSD for part!\n"); + return; + } + + if (p->osd_textures_count >= MAX_OSD_PARTS) { + mp_msg(MSGT_VO, MSGL_ERR, "Too many OSD parts, contact the developers!\n"); + return; + } + + int sx, sy; + tex_size(p, w, h, &sx, &sy); + + gl->GenTextures(1, &p->osd_textures[p->osd_textures_count]); + gl->BindTexture(GL_TEXTURE_2D, p->osd_textures[p->osd_textures_count]); + gl->TexImage2D(GL_TEXTURE_2D, 0, GL_RG, sx, sy, 0, GL_RG, GL_UNSIGNED_BYTE, + NULL); + default_tex_params(gl, GL_TEXTURE_2D, GL_NEAREST); + unsigned char *tmp = malloc(stride * h * 2); + // Convert alpha from weird MPlayer scale. + for (int i = 0; i < h * stride; i++) { + tmp[i*2+0] = src[i]; + tmp[i*2+1] = -srca[i]; + } + glUploadTex(gl, GL_TEXTURE_2D, GL_RG, GL_UNSIGNED_BYTE, tmp, stride * 2, + 0, 0, w, h, 0); + free(tmp); + + gl->BindTexture(GL_TEXTURE_2D, 0); + + uint8_t color[4] = {(p->osd_color >> 16) & 0xff, (p->osd_color >> 8) & 0xff, + p->osd_color & 0xff, 0xff - (p->osd_color >> 24)}; + + write_quad(&p->osd_va[p->osd_textures_count * VERTICES_PER_QUAD], + x0, y0, x0 + w, y0 + h, 0, 0, w, h, + sx, sy, color, false); + + p->osd_textures_count++; +} + +static void draw_osd(struct vo *vo, struct osd_state *osd) +{ + struct gl_priv *p = vo->priv; + GL *gl = p->gl; + + if (vo_osd_changed(0)) { + clear_osd(p); + osd_draw_text_ext(osd, vo->dwidth, vo->dheight, p->border_x, + p->border_y, p->border_x, + p->border_y, p->image_width, + p->image_height, create_osd_texture, p); + } + + if (p->osd_textures_count > 0) { + gl->Enable(GL_BLEND); + // OSD bitmaps use premultiplied alpha. + gl->BlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA); + + gl->UseProgram(p->osd_program); + + for (int n = 0; n < p->osd_textures_count; n++) { + gl->BindTexture(GL_TEXTURE_2D, p->osd_textures[n]); + draw_triangles(p, &p->osd_va[n * VERTICES_PER_QUAD], + VERTICES_PER_QUAD); + } + + gl->UseProgram(0); + + gl->Disable(GL_BLEND); + gl->BindTexture(GL_TEXTURE_2D, 0); + } +} + +static void gen_eosd(struct gl_priv *p, mp_eosd_images_t *imgs) +{ + GL *gl = p->gl; + + bool need_repos, need_upload, need_allocate; + eosd_packer_generate(p->eosd, imgs, &need_repos, &need_upload, + &need_allocate); + + if (!need_repos) + return; + + if (!p->eosd_texture) { + gl->GenTextures(1, &p->eosd_texture); + gl->GenBuffers(1, &p->eosd_buffer); + } + + gl->BindTexture(GL_TEXTURE_2D, p->eosd_texture); + + if (need_allocate) { + tex_size(p, p->eosd->surface.w, p->eosd->surface.h, + &p->eosd_texture_width, &p->eosd_texture_height); + gl->TexImage2D(GL_TEXTURE_2D, 0, GL_RED, + p->eosd_texture_width, p->eosd_texture_height, 0, + GL_RED, GL_UNSIGNED_BYTE, NULL); + default_tex_params(gl, GL_TEXTURE_2D, GL_NEAREST); + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, p->eosd_buffer); + gl->BufferData(GL_PIXEL_UNPACK_BUFFER, + p->eosd->surface.w * p->eosd->surface.h, + NULL, + GL_DYNAMIC_COPY); + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + } + + p->eosd_va = talloc_realloc_size(p->eosd, p->eosd_va, + p->eosd->targets_count + * sizeof(struct vertex) + * VERTICES_PER_QUAD); + + if (need_upload && p->use_pbo) { + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, p->eosd_buffer); + char *data = gl->MapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY); + if (!data) { + mp_msg(MSGT_VO, MSGL_FATAL, "[gl] Error: can't upload subtitles! " + "Subtitles will look corrupted.\n"); + } else { + for (int n = 0; n < p->eosd->targets_count; n++) { + struct eosd_target *target = &p->eosd->targets[n]; + ASS_Image *i = target->ass_img; + + void *pdata = data + target->source.y0 * p->eosd->surface.w + + target->source.x0; + + memcpy_pic(pdata, i->bitmap, i->w, i->h, + p->eosd->surface.w, i->stride); + } + if (!gl->UnmapBuffer(GL_PIXEL_UNPACK_BUFFER)) + mp_msg(MSGT_VO, MSGL_FATAL, "[gl] EOSD PBO upload failed. " + "Remove the 'pbo' suboption.\n"); + struct eosd_rect rc; + eosd_packer_calculate_source_bb(p->eosd, &rc); + glUploadTex(gl, GL_TEXTURE_2D, GL_RED, GL_UNSIGNED_BYTE, NULL, + p->eosd->surface.w, rc.x0, rc.y0, + rc.x1 - rc.x0, rc.y1 - rc.y0, 0); + } + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + } else if (need_upload) { + // non-PBO upload + for (int n = 0; n < p->eosd->targets_count; n++) { + struct eosd_target *target = &p->eosd->targets[n]; + ASS_Image *i = target->ass_img; + + glUploadTex(gl, GL_TEXTURE_2D, GL_RED, GL_UNSIGNED_BYTE, i->bitmap, + i->stride, target->source.x0, target->source.y0, + i->w, i->h, 0); + } + } + + gl->BindTexture(GL_TEXTURE_2D, 0); + + debug_check_gl(p, "EOSD upload"); + + for (int n = 0; n < p->eosd->targets_count; n++) { + struct eosd_target *target = &p->eosd->targets[n]; + ASS_Image *i = target->ass_img; + uint8_t color[4] = { i->color >> 24, (i->color >> 16) & 0xff, + (i->color >> 8) & 0xff, 255 - (i->color & 0xff) }; + + write_quad(&p->eosd_va[n * VERTICES_PER_QUAD], + target->dest.x0, target->dest.y0, + target->dest.x1, target->dest.y1, + target->source.x0, target->source.y0, + target->source.x1, target->source.y1, + p->eosd_texture_width, p->eosd_texture_height, + color, false); + } +} + +static void draw_eosd(struct gl_priv *p, mp_eosd_images_t *imgs) +{ + GL *gl = p->gl; + + gen_eosd(p, imgs); + + if (p->eosd->targets_count == 0) + return; + + gl->Enable(GL_BLEND); + gl->BlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + gl->BindTexture(GL_TEXTURE_2D, p->eosd_texture); + gl->UseProgram(p->eosd_program); + draw_triangles(p, p->eosd_va, p->eosd->targets_count * VERTICES_PER_QUAD); + gl->UseProgram(0); + gl->BindTexture(GL_TEXTURE_2D, 0); + gl->Disable(GL_BLEND); +} + +static void setup_vertex_array(GL *gl) +{ + size_t stride = sizeof(struct vertex); + + gl->EnableVertexAttribArray(VERTEX_ATTRIB_POSITION); + gl->VertexAttribPointer(VERTEX_ATTRIB_POSITION, 2, GL_FLOAT, GL_FALSE, + stride, (void*)offsetof(struct vertex, position)); + + gl->EnableVertexAttribArray(VERTEX_ATTRIB_COLOR); + gl->VertexAttribPointer(VERTEX_ATTRIB_COLOR, 4, GL_UNSIGNED_BYTE, GL_TRUE, + stride, (void*)offsetof(struct vertex, color)); + + gl->EnableVertexAttribArray(VERTEX_ATTRIB_TEXCOORD); + gl->VertexAttribPointer(VERTEX_ATTRIB_TEXCOORD, 2, GL_FLOAT, GL_FALSE, + stride, (void*)offsetof(struct vertex, texcoord)); +} + +static int init_gl(struct gl_priv *p) +{ + GL *gl = p->gl; + + debug_check_gl(p, "before init_gl"); + + const char *vendor = gl->GetString(GL_VENDOR); + const char *version = gl->GetString(GL_VERSION); + const char *renderer = gl->GetString(GL_RENDERER); + const char *glsl = gl->GetString(GL_SHADING_LANGUAGE_VERSION); + mp_msg(MSGT_VO, MSGL_V, "[gl] GL_RENDERER='%s', GL_VENDOR='%s', " + "GL_VERSION='%s', GL_SHADING_LANGUAGE_VERSION='%s'" + "\n", renderer, vendor, version, glsl); + mp_msg(MSGT_VO, MSGL_V, "[gl] Display depth: R=%d, G=%d, B=%d\n", + p->glctx->depth_r, p->glctx->depth_g, p->glctx->depth_b); + + GLint major, minor; + gl->GetIntegerv(GL_MAJOR_VERSION, &major); + gl->GetIntegerv(GL_MINOR_VERSION, &minor); + + p->shader_version = "130"; + + // Hack for OSX: it only creates 3.2 contexts. + if (MPGL_VER(major, minor) >= MPGL_VER(3, 2)) + p->shader_version = "150"; + + gl->Disable(GL_DITHER); + gl->Disable(GL_BLEND); + gl->Disable(GL_DEPTH_TEST); + gl->DepthMask(GL_FALSE); + gl->Disable(GL_CULL_FACE); + gl->DrawBuffer(GL_BACK); + + gl->GenBuffers(1, &p->vertex_buffer); + gl->GenVertexArrays(1, &p->vao); + + gl->BindBuffer(GL_ARRAY_BUFFER, p->vertex_buffer); + gl->BindVertexArray(p->vao); + setup_vertex_array(gl); + gl->BindBuffer(GL_ARRAY_BUFFER, 0); + gl->BindVertexArray(0); + + GLint max_texture_size; + gl->GetIntegerv(GL_MAX_TEXTURE_SIZE, &max_texture_size); + eosd_packer_reinit(p->eosd, max_texture_size, max_texture_size); + + gl->ClearColor(0.0f, 0.0f, 0.0f, 0.0f); + gl->Clear(GL_COLOR_BUFFER_BIT); + if (gl->SwapInterval && p->swap_interval >= 0) + gl->SwapInterval(p->swap_interval); + + debug_check_gl(p, "after init_gl"); + + return 1; +} + +static void uninit_gl(struct gl_priv *p) +{ + GL *gl = p->gl; + + // NOTE: GL functions might not be loaded yet + if (!(p->glctx && p->gl->DeleteTextures)) + return; + + uninit_video(p); + + gl->DeleteVertexArrays(1, &p->vao); + p->vao = 0; + gl->DeleteBuffers(1, &p->vertex_buffer); + p->vertex_buffer = 0; + + clear_osd(p); + gl->DeleteTextures(1, &p->eosd_texture); + p->eosd_texture = 0; + gl->DeleteBuffers(1, &p->eosd_buffer); + p->eosd_buffer = 0; + eosd_packer_reinit(p->eosd, 0, 0); + + gl->DeleteTextures(1, &p->lut_3d_texture); + p->lut_3d_texture = 0; +} + +static bool init_format(int fmt, struct gl_priv *init) +{ + bool supported = false; + struct gl_priv dummy; + if (!init) + init = &dummy; + + mp_image_t dummy_img = {0}; + mp_image_setfmt(&dummy_img, fmt); + + init->image_format = fmt; + init->component_bits = -1; + + // RGB/packed formats + for (const struct fmt_entry *e = mp_to_gl_formats; e->mp_format; e++) { + if (e->mp_format == fmt) { + supported = true; + init->plane_bits = dummy_img.bpp; + init->gl_format = e->format; + init->gl_internal_format = e->internal_format; + init->component_bits = e->component_bits; + init->gl_type = e->type; + break; + } + } + + // YUV/planar formats + if (!supported && mp_get_chroma_shift(fmt, NULL, NULL, &init->plane_bits)) { + init->gl_format = GL_RED; + init->component_bits = init->plane_bits; + if (init->plane_bits == 8) { + supported = true; + init->gl_internal_format = GL_RED; + init->gl_type = GL_UNSIGNED_BYTE; + } else if (IMGFMT_IS_YUVP16_NE(fmt)) { + supported = true; + init->gl_internal_format = GL_R16; + init->gl_type = GL_UNSIGNED_SHORT; + } + } + + // RGB/planar + if (!supported && fmt == IMGFMT_GBRP) { + supported = true; + init->plane_bits = init->component_bits = 8; + init->gl_format = GL_RED; + init->gl_internal_format = GL_RED; + init->gl_type = GL_UNSIGNED_BYTE; + } + + if (!supported) + return false; + + init->plane_bytes = (init->plane_bits + 7) / 8; + init->is_yuv = dummy_img.flags & MP_IMGFLAG_YUV; + init->is_linear_rgb = false; + + // NOTE: we throw away the additional alpha plane, if one exists. + init->plane_count = dummy_img.num_planes > 2 ? 3 : 1; + assert(dummy_img.num_planes >= init->plane_count); + assert(dummy_img.num_planes <= init->plane_count + 1); + + for (int n = 0; n < init->plane_count; n++) { + struct texplane *plane = &init->planes[n]; + + plane->shift_x = n > 0 ? dummy_img.chroma_x_shift : 0; + plane->shift_y = n > 0 ? dummy_img.chroma_y_shift : 0; + } + + return true; +} + +static int query_format(uint32_t format) +{ + int caps = VFCAP_CSP_SUPPORTED | VFCAP_CSP_SUPPORTED_BY_HW | VFCAP_FLIP | + VFCAP_HWSCALE_UP | VFCAP_HWSCALE_DOWN | VFCAP_ACCEPT_STRIDE | + VFCAP_OSD | VFCAP_EOSD | VFCAP_EOSD_UNSCALED; + if (!init_format(format, NULL)) + return 0; + return caps; +} + +static bool config_window(struct gl_priv *p, uint32_t d_width, + uint32_t d_height, uint32_t flags) +{ + if (p->stereo_mode == GL_3D_QUADBUFFER) + flags |= VOFLAG_STEREO; + + int mpgl_version = MPGL_VER(3, 0); + int mpgl_flags = p->use_gl_debug ? MPGLFLAG_DEBUG : 0; + + if (p->use_gl2) + mpgl_version = MPGL_VER(2, 1); + + if (create_mpglcontext(p->glctx, mpgl_flags, mpgl_version, d_width, + d_height, flags) == SET_WINDOW_FAILED) + return false; + + if (!p->vertex_buffer) + init_gl(p); + + return true; +} + +static int config(struct vo *vo, uint32_t width, uint32_t height, + uint32_t d_width, uint32_t d_height, uint32_t flags, + uint32_t format) +{ + struct gl_priv *p = vo->priv; + + if (!config_window(p, d_width, d_height, flags)) + return -1; + + p->vo_flipped = !!(flags & VOFLAG_FLIPPING); + + if (p->image_format != format || p->image_width != width + || p->image_height != height) + { + uninit_video(p); + p->image_height = height; + p->image_width = width; + init_format(format, p); + init_video(p); + } + + resize(p); + + return 0; +} + +static void check_events(struct vo *vo) +{ + struct gl_priv *p = vo->priv; + + int e = p->glctx->check_events(vo); + if (e & VO_EVENT_REINIT) { + uninit_gl(p); + init_gl(p); + init_video(p); + resize(p); + } + if (e & VO_EVENT_RESIZE) + resize(p); + if (e & VO_EVENT_EXPOSE) + vo->want_redraw = true; +} + +static int control(struct vo *vo, uint32_t request, void *data) +{ + struct gl_priv *p = vo->priv; + + switch (request) { + case VOCTRL_QUERY_FORMAT: + return query_format(*(uint32_t *)data); + case VOCTRL_GET_IMAGE: + return get_image(vo, data); + case VOCTRL_DRAW_IMAGE: + return draw_image(p, data); + case VOCTRL_DRAW_EOSD: + if (!data) + return VO_FALSE; + draw_eosd(p, data); + return VO_TRUE; + case VOCTRL_GET_EOSD_RES: { + mp_eosd_res_t *r = data; + r->w = vo->dwidth; + r->h = vo->dheight; + r->ml = r->mr = p->border_x; + r->mt = r->mb = p->border_y; + return VO_TRUE; + } + case VOCTRL_ONTOP: + if (!p->glctx->ontop) + break; + p->glctx->ontop(vo); + return VO_TRUE; + case VOCTRL_FULLSCREEN: + p->glctx->fullscreen(vo); + resize(p); + return VO_TRUE; + case VOCTRL_BORDER: + if (!p->glctx->border) + break; + p->glctx->border(vo); + resize(p); + return VO_TRUE; + case VOCTRL_GET_PANSCAN: + return VO_TRUE; + case VOCTRL_SET_PANSCAN: + resize(p); + return VO_TRUE; + case VOCTRL_GET_EQUALIZER: { + struct voctrl_get_equalizer_args *args = data; + return mp_csp_equalizer_get(&p->video_eq, args->name, args->valueptr) + >= 0 ? VO_TRUE : VO_NOTIMPL; + } + case VOCTRL_SET_EQUALIZER: { + struct voctrl_set_equalizer_args *args = data; + if (mp_csp_equalizer_set(&p->video_eq, args->name, args->value) < 0) + return VO_NOTIMPL; + if (!p->use_gamma && p->video_eq.values[MP_CSP_EQ_GAMMA] != 0) { + mp_msg(MSGT_VO, MSGL_V, "[gl] Auto-enabling gamma.\n"); + p->use_gamma = true; + compile_shaders(p); + } + update_all_uniforms(p); + vo->want_redraw = true; + return VO_TRUE; + } + case VOCTRL_SET_YUV_COLORSPACE: { + if (p->is_yuv) { + p->colorspace = *(struct mp_csp_details *)data; + update_all_uniforms(p); + vo->want_redraw = true; + } + return VO_TRUE; + } + case VOCTRL_GET_YUV_COLORSPACE: + *(struct mp_csp_details *)data = p->colorspace; + return VO_TRUE; + case VOCTRL_UPDATE_SCREENINFO: + if (!p->glctx->update_xinerama_info) + break; + p->glctx->update_xinerama_info(vo); + return VO_TRUE; + case VOCTRL_SCREENSHOT: { + struct voctrl_screenshot_args *args = data; + if (args->full_window) + args->out_image = get_window_screenshot(p); + else + args->out_image = get_screenshot(p); + return true; + } + case VOCTRL_REDRAW_FRAME: + do_render(p); + return true; + } + return VO_NOTIMPL; +} + +static void uninit(struct vo *vo) +{ + struct gl_priv *p = vo->priv; + + uninit_gl(p); + uninit_mpglcontext(p->glctx); + p->glctx = NULL; + p->gl = NULL; +} + +#ifdef CONFIG_LCMS2 + +static void lcms2_error_handler(cmsContext ctx, cmsUInt32Number code, + const char *msg) +{ + mp_msg(MSGT_VO, MSGL_ERR, "[gl] lcms2: %s\n", msg); +} + +static struct bstr load_file(struct gl_priv *p, void *talloc_ctx, + const char *filename) +{ + struct bstr res = {0}; + stream_t *s = open_stream(filename, p->vo->opts, NULL); + if (s) { + res = stream_read_complete(s, talloc_ctx, 1000000000, 0); + free_stream(s); + } + return res; +} + +#define LUT3D_CACHE_HEADER "mplayer2 3dlut cache 1.0\n" + +static bool load_icc(struct gl_priv *p, const char *icc_file, + const char *icc_cache, int icc_intent, + int s_r, int s_g, int s_b) +{ + void *tmp = talloc_new(p); + uint16_t *output = talloc_array(tmp, uint16_t, s_r * s_g * s_b * 3); + + if (icc_intent == -1) + icc_intent = INTENT_ABSOLUTE_COLORIMETRIC; + + mp_msg(MSGT_VO, MSGL_INFO, "[gl] Opening ICC profile '%s'\n", icc_file); + struct bstr iccdata = load_file(p, tmp, icc_file); + if (!iccdata.len) + goto error_exit; + + char *cache_info = talloc_asprintf(tmp, "intent=%d, size=%dx%dx%d\n", + icc_intent, s_r, s_g, s_b); + + // check cache + if (icc_cache) { + mp_msg(MSGT_VO, MSGL_INFO, "[gl] Opening 3D LUT cache in file '%s'.\n", + icc_cache); + struct bstr cachedata = load_file(p, tmp, icc_cache); + if (bstr_eatstart(&cachedata, bstr(LUT3D_CACHE_HEADER)) + && bstr_eatstart(&cachedata, bstr(cache_info)) + && bstr_eatstart(&cachedata, iccdata) + && cachedata.len == talloc_get_size(output)) + { + memcpy(output, cachedata.start, cachedata.len); + goto done; + } else { + mp_msg(MSGT_VO, MSGL_WARN, "[gl] 3D LUT cache invalid!\n"); + } + } + + cmsSetLogErrorHandler(lcms2_error_handler); + + cmsHPROFILE profile = cmsOpenProfileFromMem(iccdata.start, iccdata.len); + if (!profile) + goto error_exit; + + cmsCIExyY d65; + cmsWhitePointFromTemp(&d65, 6504); + static const cmsCIExyYTRIPLE bt709prim = { + .Red = {0.64, 0.33, 1.0}, + .Green = {0.30, 0.60, 1.0}, + .Blue = {0.15, 0.06, 1.0}, + }; + cmsToneCurve *tonecurve = cmsBuildGamma(NULL, 2.2); + cmsHPROFILE vid_profile = cmsCreateRGBProfile(&d65, &bt709prim, + (cmsToneCurve*[3]){tonecurve, tonecurve, tonecurve}); + cmsFreeToneCurve(tonecurve); + cmsHTRANSFORM trafo = cmsCreateTransform(vid_profile, TYPE_RGB_16, + profile, TYPE_RGB_16, + icc_intent, + cmsFLAGS_HIGHRESPRECALC); + cmsCloseProfile(profile); + cmsCloseProfile(vid_profile); + + if (!trafo) + goto error_exit; + + // transform a (s_r)x(s_g)x(s_b) cube, with 3 components per channel + uint16_t *input = talloc_array(tmp, uint16_t, s_r * 3); + for (int b = 0; b < s_b; b++) { + for (int g = 0; g < s_g; g++) { + for (int r = 0; r < s_r; r++) { + input[r * 3 + 0] = r * 65535 / (s_r - 1); + input[r * 3 + 1] = g * 65535 / (s_g - 1); + input[r * 3 + 2] = b * 65535 / (s_b - 1); + } + size_t base = (b * s_r * s_g + g * s_r) * 3; + cmsDoTransform(trafo, input, output + base, s_r); + } + } + + cmsDeleteTransform(trafo); + + if (icc_cache) { + FILE *out = fopen(icc_cache, "wb"); + if (out) { + fprintf(out, "%s%s", LUT3D_CACHE_HEADER, cache_info); + fwrite(iccdata.start, iccdata.len, 1, out); + fwrite(output, talloc_get_size(output), 1, out); + fclose(out); + } + } + +done: + + p->lut_3d_data = talloc_steal(p, output); + p->lut_3d_w = s_r, p->lut_3d_h = s_g, p->lut_3d_d = s_b; + p->use_lut_3d = true; + + talloc_free(tmp); + return true; + +error_exit: + mp_msg(MSGT_VO, MSGL_FATAL, "[gl] Error loading ICC profile.\n"); + talloc_free(tmp); + return false; +} + +#else /* CONFIG_LCMS2 */ + +static bool load_icc(struct gl_priv *p, ...) +{ + mp_msg(MSGT_VO, MSGL_FATAL, "[gl] LCMS2 support not compiled.\n"); + return false; +} + +#endif /* CONFIG_LCMS2 */ + +static bool parse_3dlut_size(const char *s, int *p1, int *p2, int *p3) +{ + if (sscanf(s, "%dx%dx%d", p1, p2, p3) != 3) + return false; + for (int n = 0; n < 3; n++) { + int s = ((int[]) { *p1, *p2, *p3 })[n]; + if (s < 2 || s > 256 || ((s - 1) & s)) + return false; + } + return true; +} + +static int lut3d_size_valid(void *arg) +{ + char *s = *(char **)arg; + int p1, p2, p3; + return parse_3dlut_size(s, &p1, &p2, &p3); +} + +static int backend_valid(void *arg) +{ + return mpgl_find_backend(*(const char **)arg) >= 0; +} + +struct fbo_format { + const char *name; + GLint format; +}; + +const struct fbo_format fbo_formats[] = { + {"rgb", GL_RGB}, + {"rgba", GL_RGBA}, + {"rgb8", GL_RGB8}, + {"rgb16", GL_RGB16}, + {"rgb16f", GL_RGB16F}, + {"rgb32f", GL_RGB32F}, + {0} +}; + +static GLint find_fbo_format(const char *name) +{ + for (const struct fbo_format *fmt = fbo_formats; fmt->name; fmt++) { + if (strcmp(fmt->name, name) == 0) + return fmt->format; + } + return -1; +} + +static int fbo_format_valid(void *arg) +{ + return find_fbo_format(*(const char **)arg) >= 0; +} + +static bool can_use_filter_kernel(const struct filter_kernel *kernel) +{ + if (!kernel) + return false; + struct filter_kernel k = *kernel; + return mp_init_filter(&k, filter_sizes, 1); +} + +static const char* handle_scaler_opt(const char *name) +{ + const struct filter_kernel *kernel = mp_find_filter_kernel(name); + if (can_use_filter_kernel(kernel)) + return kernel->name; + + for (const char **filter = fixed_scale_filters; *filter; filter++) { + if (strcmp(*filter, name) == 0) + return *filter; + } + + return NULL; +} + +static int scaler_valid(void *arg) +{ + return handle_scaler_opt(*(const char **)arg) != NULL; +} + +#if 0 +static void print_scalers(void) +{ + mp_msg(MSGT_VO, MSGL_INFO, "Available scalers:\n"); + for (const char **e = fixed_scale_filters; *e; e++) { + mp_msg(MSGT_VO, MSGL_INFO, " %s\n", *e); + } + for (const struct filter_kernel *e = mp_filter_kernels; e->name; e++) { + if (can_use_filter_kernel(e)) + mp_msg(MSGT_VO, MSGL_INFO, " %s\n", e->name); + } +} +#endif + +static int preinit(struct vo *vo, const char *arg) +{ + struct gl_priv *p = talloc_zero(vo, struct gl_priv); + vo->priv = p; + + *p = (struct gl_priv) { + .vo = vo, + .colorspace = MP_CSP_DETAILS_DEFAULTS, + .use_npot = 1, + .use_pbo = 0, + .swap_interval = 1, + .osd_color = 0xffffff, + .fbo_format = GL_RGB16, + .use_scale_sep = 1, + .use_fancy_downscaling = 1, + .scalers = { + { .index = 0, .name = "lanczos2" }, + { .index = 1, .name = "bilinear" }, + }, + .scaler_params = {NAN, NAN}, + }; + + + char *scalers[2] = {0}; + char *backend_arg = NULL; + char *fbo_format = NULL; + char *icc_profile = NULL; + char *icc_cache = NULL; + int icc_intent = -1; + char *icc_size_str = NULL; + + const opt_t subopts[] = { + {"gamma", OPT_ARG_BOOL, &p->use_gamma}, + {"srgb", OPT_ARG_BOOL, &p->use_srgb}, + {"npot", OPT_ARG_BOOL, &p->use_npot}, + {"pbo", OPT_ARG_BOOL, &p->use_pbo}, + {"glfinish", OPT_ARG_BOOL, &p->use_glFinish}, + {"swapinterval", OPT_ARG_INT, &p->swap_interval}, + {"osdcolor", OPT_ARG_INT, &p->osd_color}, + {"stereo", OPT_ARG_INT, &p->stereo_mode}, + {"lscale", OPT_ARG_MSTRZ, &scalers[0], scaler_valid}, + {"cscale", OPT_ARG_MSTRZ, &scalers[1], scaler_valid}, + {"lparam1", OPT_ARG_FLOAT, &p->scaler_params[0]}, + {"lparam2", OPT_ARG_FLOAT, &p->scaler_params[1]}, + {"fancy-downscaling", OPT_ARG_BOOL, &p->use_fancy_downscaling}, + {"debug", OPT_ARG_BOOL, &p->use_gl_debug}, + {"force-gl2", OPT_ARG_BOOL, &p->use_gl2}, + {"indirect", OPT_ARG_BOOL, &p->use_indirect}, + {"scale-sep", OPT_ARG_BOOL, &p->use_scale_sep}, + {"fbo-format", OPT_ARG_MSTRZ, &fbo_format, fbo_format_valid}, + {"backend", OPT_ARG_MSTRZ, &backend_arg, backend_valid}, + {"icc-profile", OPT_ARG_MSTRZ, &icc_profile}, + {"icc-cache", OPT_ARG_MSTRZ, &icc_cache}, + {"icc-intent", OPT_ARG_INT, &icc_intent}, + {"3dlut-size", OPT_ARG_MSTRZ, &icc_size_str, + lut3d_size_valid}, + {"dither-depth", OPT_ARG_INT, &p->dither_depth}, + {NULL} + }; + + if (subopt_parse(arg, subopts) != 0) { + mp_msg(MSGT_VO, MSGL_FATAL, help_text); + goto err_out; + } + + int backend = backend_arg ? mpgl_find_backend(backend_arg) : GLTYPE_AUTO; + free(backend_arg); + + if (fbo_format) + p->fbo_format = find_fbo_format(fbo_format); + free(fbo_format); + + for (int n = 0; n < 2; n++) { + if (scalers[n]) + p->scalers[n].name = handle_scaler_opt(scalers[n]); + free(scalers[n]); + } + + int s_r = 128, s_g = 256, s_b = 64; + if (icc_size_str) + parse_3dlut_size(icc_size_str, &s_r, &s_g, &s_b); + free(icc_size_str); + + bool success = true; + if (icc_profile) { + success = load_icc(p, icc_profile, icc_cache, icc_intent, + s_r, s_g, s_b); + } + free(icc_profile); + free(icc_cache); + + if (!success) + goto err_out; + + p->eosd = eosd_packer_create(vo); + + p->glctx = init_mpglcontext(backend, vo); + if (!p->glctx) + goto err_out; + p->gl = p->glctx->gl; + + if (true) { + if (!config_window(p, 320, 200, VOFLAG_HIDDEN)) + goto err_out; + // We created a window to test whether the GL context could be + // created and so on. Destroy that window to make sure all state + // associated with it is lost. + uninit(vo); + p->glctx = init_mpglcontext(backend, vo); + if (!p->glctx) + goto err_out; + p->gl = p->glctx->gl; + } + + return 0; + +err_out: + uninit(vo); + return -1; +} + +const struct vo_driver video_out_gl3 = { + .is_new = true, + .info = &(const vo_info_t) { + "OpenGL 3.x", + "gl3", + "Based on vo_gl.c by Reimar Doeffinger", + "" + }, + .preinit = preinit, + .config = config, + .control = control, + .draw_slice = draw_slice, + .draw_osd = draw_osd, + .flip_page = flip_page, + .check_events = check_events, + .uninit = uninit, +}; + +static const char help_text[] = +"\n--vo=gl3 command line help:\n" +"Example: mplayer --vo=gl3:scale-sep:lscale=lanczos2\n" +"\nOptions:\n" +" lscale=\n" +" Set the scaling filter. Possible choices:\n" +" bilinear: bilinear texture filtering (fastest).\n" +" bicubic_fast: bicubic filter (without lookup texture).\n" +" sharpen3: unsharp masking (sharpening) with radius=3.\n" +" sharpen5: unsharp masking (sharpening) with radius=5.\n" +" lanczos2: Lanczos with radius=2 (default, recommended).\n" +" lanczos3: Lanczos with radius=3 (not recommended).\n" +" mitchell: Mitchell-Netravali.\n" +" Default: lanczos2\n" +" lparam1= / lparam2=\n" +" Set parameters for configurable filters. Affects chroma scaler\n" +" as well.\n" +" Filters which use this:\n" +" mitchell: b and c params (defaults: b=1/3 c=1/3)\n" +" kaiser: (defaults: 6.33 6.33)\n" +" sharpen3: lparam1 sets sharpening strength (default: 0.5)\n" +" sharpen5: as with sharpen3\n" +" osdcolor=<0xAARRGGBB>\n" +" Use the given color for the OSD.\n" +" stereo=\n" +" 0: normal display\n" +" 1: side-by-side to red-cyan stereo\n" +" 2: side-by-side to green-magenta stereo\n" +" 3: side-by-side to quadbuffer stereo\n" +" srgb\n" +" Enable gamma-correct scaling by working in linear light. This\n" +" makes use of sRGB textures and framebuffers.\n" +" This option forces the options 'indirect' and 'gamma'.\n" +" NOTE: for BT.709 colorspaces, a gamma of 2.35 is assumed. For\n" +" other YUV colorspaces, 2.2 is assumed. RGB input is always\n" +" assumed to be in sRGB.\n" +" pbo\n" +" Enable use of PBOs. This is faster, but can sometimes lead to\n" +" sparodic and temporary image corruption.\n" +" dither-depth=\n" +" Positive non-zero values select the target bit depth.\n" +" -1: Disable any dithering done by mplayer.\n" +" 0: Automatic selection. If output bit depth can't be detected,\n" +" 8 bits per component are assumed.\n" +" 8: Dither to 8 bit output.\n" +" Default: 0.\n" +" Note that dithering will always be disabled if the bit depth\n" +" of the video is lower or qual to the detected dither-depth.\n" +" If color management is enabled, input depth is assumed to be\n" +" 16 bits, because the 3D LUT output is 16 bit wide.\n" +" debug\n" +" Check for OpenGL errors, i.e. call glGetError(). Also request a\n" +" debug OpenGL context.\n" +"Less useful options:\n" +" swapinterval=\n" +" Interval in displayed frames between to buffer swaps.\n" +" 1 is equivalent to enable VSYNC, 0 to disable VSYNC.\n" +" no-scale-sep\n" +" When using a separable scale filter for luma, usually two filter\n" +" passes are done. This is often faster. However, it forces\n" +" conversion to RGB in an extra pass, so it can actually be slower\n" +" if used with fast filters on small screen resolutions. Using\n" +" this options will make rendering a single operation.\n" +" Note that chroma scalers are always done as 1-pass filters.\n" +" cscale=\n" +" As lscale but for chroma (2x slower with little visible effect).\n" +" Note that with some scaling filters, upscaling is always done in\n" +" RGB. If chroma is not subsampled, this option is ignored, and the\n" +" luma scaler is used instead. Setting this option is often useless.\n" +" no-fancy-downscaling\n" +" When using convolution based filters, don't extend the filter\n" +" size when downscaling. Trades downscaling performance for\n" +" reduced quality.\n" +" no-npot\n" +" Force use of power-of-2 texture sizes. For debugging only.\n" +" Borders will look discolored due to filtering.\n" +" glfinish\n" +" Call glFinish() before swapping buffers\n" +" backend=\n" +" auto: auto-select (default)\n" +" cocoa: Cocoa/OSX\n" +" win: Win32/WGL\n" +" x11: X11/GLX\n" +" indirect\n" +" Do YUV conversion and scaling as separate passes. This will\n" +" first render the video into a video-sized RGB texture, and\n" +" draw the result on screen. The luma scaler is used to scale\n" +" the RGB image when rendering to screen. The chroma scaler\n" +" is used only on YUV conversion, and only if the video uses\n" +" chroma-subsampling.\n" +" This mechanism is disabled on RGB input.\n" +" fbo-format=\n" +" Selects the internal format of any FBO textures used.\n" +" fmt can be one of: rgb, rgba, rgb8, rgb16, rgb16f, rgb32f\n" +" Default: rgb16.\n" +" gamma\n" +" Always enable gamma control. (Disables delayed enabling.)\n" +" force-gl2\n" +" Create a legacy GL context. This will randomly malfunction\n" +" if the proper extensions are not supported.\n" +"Color management:\n" +" icc-profile=\n" +" Load an ICC profile and use it to transform linear RGB to\n" +" screen output. Needs LittleCMS2 support compiled in.\n" +" icc-cache=\n" +" Store and load the 3D LUT created from the ICC profile in\n" +" this file. This can be used to speed up loading, since\n" +" LittleCMS2 can take a while to create the 3D LUT.\n" +" Note that this file will be at most about 100 MB big.\n" +" icc-intent=\n" +" 0: perceptual\n" +" 1: relative colorimetric\n" +" 2: saturation\n" +" 3: absolute colorimetric (default)\n" +" 3dlut-size=xx\n" +" Size of the 3D LUT generated from the ICC profile in each\n" +" dimension. Default is 128x256x64.\n" +" Sizes must be a power of two, and 256 at most.\n" +"\n"; diff --git a/libvo/vo_gl3_shaders.glsl b/libvo/vo_gl3_shaders.glsl new file mode 100644 index 0000000000..f67e55e6f5 --- /dev/null +++ b/libvo/vo_gl3_shaders.glsl @@ -0,0 +1,316 @@ +/* + * This file is part of mplayer2. + * + * mplayer2 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mplayer2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with mplayer2; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +// Note that this file is not directly passed as shader, but run through some +// text processing functions, and in fact contains multiple vertex and fragment +// shaders. + +// inserted at the beginning of all shaders +#!section prelude +#!section vertex_all +uniform mat3 transform; +uniform sampler3D lut_3d; + +in vec2 vertex_position; +in vec4 vertex_color; +out vec4 color; +in vec2 vertex_texcoord; +out vec2 texcoord; + +void main() { + vec3 position = vec3(vertex_position, 1); +#ifndef FIXED_SCALE + position = transform * position; +#endif + gl_Position = vec4(position, 1); + color = vertex_color; +#ifdef USE_3DLUT + color = vec4(texture(lut_3d, color.rgb).rgb, color.a); +#endif + texcoord = vertex_texcoord; +} + +#!section frag_eosd +uniform sampler2D texture1; + +in vec2 texcoord; +in vec4 color; +out vec4 out_color; + +void main() { + out_color = vec4(color.rgb, color.a * texture(texture1, texcoord).r); +} + +#!section frag_osd +uniform sampler2D texture1; + +in vec2 texcoord; +in vec4 color; +out vec4 out_color; + +void main() { + out_color = texture(texture1, texcoord).rrrg * color; +} + +#!section frag_video +uniform sampler2D texture1; +uniform sampler2D texture2; +uniform sampler2D texture3; +uniform sampler1D lut_c_1d; +uniform sampler1D lut_l_1d; +uniform sampler2D lut_c_2d; +uniform sampler2D lut_l_2d; +uniform sampler3D lut_3d; +uniform sampler2D dither; +uniform mat4x3 colormatrix; +uniform vec3 inv_gamma; +uniform float conv_gamma; +uniform float dither_quantization; +uniform float dither_multiply; +uniform float filter_param1; + +in vec2 texcoord; +out vec4 out_color; + +vec4 sample_bilinear(sampler2D tex, vec2 texcoord) { + return texture(tex, texcoord); +} + +// Explanation how bicubic scaling with only 4 texel fetches is done: +// http://www.mate.tue.nl/mate/pdfs/10318.pdf +// 'Efficient GPU-Based Texture Interpolation using Uniform B-Splines' +// Explanation why this algorithm normally always blurs, even with unit scaling: +// http://bigwww.epfl.ch/preprints/ruijters1001p.pdf +// 'GPU Prefilter for Accurate Cubic B-spline Interpolation' +vec4 calcweights(float s) { + vec4 t = vec4(-0.5, 0.1666, 0.3333, -0.3333) * s + vec4(1, 0, -0.5, 0.5); + t = t * s + vec4(0, 0, -0.5, 0.5); + t = t * s + vec4(-0.6666, 0, 0.8333, 0.1666); + vec2 a = vec2(1 / t.z, 1 / t.w); + t.xy = t.xy * a + vec2(1, 1); + t.x = t.x + s; + t.y = t.y - s; + return t; +} + +vec4 sample_bicubic_fast(sampler2D tex, vec2 texcoord) { + vec2 texsize = textureSize(tex, 0); + vec2 pt = 1 / texsize; + vec2 fcoord = fract(texcoord * texsize + vec2(0.5, 0.5)); + vec4 parmx = calcweights(fcoord.x); + vec4 parmy = calcweights(fcoord.y); + vec4 cdelta; + cdelta.xz = parmx.rg * vec2(-pt.x, pt.x); + cdelta.yw = parmy.rg * vec2(-pt.y, pt.y); + // first y-interpolation + vec4 ar = texture(tex, texcoord + cdelta.xy); + vec4 ag = texture(tex, texcoord + cdelta.xw); + vec4 ab = mix(ag, ar, parmy.b); + // second y-interpolation + vec4 br = texture(tex, texcoord + cdelta.zy); + vec4 bg = texture(tex, texcoord + cdelta.zw); + vec4 aa = mix(bg, br, parmy.b); + // x-interpolation + return mix(aa, ab, parmx.b); +} + +float[2] weights2(sampler1D lookup, float f) { + vec4 c = texture(lookup, f); + return float[2](c.r, c.g); +} + +float[4] weights4(sampler1D lookup, float f) { + vec4 c = texture(lookup, f); + return float[4](c.r, c.g, c.b, c.a); +} + +float[6] weights6(sampler2D lookup, float f) { + vec4 c1 = texture(lookup, vec2(0.25, f)); + vec4 c2 = texture(lookup, vec2(0.75, f)); + return float[6](c1.r, c1.g, c1.b, c2.r, c2.g, c2.b); +} + +float[8] weights8(sampler2D lookup, float f) { + vec4 c1 = texture(lookup, vec2(0.25, f)); + vec4 c2 = texture(lookup, vec2(0.75, f)); + return float[8](c1.r, c1.g, c1.b, c1.a, c2.r, c2.g, c2.b, c2.a); +} + +float[12] weights12(sampler2D lookup, float f) { + vec4 c1 = texture(lookup, vec2(1.0/6.0, f)); + vec4 c2 = texture(lookup, vec2(0.5, f)); + vec4 c3 = texture(lookup, vec2(5.0/6.0, f)); + return float[12](c1.r, c1.g, c1.b, c1.a, + c2.r, c2.g, c2.b, c2.a, + c3.r, c3.g, c3.b, c3.a); +} + +float[16] weights16(sampler2D lookup, float f) { + vec4 c1 = texture(lookup, vec2(0.125, f)); + vec4 c2 = texture(lookup, vec2(0.375, f)); + vec4 c3 = texture(lookup, vec2(0.625, f)); + vec4 c4 = texture(lookup, vec2(0.875, f)); + return float[16](c1.r, c1.g, c1.b, c1.a, c2.r, c2.g, c2.b, c2.a, + c3.r, c3.g, c3.b, c3.a, c4.r, c4.g, c4.b, c4.a); +} + +#define CONVOLUTION_SEP_N(NAME, N) \ + vec4 NAME(sampler2D tex, vec2 texcoord, vec2 pt, float weights[N]) { \ + vec4 res = vec4(0); \ + for (int n = 0; n < N; n++) { \ + res += weights[n] * texture(tex, texcoord + pt * n); \ + } \ + return res; \ + } + +CONVOLUTION_SEP_N(convolution_sep2, 2) +CONVOLUTION_SEP_N(convolution_sep4, 4) +CONVOLUTION_SEP_N(convolution_sep6, 6) +CONVOLUTION_SEP_N(convolution_sep8, 8) +CONVOLUTION_SEP_N(convolution_sep12, 12) +CONVOLUTION_SEP_N(convolution_sep16, 16) + +// The dir parameter is (0, 1) or (1, 0), and we expect the shader compiler to +// remove all the redundant multiplications and additions. +#define SAMPLE_CONVOLUTION_SEP_N(NAME, N, SAMPLERT, CONV_FUNC, WEIGHTS_FUNC)\ + vec4 NAME(vec2 dir, SAMPLERT lookup, sampler2D tex, vec2 texcoord) { \ + vec2 texsize = textureSize(tex, 0); \ + vec2 pt = (1 / texsize) * dir; \ + float fcoord = dot(fract(texcoord * texsize - 0.5), dir); \ + vec2 base = texcoord - fcoord * pt; \ + return CONV_FUNC(tex, base - pt * (N / 2 - 1), pt, \ + WEIGHTS_FUNC(lookup, fcoord)); \ + } + +SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep2, 2, sampler1D, convolution_sep2, weights2) +SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep4, 4, sampler1D, convolution_sep4, weights4) +SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep6, 6, sampler2D, convolution_sep6, weights6) +SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep8, 8, sampler2D, convolution_sep8, weights8) +SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep12, 12, sampler2D, convolution_sep12, weights12) +SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep16, 16, sampler2D, convolution_sep16, weights16) + + +#define CONVOLUTION_N(NAME, N) \ + vec4 NAME(sampler2D tex, vec2 texcoord, vec2 pt, float taps_x[N], \ + float taps_y[N]) { \ + vec4 res = vec4(0); \ + for (int y = 0; y < N; y++) { \ + vec4 line = vec4(0); \ + for (int x = 0; x < N; x++) \ + line += taps_x[x] * texture(tex, texcoord + pt * vec2(x, y));\ + res += taps_y[y] * line; \ + } \ + return res; \ + } + +CONVOLUTION_N(convolution2, 2) +CONVOLUTION_N(convolution4, 4) +CONVOLUTION_N(convolution6, 6) +CONVOLUTION_N(convolution8, 8) +CONVOLUTION_N(convolution12, 12) +CONVOLUTION_N(convolution16, 16) + +#define SAMPLE_CONVOLUTION_N(NAME, N, SAMPLERT, CONV_FUNC, WEIGHTS_FUNC) \ + vec4 NAME(SAMPLERT lookup, sampler2D tex, vec2 texcoord) { \ + vec2 texsize = textureSize(tex, 0); \ + vec2 pt = 1 / texsize; \ + vec2 fcoord = fract(texcoord * texsize - 0.5); \ + vec2 base = texcoord - fcoord * pt; \ + return CONV_FUNC(tex, base - pt * (N / 2 - 1), pt, \ + WEIGHTS_FUNC(lookup, fcoord.x), \ + WEIGHTS_FUNC(lookup, fcoord.y)); \ + } + +SAMPLE_CONVOLUTION_N(sample_convolution2, 2, sampler1D, convolution2, weights2) +SAMPLE_CONVOLUTION_N(sample_convolution4, 4, sampler1D, convolution4, weights4) +SAMPLE_CONVOLUTION_N(sample_convolution6, 6, sampler2D, convolution6, weights6) +SAMPLE_CONVOLUTION_N(sample_convolution8, 8, sampler2D, convolution8, weights8) +SAMPLE_CONVOLUTION_N(sample_convolution12, 12, sampler2D, convolution12, weights12) +SAMPLE_CONVOLUTION_N(sample_convolution16, 16, sampler2D, convolution16, weights16) + + +// Unsharp masking +vec4 sample_sharpen3(sampler2D tex, vec2 texcoord) { + vec2 texsize = textureSize(tex, 0); + vec2 pt = 1 / texsize; + vec2 st = pt * 0.5; + vec4 p = texture(tex, texcoord); + vec4 sum = texture(tex, texcoord + st * vec2(+1, +1)) + + texture(tex, texcoord + st * vec2(+1, -1)) + + texture(tex, texcoord + st * vec2(-1, +1)) + + texture(tex, texcoord + st * vec2(-1, -1)); + return p + (p - 0.25 * sum) * filter_param1; +} + +vec4 sample_sharpen5(sampler2D tex, vec2 texcoord) { + vec2 texsize = textureSize(tex, 0); + vec2 pt = 1 / texsize; + vec2 st1 = pt * 1.2; + vec4 p = texture(tex, texcoord); + vec4 sum1 = texture(tex, texcoord + st1 * vec2(+1, +1)) + + texture(tex, texcoord + st1 * vec2(+1, -1)) + + texture(tex, texcoord + st1 * vec2(-1, +1)) + + texture(tex, texcoord + st1 * vec2(-1, -1)); + vec2 st2 = pt * 1.5; + vec4 sum2 = texture(tex, texcoord + st2 * vec2(+1, 0)) + + texture(tex, texcoord + st2 * vec2( 0, +1)) + + texture(tex, texcoord + st2 * vec2(-1, 0)) + + texture(tex, texcoord + st2 * vec2( 0, -1)); + vec4 t = p * 0.859375 + sum2 * -0.1171875 + sum1 * -0.09765625; + return p + t * filter_param1; +} + +void main() { +#ifdef USE_PLANAR + vec3 color = vec3(SAMPLE_L(texture1, texcoord).r, + SAMPLE_C(texture2, texcoord).r, + SAMPLE_C(texture3, texcoord).r); +#else + vec3 color = SAMPLE_L(texture1, texcoord).rgb; +#endif +#ifdef USE_GBRP + color.gbr = color; +#endif +#ifdef USE_YGRAY + // NOTE: actually slightly wrong for 16 bit input video, and completely + // wrong for 9/10 bit input + color.gb = vec2(128.0/255.0); +#endif +#ifdef USE_COLORMATRIX + color = mat3(colormatrix) * color + colormatrix[3]; +#endif +#ifdef USE_LINEAR_CONV + color = pow(color, vec3(2.2)); +#endif +#ifdef USE_LINEAR_CONV_INV + // Convert from linear RGB to gamma RGB before putting it through the 3D-LUT + // in the final stage. + color = pow(color, vec3(1.0/2.2)); +#endif +#ifdef USE_GAMMA_POW + color = pow(color, inv_gamma); +#endif +#ifdef USE_3DLUT + color = texture(lut_3d, color).rgb; +#endif +#ifdef USE_DITHER + float dither = texture(dither, gl_FragCoord.xy / textureSize(dither, 0)).r; + color = floor(color * dither_multiply + dither ) / dither_quantization; +#endif + out_color = vec4(color, 1); +}