mpv/video/out/opengl/common.h

249 lines
12 KiB
C
Raw Normal View History

/*
* This file is part of mpv.
*
* mpv is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* mpv is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with mpv. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef MPLAYER_GL_COMMON_H
#define MPLAYER_GL_COMMON_H
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include "config.h"
#include "common/msg.h"
#include "misc/bstr.h"
#include "video/csputils.h"
#include "video/mp_image.h"
vo_opengl: refactor into vo_gpu This is done in several steps: 1. refactor MPGLContext -> struct ra_ctx 2. move GL-specific stuff in vo_opengl into opengl/context.c 3. generalize context creation to support other APIs, and add --gpu-api 4. rename all of the --opengl- options that are no longer opengl-specific 5. move all of the stuff from opengl/* that isn't GL-specific into gpu/ (note: opengl/gl_utils.h became opengl/utils.h) 6. rename vo_opengl to vo_gpu 7. to handle window screenshots, the short-term approach was to just add it to ra_swchain_fns. Long term (and for vulkan) this has to be moved to ra itself (and vo_gpu altered to compensate), but this was a stop-gap measure to prevent this commit from getting too big 8. move ra->fns->flush to ra_gl_ctx instead 9. some other minor changes that I've probably already forgotten Note: This is one half of a major refactor, the other half of which is provided by rossy's following commit. This commit enables support for all linux platforms, while his version enables support for all non-linux platforms. Note 2: vo_opengl_cb.c also re-uses ra_gl_ctx so it benefits from the --opengl- options like --opengl-early-flush, --opengl-finish etc. Should be a strict superset of the old functionality. Disclaimer: Since I have no way of compiling mpv on all platforms, some of these ports were done blindly. Specifically, the blind ports included context_mali_fbdev.c and context_rpi.c. Since they're both based on egl_helpers, the port should have gone smoothly without any major changes required. But if somebody complains about a compile error on those platforms (assuming anybody actually uses them), you know where to complain.
2017-09-14 06:04:55 +00:00
#include "video/out/vo.h"
#include "video/out/gpu/ra.h"
vo_opengl: add our own copy of OpenGL headers gl_headers.h is basically header_fixes.h done consequently. It contains all OpenGL defines (and some typedefs) we need. We don't include GL headers provided by the system anymore. Some care has to be taken by certain windowing APIs including all of gl.h anyway. Then the definitions could clash. Fortunately, redefining preprocessor symbols to the same content is allowed and ignored. Also, redefining typedefs to the same thing is allowed in C11. Apparently the latter is not allowed in C99, so there is an imperfect attempt to avoid the typedefs if required API symbols are apparently present already. The nost risky part about this are the standard typedefs and GLAPIENTRY. The latter is different only on win32 (and at least consistently so). The typedefs are mostly based on stdint.h typedefs, which khrplatform.h clumsily emulates on platforms which don't have it. The biggest difference is that we define GLsizeiptr directly to ptrdiff_t, instead of checking for the _WIN64 symbol and defining it to long or long long. This also typedefs GLsync to __GLsync, just like the khronos headers. Although symbols prefixed with __ are implementation reserved, khronos also violates this rule, and having the same definition as khronos will avoid problems on duplicate definitions. We can simplify the build scripts too. The ios-gl check seems a bit wrong now (what we really want to test for is EAGLContext), but I can't test and thus can't improve it. cuda_dynamic.h redefined two GL symbols; just include the new headers directly instead.
2017-04-07 12:30:51 +00:00
#include "gl_headers.h"
#if HAVE_GL_WIN32
#include <windows.h>
#endif
struct GL;
typedef struct GL GL;
enum {
MPGL_CAP_ROW_LENGTH = (1 << 4), // GL_[UN]PACK_ROW_LENGTH
MPGL_CAP_FB = (1 << 5),
MPGL_CAP_VAO = (1 << 6),
MPGL_CAP_TEX_RG = (1 << 10), // GL_ARB_texture_rg / GL 3.x
MPGL_CAP_VDPAU = (1 << 11), // GL_NV_vdpau_interop
MPGL_CAP_APPLE_RGB_422 = (1 << 12), // GL_APPLE_rgb_422
MPGL_CAP_1D_TEX = (1 << 14),
MPGL_CAP_3D_TEX = (1 << 15),
MPGL_CAP_DEBUG = (1 << 16),
MPGL_CAP_DXINTEROP = (1 << 17), // WGL_NV_DX_interop
MPGL_CAP_EXT16 = (1 << 18), // GL_EXT_texture_norm16
MPGL_CAP_ARB_FLOAT = (1 << 19), // GL_ARB_texture_float
MPGL_CAP_EXT_CR_HFLOAT = (1 << 20), // GL_EXT_color_buffer_half_float
MPGL_CAP_UBO = (1 << 21), // GL_ARB_uniform_buffer_object
MPGL_CAP_SSBO = (1 << 22), // GL_ARB_shader_storage_buffer_object
MPGL_CAP_COMPUTE_SHADER = (1 << 23), // GL_ARB_compute_shader & GL_ARB_shader_image_load_store
MPGL_CAP_NESTED_ARRAY = (1 << 24), // GL_ARB_arrays_of_arrays
MPGL_CAP_SW = (1 << 30), // indirect or sw renderer
};
// E.g. 310 means 3.1
// Code doesn't have to use the macros; they are for convenience only.
#define MPGL_VER(major, minor) (((major) * 100) + (minor) * 10)
#define MPGL_VER_GET_MAJOR(ver) ((unsigned)(ver) / 100)
#define MPGL_VER_GET_MINOR(ver) ((unsigned)(ver) % 100 / 10)
#define MPGL_VER_P(ver) MPGL_VER_GET_MAJOR(ver), MPGL_VER_GET_MINOR(ver)
void mpgl_load_functions(GL *gl, void *(*getProcAddress)(const GLubyte *),
const char *ext2, struct mp_log *log);
void mpgl_load_functions2(GL *gl, void *(*get_fn)(void *ctx, const char *n),
void *fn_ctx, const char *ext2, struct mp_log *log);
typedef void (GLAPIENTRY *MP_GLDEBUGPROC)(GLenum, GLenum, GLuint, GLenum,
GLsizei, const GLchar *,const void *);
//function pointers loaded from the OpenGL library
struct GL {
int version; // MPGL_VER() mangled (e.g. 210 for 2.1)
int es; // es version (e.g. 300), 0 for desktop GL
int glsl_version; // e.g. 130 for GLSL 1.30
char *extensions; // Equivalent to GL_EXTENSIONS
int mpgl_caps; // Bitfield of MPGL_CAP_* constants
bool debug_context; // use of e.g. GLX_CONTEXT_DEBUG_BIT_ARB
void (GLAPIENTRY *Viewport)(GLint, GLint, GLsizei, GLsizei);
void (GLAPIENTRY *Clear)(GLbitfield);
void (GLAPIENTRY *GenTextures)(GLsizei, GLuint *);
void (GLAPIENTRY *DeleteTextures)(GLsizei, const GLuint *);
void (GLAPIENTRY *ClearColor)(GLclampf, GLclampf, GLclampf, GLclampf);
void (GLAPIENTRY *Enable)(GLenum);
void (GLAPIENTRY *Disable)(GLenum);
const GLubyte *(GLAPIENTRY * GetString)(GLenum);
void (GLAPIENTRY *BlendFuncSeparate)(GLenum, GLenum, GLenum, GLenum);
void (GLAPIENTRY *Flush)(void);
void (GLAPIENTRY *Finish)(void);
void (GLAPIENTRY *PixelStorei)(GLenum, GLint);
void (GLAPIENTRY *TexImage1D)(GLenum, GLint, GLint, GLsizei, GLint,
GLenum, GLenum, const GLvoid *);
void (GLAPIENTRY *TexImage2D)(GLenum, GLint, GLint, GLsizei, GLsizei,
GLint, GLenum, GLenum, const GLvoid *);
void (GLAPIENTRY *TexSubImage2D)(GLenum, GLint, GLint, GLint,
GLsizei, GLsizei, GLenum, GLenum,
const GLvoid *);
void (GLAPIENTRY *TexParameteri)(GLenum, GLenum, GLint);
void (GLAPIENTRY *GetIntegerv)(GLenum, GLint *);
void (GLAPIENTRY *ReadPixels)(GLint, GLint, GLsizei, GLsizei, GLenum,
GLenum, GLvoid *);
void (GLAPIENTRY *ReadBuffer)(GLenum);
void (GLAPIENTRY *DrawArrays)(GLenum, GLint, GLsizei);
GLenum (GLAPIENTRY *GetError)(void);
void (GLAPIENTRY *GetTexLevelParameteriv)(GLenum, GLint, GLenum, GLint *);
void (GLAPIENTRY *Scissor)(GLint, GLint, GLsizei, GLsizei);
void (GLAPIENTRY *GenBuffers)(GLsizei, GLuint *);
void (GLAPIENTRY *DeleteBuffers)(GLsizei, const GLuint *);
void (GLAPIENTRY *BindBuffer)(GLenum, GLuint);
vo_opengl: implement NNEDI3 prescaler Implement NNEDI3, a neural network based deinterlacer. The shader is reimplemented in GLSL and supports both 8x4 and 8x6 sampling window now. This allows the shader to be licensed under LGPL2.1 so that it can be used in mpv. The current implementation supports uploading the NN weights (up to 51kb with placebo setting) in two different way, via uniform buffer object or hard coding into shader source. UBO requires OpenGL 3.1, which only guarantee 16kb per block. But I find that 64kb seems to be a default setting for recent card/driver (which nnedi3 is targeting), so I think we're fine here (with default nnedi3 setting the size of weights is 9kb). Hard-coding into shader requires OpenGL 3.3, for the "intBitsToFloat()" built-in function. This is necessary to precisely represent these weights in GLSL. I tried several human readable floating point number format (with really high precision as for single precision float), but for some reason they are not working nicely, bad pixels (with NaN value) could be produced with some weights set. We could also add support to upload these weights with texture, just for compatibility reason (etc. upscaling a still image with a low end graphics card). But as I tested, it's rather slow even with 1D texture (we probably had to use 2D texture due to dimension size limitation). Since there is always better choice to do NNEDI3 upscaling for still image (vapoursynth plugin), it's not implemented in this commit. If this turns out to be a popular demand from the user, it should be easy to add it later. For those who wants to optimize the performance a bit further, the bottleneck seems to be: 1. overhead to upload and access these weights, (in particular, the shader code will be regenerated for each frame, it's on CPU though). 2. "dot()" performance in the main loop. 3. "exp()" performance in the main loop, there are various fast implementation with some bit tricks (probably with the help of the intBitsToFloat function). The code is tested with nvidia card and driver (355.11), on Linux. Closes #2230
2015-10-28 01:37:55 +00:00
void (GLAPIENTRY *BindBufferBase)(GLenum, GLuint, GLuint);
GLvoid * (GLAPIENTRY *MapBufferRange)(GLenum, GLintptr, GLsizeiptr,
GLbitfield);
GLboolean (GLAPIENTRY *UnmapBuffer)(GLenum);
void (GLAPIENTRY *BufferData)(GLenum, intptr_t, const GLvoid *, GLenum);
void (GLAPIENTRY *BufferSubData)(GLenum, GLintptr, GLsizeiptr, const GLvoid *);
void (GLAPIENTRY *ActiveTexture)(GLenum);
void (GLAPIENTRY *BindTexture)(GLenum, GLuint);
int (GLAPIENTRY *SwapInterval)(int);
void (GLAPIENTRY *TexImage3D)(GLenum, GLint, GLenum, GLsizei, GLsizei,
GLsizei, GLint, GLenum, GLenum,
const GLvoid *);
void (GLAPIENTRY *GenVertexArrays)(GLsizei, GLuint *);
void (GLAPIENTRY *BindVertexArray)(GLuint);
GLint (GLAPIENTRY *GetAttribLocation)(GLuint, const GLchar *);
void (GLAPIENTRY *EnableVertexAttribArray)(GLuint);
void (GLAPIENTRY *DisableVertexAttribArray)(GLuint);
void (GLAPIENTRY *VertexAttribPointer)(GLuint, GLint, GLenum, GLboolean,
GLsizei, const GLvoid *);
void (GLAPIENTRY *DeleteVertexArrays)(GLsizei, const GLuint *);
void (GLAPIENTRY *UseProgram)(GLuint);
GLint (GLAPIENTRY *GetUniformLocation)(GLuint, const GLchar *);
void (GLAPIENTRY *CompileShader)(GLuint);
GLuint (GLAPIENTRY *CreateProgram)(void);
GLuint (GLAPIENTRY *CreateShader)(GLenum);
void (GLAPIENTRY *ShaderSource)(GLuint, GLsizei, const GLchar **,
const GLint *);
void (GLAPIENTRY *LinkProgram)(GLuint);
void (GLAPIENTRY *AttachShader)(GLuint, GLuint);
void (GLAPIENTRY *DeleteShader)(GLuint);
void (GLAPIENTRY *DeleteProgram)(GLuint);
void (GLAPIENTRY *GetShaderInfoLog)(GLuint, GLsizei, GLsizei *, GLchar *);
void (GLAPIENTRY *GetShaderiv)(GLuint, GLenum, GLint *);
void (GLAPIENTRY *GetProgramInfoLog)(GLuint, GLsizei, GLsizei *, GLchar *);
void (GLAPIENTRY *GetProgramiv)(GLenum, GLenum, GLint *);
void (GLAPIENTRY *GetProgramBinary)(GLuint, GLsizei, GLsizei *, GLenum *,
void *);
void (GLAPIENTRY *ProgramBinary)(GLuint, GLenum, const void *, GLsizei);
void (GLAPIENTRY *DispatchCompute)(GLuint, GLuint, GLuint);
void (GLAPIENTRY *BindImageTexture)(GLuint, GLuint, GLint, GLboolean,
GLint, GLenum, GLenum);
void (GLAPIENTRY *MemoryBarrier)(GLbitfield);
const GLubyte* (GLAPIENTRY *GetStringi)(GLenum, GLuint);
void (GLAPIENTRY *BindAttribLocation)(GLuint, GLuint, const GLchar *);
void (GLAPIENTRY *BindFramebuffer)(GLenum, GLuint);
void (GLAPIENTRY *GenFramebuffers)(GLsizei, GLuint *);
void (GLAPIENTRY *DeleteFramebuffers)(GLsizei, const GLuint *);
GLenum (GLAPIENTRY *CheckFramebufferStatus)(GLenum);
void (GLAPIENTRY *FramebufferTexture2D)(GLenum, GLenum, GLenum, GLuint,
GLint);
void (GLAPIENTRY *BlitFramebuffer)(GLint, GLint, GLint, GLint, GLint, GLint,
GLint, GLint, GLbitfield, GLenum);
void (GLAPIENTRY *GetFramebufferAttachmentParameteriv)(GLenum, GLenum,
GLenum, GLint *);
void (GLAPIENTRY *Uniform1f)(GLint, GLfloat);
void (GLAPIENTRY *Uniform2f)(GLint, GLfloat, GLfloat);
void (GLAPIENTRY *Uniform3f)(GLint, GLfloat, GLfloat, GLfloat);
void (GLAPIENTRY *Uniform4f)(GLint, GLfloat, GLfloat, GLfloat, GLfloat);
void (GLAPIENTRY *Uniform1i)(GLint, GLint);
void (GLAPIENTRY *UniformMatrix2fv)(GLint, GLsizei, GLboolean,
const GLfloat *);
void (GLAPIENTRY *UniformMatrix3fv)(GLint, GLsizei, GLboolean,
const GLfloat *);
vo_opengl: refactor RA texture and buffer updates - tex_uploads args are moved to a struct - the ability to directly upload texture data without going through a buffer is made explicit - the concept of buffer updates and buffer polling is made more explicit and generalized to buf_update as well (not just mapped buffers) - the ability to call tex_upload/buf_update on a tex/buf is made explicit during tex/buf creation - uploading from buffers now uses an explicit offset instead of implicitly comparing *src against buf->data, because not all buffers may actually be persistently mapped - the initial_data = immutable requirement is dropped. (May be re-added later for D3D11 if that ever becomes a thing) This change helps the vulkan abstraction immensely and also helps move common code (like the PBO pooling) out of ra_gl and into the opengl/utils.c This also technically has the side-benefit / side-constraint of using PBOs for OSD texture uploads as well, which actually seems to help performance on machines where --opengl-pbo is faster than the naive code path. Because of this, I decided to hook up the OSD code to the opengl-pbo option as well. One drawback of this refactor is that the GL_STREAM_COPY hack for texture uploads "got lost", but I think I'm happy with that going away anyway since DR almost fully deprecates it, and it's not the "right thing" anyway - but instead an nvidia-only hack to make this stuff work somewhat better on NUMA systems with discrete GPUs. Another change is that due to the way fencing works with ra_buf (we get one fence per ra_buf per upload) we have to use multiple ra_bufs instead of offsets into a shared buffer. But for OpenGL this is probably better anyway. It's possible that in future, we could support having independent “buffer slices” (each with their own fence/sync object), but this would be an optimization more than anything. I also think that we could address the underlying problem (memory closeness) differently by making the ra_vk memory allocator smart enough to chunk together allocations under the hood.
2017-08-16 20:13:51 +00:00
void (GLAPIENTRY *InvalidateTexImage)(GLuint, GLint);
void (GLAPIENTRY *InvalidateFramebuffer)(GLenum, GLsizei, const GLenum *);
GLsync (GLAPIENTRY *FenceSync)(GLenum, GLbitfield);
GLenum (GLAPIENTRY *ClientWaitSync)(GLsync, GLbitfield, GLuint64);
void (GLAPIENTRY *DeleteSync)(GLsync sync);
void (GLAPIENTRY *BufferStorage)(GLenum, intptr_t, const GLvoid *, GLenum);
void (GLAPIENTRY *GenQueries)(GLsizei, GLuint *);
void (GLAPIENTRY *DeleteQueries)(GLsizei, const GLuint *);
void (GLAPIENTRY *BeginQuery)(GLenum, GLuint);
void (GLAPIENTRY *EndQuery)(GLenum);
void (GLAPIENTRY *QueryCounter)(GLuint, GLenum);
GLboolean (GLAPIENTRY *IsQuery)(GLuint);
void (GLAPIENTRY *GetQueryObjectiv)(GLuint, GLenum, GLint *);
void (GLAPIENTRY *GetQueryObjecti64v)(GLuint, GLenum, GLint64 *);
void (GLAPIENTRY *GetQueryObjectuiv)(GLuint, GLenum, GLuint *);
void (GLAPIENTRY *GetQueryObjectui64v)(GLuint, GLenum, GLuint64 *);
void (GLAPIENTRY *VDPAUInitNV)(const GLvoid *, const GLvoid *);
void (GLAPIENTRY *VDPAUFiniNV)(void);
GLvdpauSurfaceNV (GLAPIENTRY *VDPAURegisterOutputSurfaceNV)
(GLvoid *, GLenum, GLsizei, const GLuint *);
GLvdpauSurfaceNV (GLAPIENTRY *VDPAURegisterVideoSurfaceNV)
(GLvoid *, GLenum, GLsizei, const GLuint *);
void (GLAPIENTRY *VDPAUUnregisterSurfaceNV)(GLvdpauSurfaceNV);
void (GLAPIENTRY *VDPAUSurfaceAccessNV)(GLvdpauSurfaceNV, GLenum);
void (GLAPIENTRY *VDPAUMapSurfacesNV)(GLsizei, const GLvdpauSurfaceNV *);
void (GLAPIENTRY *VDPAUUnmapSurfacesNV)(GLsizei, const GLvdpauSurfaceNV *);
#if HAVE_GL_WIN32
// The HANDLE type might not be present on non-Win32
BOOL (GLAPIENTRY *DXSetResourceShareHandleNV)(void *dxObject,
HANDLE shareHandle);
HANDLE (GLAPIENTRY *DXOpenDeviceNV)(void *dxDevice);
BOOL (GLAPIENTRY *DXCloseDeviceNV)(HANDLE hDevice);
HANDLE (GLAPIENTRY *DXRegisterObjectNV)(HANDLE hDevice, void *dxObject,
GLuint name, GLenum type, GLenum access);
BOOL (GLAPIENTRY *DXUnregisterObjectNV)(HANDLE hDevice, HANDLE hObject);
BOOL (GLAPIENTRY *DXLockObjectsNV)(HANDLE hDevice, GLint count,
HANDLE *hObjects);
BOOL (GLAPIENTRY *DXUnlockObjectsNV)(HANDLE hDevice, GLint count,
HANDLE *hObjects);
#endif
GLint (GLAPIENTRY *GetVideoSync)(GLuint *);
GLint (GLAPIENTRY *WaitVideoSync)(GLint, GLint, unsigned int *);
void (GLAPIENTRY *GetTranslatedShaderSourceANGLE)(GLuint, GLsizei,
GLsizei*, GLchar* source);
void (GLAPIENTRY *DebugMessageCallback)(MP_GLDEBUGPROC callback,
const void *userParam);
void *(GLAPIENTRY *MPGetNativeDisplay)(const char *name);
};
#endif /* MPLAYER_GL_COMMON_H */