vo_opengl: support compute shaders

These can either be invoked as dispatch_compute to do a single
computation, or finish_pass_fbo (after setting compute_size_minimum) to
render to a new texture using a compute shader. To make this stuff all
work transparently, we try really, really hard to make compute shaders
as identical to fragment shaders as possible in their behavior.
This commit is contained in:
Niklas Haas 2017-07-17 18:11:32 +02:00
parent eb54d2ad4d
commit aad6ba018a
7 changed files with 317 additions and 100 deletions

View File

@ -335,6 +335,23 @@ static const struct gl_functions gl_functions[] = {
{0} {0}
}, },
}, },
{
.ver_core = 420,
.extension = "GL_ARB_shader_image_load_store",
.functions = (const struct gl_function[]) {
DEF_FN(BindImageTexture),
DEF_FN(MemoryBarrier),
{0}
},
},
{
.ver_core = 430,
.extension = "GL_ARB_compute_shader",
.functions = (const struct gl_function[]) {
DEF_FN(DispatchCompute),
{0},
},
},
// Swap control, always an OS specific extension // Swap control, always an OS specific extension
// The OSX code loads this manually. // The OSX code loads this manually.
{ {
@ -589,7 +606,7 @@ void mpgl_load_functions2(GL *gl, void *(*get_fn)(void *ctx, const char *n),
if (shader && sscanf(shader, "%d.%d", &glsl_major, &glsl_minor) == 2) if (shader && sscanf(shader, "%d.%d", &glsl_major, &glsl_minor) == 2)
gl->glsl_version = glsl_major * 100 + glsl_minor; gl->glsl_version = glsl_major * 100 + glsl_minor;
// restrict GLSL version to be forwards compatible // restrict GLSL version to be forwards compatible
gl->glsl_version = MPMIN(gl->glsl_version, 400); gl->glsl_version = MPMIN(gl->glsl_version, 430);
} }
if (is_software_gl(gl)) { if (is_software_gl(gl)) {

View File

@ -163,6 +163,11 @@ struct GL {
void *); void *);
void (GLAPIENTRY *ProgramBinary)(GLuint, GLenum, const void *, GLsizei); void (GLAPIENTRY *ProgramBinary)(GLuint, GLenum, const void *, GLsizei);
void (GLAPIENTRY *DispatchCompute)(GLuint, GLuint, GLuint);
void (GLAPIENTRY *BindImageTexture)(GLuint, GLuint, GLint, GLboolean,
GLint, GLenum, GLenum);
void (GLAPIENTRY *MemoryBarrier)(GLbitfield);
const GLubyte* (GLAPIENTRY *GetStringi)(GLenum, GLuint); const GLubyte* (GLAPIENTRY *GetStringi)(GLenum, GLuint);
void (GLAPIENTRY *BindAttribLocation)(GLuint, GLuint, const GLchar *); void (GLAPIENTRY *BindAttribLocation)(GLuint, GLuint, const GLchar *);
void (GLAPIENTRY *BindFramebuffer)(GLenum, GLuint); void (GLAPIENTRY *BindFramebuffer)(GLenum, GLuint);

View File

@ -93,6 +93,7 @@ static const struct mpgl_driver *const backends[] = {
// initialize. The first entry is the most preferred version. // initialize. The first entry is the most preferred version.
const int mpgl_preferred_gl_versions[] = { const int mpgl_preferred_gl_versions[] = {
440, 440,
430,
400, 400,
330, 330,
320, 320,

View File

@ -48,7 +48,9 @@
// --- GL 1.5 // --- GL 1.5
#define GL_READ_ONLY 0x88B8
#define GL_WRITE_ONLY 0x88B9 #define GL_WRITE_ONLY 0x88B9
#define GL_READ_WRITE 0x88BA
// --- GL 3.0 // --- GL 3.0
@ -77,6 +79,10 @@
#define GL_DYNAMIC_STORAGE_BIT 0x0100 #define GL_DYNAMIC_STORAGE_BIT 0x0100
#define GL_CLIENT_STORAGE_BIT 0x0200 #define GL_CLIENT_STORAGE_BIT 0x0200
// -- GL 4.3 or GL_ARB_compute_shader
#define GL_COMPUTE_SHADER 0x91B9
// --- GL_NV_vdpau_interop // --- GL_NV_vdpau_interop
#define GLvdpauSurfaceNV GLintptr #define GLvdpauSurfaceNV GLintptr

View File

@ -265,8 +265,11 @@ bool fbotex_init(struct fbotex *fbo, GL *gl, struct mp_log *log, int w, int h,
// Like fbotex_init(), except it can be called on an already initialized FBO; // Like fbotex_init(), except it can be called on an already initialized FBO;
// and if the parameters are the same as the previous call, do not touch it. // and if the parameters are the same as the previous call, do not touch it.
// flags can be 0, or a combination of FBOTEX_FUZZY_W and FBOTEX_FUZZY_H. // flags can be 0, or a combination of FBOTEX_FUZZY_W, FBOTEX_FUZZY_H and
// FBOTEX_COMPUTE.
// Enabling FUZZY for W or H means the w or h does not need to be exact. // Enabling FUZZY for W or H means the w or h does not need to be exact.
// FBOTEX_COMPUTE means that the texture will be written to by a compute shader
// instead of actually being attached to an FBO.
bool fbotex_change(struct fbotex *fbo, GL *gl, struct mp_log *log, int w, int h, bool fbotex_change(struct fbotex *fbo, GL *gl, struct mp_log *log, int w, int h,
GLenum iformat, int flags) GLenum iformat, int flags)
{ {
@ -315,7 +318,6 @@ bool fbotex_change(struct fbotex *fbo, GL *gl, struct mp_log *log, int w, int h,
.iformat = iformat, .iformat = iformat,
}; };
gl->GenFramebuffers(1, &fbo->fbo);
gl->GenTextures(1, &fbo->texture); gl->GenTextures(1, &fbo->texture);
gl->BindTexture(GL_TEXTURE_2D, fbo->texture); gl->BindTexture(GL_TEXTURE_2D, fbo->texture);
gl->TexImage2D(GL_TEXTURE_2D, 0, format->internal_format, fbo->rw, fbo->rh, 0, gl->TexImage2D(GL_TEXTURE_2D, 0, format->internal_format, fbo->rw, fbo->rh, 0,
@ -328,21 +330,24 @@ bool fbotex_change(struct fbotex *fbo, GL *gl, struct mp_log *log, int w, int h,
gl_check_error(gl, log, "after creating framebuffer texture"); gl_check_error(gl, log, "after creating framebuffer texture");
gl->BindFramebuffer(GL_FRAMEBUFFER, fbo->fbo); bool skip_fbo = flags & FBOTEX_COMPUTE;
gl->FramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, if (!skip_fbo) {
GL_TEXTURE_2D, fbo->texture, 0); gl->GenFramebuffers(1, &fbo->fbo);
gl->BindFramebuffer(GL_FRAMEBUFFER, fbo->fbo);
gl->FramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
GL_TEXTURE_2D, fbo->texture, 0);
GLenum err = gl->CheckFramebufferStatus(GL_FRAMEBUFFER); GLenum err = gl->CheckFramebufferStatus(GL_FRAMEBUFFER);
if (err != GL_FRAMEBUFFER_COMPLETE) { if (err != GL_FRAMEBUFFER_COMPLETE) {
mp_err(log, "Error: framebuffer completeness check failed (error=%d).\n", mp_err(log, "Error: framebuffer completeness check failed (error=%d).\n",
(int)err); (int)err);
res = false; res = false;
}
gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
gl_check_error(gl, log, "after creating framebuffer");
} }
gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
gl_check_error(gl, log, "after creating framebuffer");
return res; return res;
} }
@ -462,6 +467,10 @@ struct sc_uniform {
// Set for sampler uniforms. // Set for sampler uniforms.
GLenum tex_target; GLenum tex_target;
GLuint tex_handle; GLuint tex_handle;
// Set for image uniforms
GLuint img_handle;
GLenum img_access;
GLenum img_iformat;
}; };
struct sc_cached_uniform { struct sc_cached_uniform {
@ -475,6 +484,7 @@ struct sc_entry {
int num_uniforms; int num_uniforms;
bstr frag; bstr frag;
bstr vert; bstr vert;
bstr comp;
struct gl_timer *timer; struct gl_timer *timer;
struct gl_vao vao; struct gl_vao vao;
}; };
@ -492,6 +502,7 @@ struct gl_shader_cache {
bstr header_text; bstr header_text;
bstr text; bstr text;
int next_texture_unit; int next_texture_unit;
int next_image_unit;
struct gl_vao *vao; // deprecated struct gl_vao *vao; // deprecated
struct sc_entry *entries; struct sc_entry *entries;
@ -545,6 +556,10 @@ void gl_sc_reset(struct gl_shader_cache *sc)
gl->ActiveTexture(GL_TEXTURE0 + u->v.i[0]); gl->ActiveTexture(GL_TEXTURE0 + u->v.i[0]);
gl->BindTexture(u->tex_target, 0); gl->BindTexture(u->tex_target, 0);
} }
if (u->type == UT_i && u->img_access) {
gl->BindImageTexture(u->v.i[0], 0, 0, GL_FALSE, 0,
u->img_access, u->img_iformat);
}
} }
gl->ActiveTexture(GL_TEXTURE0); gl->ActiveTexture(GL_TEXTURE0);
} }
@ -556,6 +571,7 @@ void gl_sc_reset(struct gl_shader_cache *sc)
talloc_free(sc->uniforms[n].name); talloc_free(sc->uniforms[n].name);
sc->num_uniforms = 0; sc->num_uniforms = 0;
sc->next_texture_unit = 1; // not 0, as 0 is "free for use" sc->next_texture_unit = 1; // not 0, as 0 is "free for use"
sc->next_image_unit = 1;
sc->vertex_entries = NULL; sc->vertex_entries = NULL;
sc->vertex_size = 0; sc->vertex_size = 0;
sc->current_shader = NULL; sc->current_shader = NULL;
@ -571,6 +587,7 @@ static void sc_flush_cache(struct gl_shader_cache *sc)
sc->gl->DeleteProgram(e->gl_shader); sc->gl->DeleteProgram(e->gl_shader);
talloc_free(e->vert.start); talloc_free(e->vert.start);
talloc_free(e->frag.start); talloc_free(e->frag.start);
talloc_free(e->comp.start);
talloc_free(e->uniforms); talloc_free(e->uniforms);
gl_timer_free(e->timer); gl_timer_free(e->timer);
gl_vao_uninit(&e->vao); gl_vao_uninit(&e->vao);
@ -639,6 +656,14 @@ void gl_sc_hadd_bstr(struct gl_shader_cache *sc, struct bstr text)
bstr_xappend(sc, &sc->header_text, text); bstr_xappend(sc, &sc->header_text, text);
} }
// Append a printf-formatted string to the shader cache's prelude text, which
// is inserted into the generated shader source ahead of the user-supplied
// header/body text (see gl_sc_generate). Analogous to gl_sc_haddf, but for
// the prelude instead of the header.
void gl_sc_paddf(struct gl_shader_cache *sc, const char *textf, ...)
{
    va_list ap;
    va_start(ap, textf);
    bstr_xappend_vasprintf(sc, &sc->prelude_text, textf, ap);
    va_end(ap);
}
static struct sc_uniform *find_uniform(struct gl_shader_cache *sc, static struct sc_uniform *find_uniform(struct gl_shader_cache *sc,
const char *name) const char *name)
{ {
@ -690,6 +715,29 @@ void gl_sc_uniform_tex_ui(struct gl_shader_cache *sc, char *name, GLuint texture
u->tex_handle = texture; u->tex_handle = texture;
} }
// Translate an image access qualifier into the matching GLSL image2D
// declaration type. Any access mode other than the three valid GL enums is a
// programming error and aborts.
static const char *mp_image2D_type(GLenum access)
{
    if (access == GL_WRITE_ONLY)
        return "writeonly image2D";
    if (access == GL_READ_ONLY)
        return "readonly image2D";
    if (access == GL_READ_WRITE)
        return "image2D";
    abort();
}
// Declare an image2D uniform for the given texture, to be bound with the
// given internal format and access mode (GL_READ_ONLY, GL_WRITE_ONLY or
// GL_READ_WRITE). A fresh image unit is allocated for it; the actual
// glBindImageTexture call happens later in update_uniform.
void gl_sc_uniform_image2D(struct gl_shader_cache *sc, char *name, GLuint texture,
                           GLuint iformat, GLenum access)
{
    struct sc_uniform *u = find_uniform(sc, name);
    u->type = UT_i;                    // the uniform value is the image unit index
    u->size = 1;
    u->glsl_type = mp_image2D_type(access);
    u->v.i[0] = sc->next_image_unit++; // assign the next free image unit
    u->img_handle = texture;
    u->img_access = access;
    u->img_iformat = iformat;
}
void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, GLfloat f) void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, GLfloat f)
{ {
struct sc_uniform *u = find_uniform(sc, name); struct sc_uniform *u = find_uniform(sc, name);
@ -809,6 +857,10 @@ static void update_uniform(GL *gl, struct sc_entry *e, struct sc_uniform *u, int
gl->ActiveTexture(GL_TEXTURE0 + u->v.i[0]); gl->ActiveTexture(GL_TEXTURE0 + u->v.i[0]);
gl->BindTexture(u->tex_target, u->tex_handle); gl->BindTexture(u->tex_target, u->tex_handle);
} }
if (u->img_handle) {
gl->BindImageTexture(u->v.i[0], u->img_handle, 0, GL_FALSE, 0,
u->img_access, u->img_iformat);
}
break; break;
case UT_f: case UT_f:
if (memcmp(un->v.f, u->v.f, sizeof(u->v.f)) != 0) { if (memcmp(un->v.f, u->v.f, sizeof(u->v.f)) != 0) {
@ -846,6 +898,16 @@ void gl_sc_set_cache_dir(struct gl_shader_cache *sc, struct mpv_global *global,
sc->global = global; sc->global = global;
} }
// Return a human-readable name for a shader stage enum, for log output.
// Unsupported stages are a programming error and abort.
static const char *shader_typestr(GLenum type)
{
    if (type == GL_VERTEX_SHADER)
        return "vertex";
    if (type == GL_FRAGMENT_SHADER)
        return "fragment";
    if (type == GL_COMPUTE_SHADER)
        return "compute";
    abort();
}
static void compile_attach_shader(struct gl_shader_cache *sc, GLuint program, static void compile_attach_shader(struct gl_shader_cache *sc, GLuint program,
GLenum type, const char *source) GLenum type, const char *source)
{ {
@ -860,7 +922,7 @@ static void compile_attach_shader(struct gl_shader_cache *sc, GLuint program,
gl->GetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length); gl->GetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length);
int pri = status ? (log_length > 1 ? MSGL_V : MSGL_DEBUG) : MSGL_ERR; int pri = status ? (log_length > 1 ? MSGL_V : MSGL_DEBUG) : MSGL_ERR;
const char *typestr = type == GL_VERTEX_SHADER ? "vertex" : "fragment"; const char *typestr = shader_typestr(type);
if (mp_msg_test(sc->log, pri)) { if (mp_msg_test(sc->log, pri)) {
MP_MSG(sc, pri, "%s shader source:\n", typestr); MP_MSG(sc, pri, "%s shader source:\n", typestr);
mp_log_source(sc->log, pri, source); mp_log_source(sc->log, pri, source);
@ -911,23 +973,28 @@ static void link_shader(struct gl_shader_cache *sc, GLuint program)
sc->error_state = true; sc->error_state = true;
} }
static GLuint compile_program(struct gl_shader_cache *sc, const char *vertex, // either 'compute' or both 'vertex' and 'frag' are needed
const char *frag) static GLuint compile_program(struct gl_shader_cache *sc, struct bstr *vertex,
struct bstr *frag, struct bstr *compute)
{ {
GL *gl = sc->gl; GL *gl = sc->gl;
GLuint prog = gl->CreateProgram(); GLuint prog = gl->CreateProgram();
compile_attach_shader(sc, prog, GL_VERTEX_SHADER, vertex); if (compute)
compile_attach_shader(sc, prog, GL_FRAGMENT_SHADER, frag); compile_attach_shader(sc, prog, GL_COMPUTE_SHADER, compute->start);
for (int n = 0; sc->vertex_entries[n].name; n++) { if (vertex && frag) {
char *vname = mp_tprintf(80, "vertex_%s", sc->vertex_entries[n].name); compile_attach_shader(sc, prog, GL_VERTEX_SHADER, vertex->start);
gl->BindAttribLocation(prog, n, vname); compile_attach_shader(sc, prog, GL_FRAGMENT_SHADER, frag->start);
for (int n = 0; sc->vertex_entries[n].name; n++) {
char *vname = mp_tprintf(80, "vertex_%s", sc->vertex_entries[n].name);
gl->BindAttribLocation(prog, n, vname);
}
} }
link_shader(sc, prog); link_shader(sc, prog);
return prog; return prog;
} }
static GLuint load_program(struct gl_shader_cache *sc, const char *vertex, static GLuint load_program(struct gl_shader_cache *sc, struct bstr *vertex,
const char *frag) struct bstr *frag, struct bstr *compute)
{ {
GL *gl = sc->gl; GL *gl = sc->gl;
@ -941,7 +1008,7 @@ static GLuint load_program(struct gl_shader_cache *sc, const char *vertex,
mp_log_source(sc->log, MSGL_V, sc->text.start); mp_log_source(sc->log, MSGL_V, sc->text.start);
if (!sc->cache_dir || !sc->cache_dir[0] || !gl->ProgramBinary) if (!sc->cache_dir || !sc->cache_dir[0] || !gl->ProgramBinary)
return compile_program(sc, vertex, frag); return compile_program(sc, vertex, frag, compute);
// Try to load it from a disk cache, or compiling + saving it. // Try to load it from a disk cache, or compiling + saving it.
@ -954,8 +1021,12 @@ static GLuint load_program(struct gl_shader_cache *sc, const char *vertex,
abort(); abort();
av_sha_init(sha, 256); av_sha_init(sha, 256);
av_sha_update(sha, vertex, strlen(vertex) + 1); if (vertex)
av_sha_update(sha, frag, strlen(frag) + 1); av_sha_update(sha, vertex->start, vertex->len + 1);
if (frag)
av_sha_update(sha, frag->start, frag->len + 1);
if (compute)
av_sha_update(sha, compute->start, compute->len + 1);
// In theory, the array could change order, breaking old binaries. // In theory, the array could change order, breaking old binaries.
for (int n = 0; sc->vertex_entries[n].name; n++) { for (int n = 0; sc->vertex_entries[n].name; n++) {
@ -997,7 +1068,7 @@ static GLuint load_program(struct gl_shader_cache *sc, const char *vertex,
} }
if (!prog) { if (!prog) {
prog = compile_program(sc, vertex, frag); prog = compile_program(sc, vertex, frag, compute);
GLint size = 0; GLint size = 0;
gl->GetProgramiv(prog, GL_PROGRAM_BINARY_LENGTH, &size); gl->GetProgramiv(prog, GL_PROGRAM_BINARY_LENGTH, &size);
@ -1040,7 +1111,8 @@ static GLuint load_program(struct gl_shader_cache *sc, const char *vertex,
// The return value is a mp_pass_perf containing performance metrics for the // The return value is a mp_pass_perf containing performance metrics for the
// execution of the generated shader. (Note: execution is measured up until // execution of the generated shader. (Note: execution is measured up until
// the corresponding gl_sc_reset call) // the corresponding gl_sc_reset call)
struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc) // 'type' can be either GL_FRAGMENT_SHADER or GL_COMPUTE_SHADER
struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc, GLenum type)
{ {
GL *gl = sc->gl; GL *gl = sc->gl;
@ -1065,81 +1137,106 @@ struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc)
if (gl->mpgl_caps & MPGL_CAP_3D_TEX) if (gl->mpgl_caps & MPGL_CAP_3D_TEX)
ADD(header, "precision mediump sampler3D;\n"); ADD(header, "precision mediump sampler3D;\n");
} }
ADD_BSTR(header, sc->prelude_text);
if (gl->glsl_version >= 130) {
ADD(header, "#define texture1D texture\n");
ADD(header, "#define texture3D texture\n");
} else {
ADD(header, "#define texture texture2D\n");
}
// Additional helpers.
ADD(header, "#define LUT_POS(x, lut_size)"
" mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x))\n");
char *vert_in = gl->glsl_version >= 130 ? "in" : "attribute"; char *vert_in = gl->glsl_version >= 130 ? "in" : "attribute";
char *vert_out = gl->glsl_version >= 130 ? "out" : "varying"; char *vert_out = gl->glsl_version >= 130 ? "out" : "varying";
char *frag_in = gl->glsl_version >= 130 ? "in" : "varying"; char *frag_in = gl->glsl_version >= 130 ? "in" : "varying";
// vertex shader: we don't use the vertex shader, so just setup a dummy, struct bstr *vert = NULL, *frag = NULL, *comp = NULL;
// which passes through the vertex array attributes.
bstr *vert_head = &sc->tmp[1]; if (type == GL_FRAGMENT_SHADER) {
ADD_BSTR(vert_head, *header); // vertex shader: we don't use the vertex shader, so just setup a
bstr *vert_body = &sc->tmp[2]; // dummy, which passes through the vertex array attributes.
ADD(vert_body, "void main() {\n"); bstr *vert_head = &sc->tmp[1];
bstr *frag_vaos = &sc->tmp[3]; ADD_BSTR(vert_head, *header);
for (int n = 0; sc->vertex_entries[n].name; n++) { bstr *vert_body = &sc->tmp[2];
const struct gl_vao_entry *e = &sc->vertex_entries[n]; ADD(vert_body, "void main() {\n");
const char *glsl_type = vao_glsl_type(e); bstr *frag_vaos = &sc->tmp[3];
if (strcmp(e->name, "position") == 0) { for (int n = 0; sc->vertex_entries[n].name; n++) {
// setting raster pos. requires setting gl_Position magic variable const struct gl_vao_entry *e = &sc->vertex_entries[n];
assert(e->num_elems == 2 && e->type == GL_FLOAT); const char *glsl_type = vao_glsl_type(e);
ADD(vert_head, "%s vec2 vertex_position;\n", vert_in); if (strcmp(e->name, "position") == 0) {
ADD(vert_body, "gl_Position = vec4(vertex_position, 1.0, 1.0);\n"); // setting raster pos. requires setting gl_Position magic variable
} else { assert(e->num_elems == 2 && e->type == GL_FLOAT);
ADD(vert_head, "%s %s vertex_%s;\n", vert_in, glsl_type, e->name); ADD(vert_head, "%s vec2 vertex_position;\n", vert_in);
ADD(vert_head, "%s %s %s;\n", vert_out, glsl_type, e->name); ADD(vert_body, "gl_Position = vec4(vertex_position, 1.0, 1.0);\n");
ADD(vert_body, "%s = vertex_%s;\n", e->name, e->name); } else {
ADD(frag_vaos, "%s %s %s;\n", frag_in, glsl_type, e->name); ADD(vert_head, "%s %s vertex_%s;\n", vert_in, glsl_type, e->name);
ADD(vert_head, "%s %s %s;\n", vert_out, glsl_type, e->name);
ADD(vert_body, "%s = vertex_%s;\n", e->name, e->name);
ADD(frag_vaos, "%s %s %s;\n", frag_in, glsl_type, e->name);
}
} }
} ADD(vert_body, "}\n");
ADD(vert_body, "}\n"); vert = vert_head;
bstr *vert = vert_head; ADD_BSTR(vert, *vert_body);
ADD_BSTR(vert, *vert_body);
// fragment shader; still requires adding used uniforms and VAO elements // fragment shader; still requires adding used uniforms and VAO elements
bstr *frag = &sc->tmp[4]; frag = &sc->tmp[4];
ADD_BSTR(frag, *header); ADD_BSTR(frag, *header);
if (gl->glsl_version >= 130) { if (gl->glsl_version >= 130)
ADD(frag, "#define texture1D texture\n"); ADD(frag, "out vec4 out_color;\n");
ADD(frag, "#define texture3D texture\n"); ADD_BSTR(frag, *frag_vaos);
ADD(frag, "out vec4 out_color;\n"); for (int n = 0; n < sc->num_uniforms; n++) {
} else { struct sc_uniform *u = &sc->uniforms[n];
ADD(frag, "#define texture texture2D\n"); ADD(frag, "uniform %s %s;\n", u->glsl_type, u->name);
} }
ADD_BSTR(frag, *frag_vaos);
for (int n = 0; n < sc->num_uniforms; n++) {
struct sc_uniform *u = &sc->uniforms[n];
ADD(frag, "uniform %s %s;\n", u->glsl_type, u->name);
}
// Additional helpers. ADD_BSTR(frag, sc->prelude_text);
ADD(frag, "#define LUT_POS(x, lut_size)"
" mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x))\n");
// custom shader header
if (sc->header_text.len) {
ADD(frag, "// header\n");
ADD_BSTR(frag, sc->header_text); ADD_BSTR(frag, sc->header_text);
ADD(frag, "// body\n");
ADD(frag, "void main() {\n");
// we require _all_ frag shaders to write to a "vec4 color"
ADD(frag, "vec4 color = vec4(0.0, 0.0, 0.0, 1.0);\n");
ADD_BSTR(frag, sc->text);
if (gl->glsl_version >= 130) {
ADD(frag, "out_color = color;\n");
} else {
ADD(frag, "gl_FragColor = color;\n");
}
ADD(frag, "}\n");
} }
ADD(frag, "void main() {\n");
// we require _all_ frag shaders to write to a "vec4 color" if (type == GL_COMPUTE_SHADER) {
ADD(frag, "vec4 color = vec4(0.0, 0.0, 0.0, 1.0);\n"); comp = &sc->tmp[4];
ADD_BSTR(frag, sc->text); ADD_BSTR(comp, *header);
if (gl->glsl_version >= 130) {
ADD(frag, "out_color = color;\n"); for (int n = 0; n < sc->num_uniforms; n++) {
} else { struct sc_uniform *u = &sc->uniforms[n];
ADD(frag, "gl_FragColor = color;\n"); ADD(comp, "uniform %s %s;\n", u->glsl_type, u->name);
}
ADD_BSTR(comp, sc->prelude_text);
ADD_BSTR(comp, sc->header_text);
ADD(comp, "void main() {\n");
ADD(comp, "vec4 color = vec4(0.0, 0.0, 0.0, 1.0);\n"); // convenience
ADD_BSTR(comp, sc->text);
ADD(comp, "}\n");
} }
ADD(frag, "}\n");
struct sc_entry *entry = NULL; struct sc_entry *entry = NULL;
for (int n = 0; n < sc->num_entries; n++) { for (int n = 0; n < sc->num_entries; n++) {
struct sc_entry *cur = &sc->entries[n]; struct sc_entry *cur = &sc->entries[n];
if (bstr_equals(cur->frag, *frag) && bstr_equals(cur->vert, *vert)) { if (frag && !bstr_equals(cur->frag, *frag))
entry = cur; continue;
break; if (vert && !bstr_equals(cur->vert, *vert))
} continue;
if (comp && !bstr_equals(cur->comp, *comp))
continue;
entry = cur;
break;
} }
if (!entry) { if (!entry) {
if (sc->num_entries == SC_MAX_ENTRIES) if (sc->num_entries == SC_MAX_ENTRIES)
@ -1147,14 +1244,15 @@ struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc)
MP_TARRAY_GROW(sc, sc->entries, sc->num_entries); MP_TARRAY_GROW(sc, sc->entries, sc->num_entries);
entry = &sc->entries[sc->num_entries++]; entry = &sc->entries[sc->num_entries++];
*entry = (struct sc_entry){ *entry = (struct sc_entry){
.vert = bstrdup(NULL, *vert), .vert = vert ? bstrdup(NULL, *vert) : (struct bstr){0},
.frag = bstrdup(NULL, *frag), .frag = frag ? bstrdup(NULL, *frag) : (struct bstr){0},
.comp = comp ? bstrdup(NULL, *comp) : (struct bstr){0},
.timer = gl_timer_create(gl), .timer = gl_timer_create(gl),
}; };
} }
// build vertex shader from vao and cache the locations of the uniform variables // build shader program and cache the locations of the uniform variables
if (!entry->gl_shader) { if (!entry->gl_shader) {
entry->gl_shader = load_program(sc, vert->start, frag->start); entry->gl_shader = load_program(sc, vert, frag, comp);
entry->num_uniforms = 0; entry->num_uniforms = 0;
for (int n = 0; n < sc->num_uniforms; n++) { for (int n = 0; n < sc->num_uniforms; n++) {
struct sc_cached_uniform un = { struct sc_cached_uniform un = {

View File

@ -66,6 +66,7 @@ bool fbotex_change(struct fbotex *fbo, GL *gl, struct mp_log *log, int w, int h,
#define FBOTEX_FUZZY_W 1 #define FBOTEX_FUZZY_W 1
#define FBOTEX_FUZZY_H 2 #define FBOTEX_FUZZY_H 2
#define FBOTEX_FUZZY (FBOTEX_FUZZY_W | FBOTEX_FUZZY_H) #define FBOTEX_FUZZY (FBOTEX_FUZZY_W | FBOTEX_FUZZY_H)
#define FBOTEX_COMPUTE 4
void fbotex_set_filter(struct fbotex *fbo, GLenum gl_filter); void fbotex_set_filter(struct fbotex *fbo, GLenum gl_filter);
void fbotex_invalidate(struct fbotex *fbo); void fbotex_invalidate(struct fbotex *fbo);
@ -141,9 +142,13 @@ void gl_sc_hadd(struct gl_shader_cache *sc, const char *text);
void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...) void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...)
PRINTF_ATTRIBUTE(2, 3); PRINTF_ATTRIBUTE(2, 3);
void gl_sc_hadd_bstr(struct gl_shader_cache *sc, struct bstr text); void gl_sc_hadd_bstr(struct gl_shader_cache *sc, struct bstr text);
void gl_sc_paddf(struct gl_shader_cache *sc, const char *textf, ...)
PRINTF_ATTRIBUTE(2, 3);
void gl_sc_uniform_tex(struct gl_shader_cache *sc, char *name, GLenum target, void gl_sc_uniform_tex(struct gl_shader_cache *sc, char *name, GLenum target,
GLuint texture); GLuint texture);
void gl_sc_uniform_tex_ui(struct gl_shader_cache *sc, char *name, GLuint texture); void gl_sc_uniform_tex_ui(struct gl_shader_cache *sc, char *name, GLuint texture);
void gl_sc_uniform_image2D(struct gl_shader_cache *sc, char *name, GLuint texture,
GLuint iformat, GLenum access);
void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, GLfloat f); void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, GLfloat f);
void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, GLint f); void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, GLint f);
void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, GLfloat f[2]); void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, GLfloat f[2]);
@ -156,7 +161,7 @@ void gl_sc_set_vertex_format(struct gl_shader_cache *sc,
const struct gl_vao_entry *entries, const struct gl_vao_entry *entries,
size_t vertex_size); size_t vertex_size);
void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name); void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name);
struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc); struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc, GLenum type);
void gl_sc_draw_data(struct gl_shader_cache *sc, GLenum prim, void *ptr, void gl_sc_draw_data(struct gl_shader_cache *sc, GLenum prim, void *ptr,
size_t num); size_t num);
void gl_sc_reset(struct gl_shader_cache *sc); void gl_sc_reset(struct gl_shader_cache *sc);

View File

@ -260,6 +260,7 @@ struct gl_video {
struct img_tex pass_tex[TEXUNIT_VIDEO_NUM]; struct img_tex pass_tex[TEXUNIT_VIDEO_NUM];
int pass_tex_num; int pass_tex_num;
int texture_w, texture_h; int texture_w, texture_h;
int compute_w, compute_h; // presence indicates the use of a compute shader
struct gl_transform texture_offset; // texture transform without rotation struct gl_transform texture_offset; // texture transform without rotation
int components; int components;
bool use_linear; bool use_linear;
@ -446,6 +447,7 @@ static void gl_video_setup_hooks(struct gl_video *p);
#define GLSL(x) gl_sc_add(p->sc, #x "\n"); #define GLSL(x) gl_sc_add(p->sc, #x "\n");
#define GLSLF(...) gl_sc_addf(p->sc, __VA_ARGS__) #define GLSLF(...) gl_sc_addf(p->sc, __VA_ARGS__)
#define GLSLHF(...) gl_sc_haddf(p->sc, __VA_ARGS__) #define GLSLHF(...) gl_sc_haddf(p->sc, __VA_ARGS__)
#define PRELUDE(...) gl_sc_paddf(p->sc, __VA_ARGS__)
static struct bstr load_cached_file(struct gl_video *p, const char *path) static struct bstr load_cached_file(struct gl_video *p, const char *path)
{ {
@ -1107,6 +1109,7 @@ static void pass_prepare_src_tex(struct gl_video *p)
char *texture_name = mp_tprintf(32, "texture%d", n); char *texture_name = mp_tprintf(32, "texture%d", n);
char *texture_size = mp_tprintf(32, "texture_size%d", n); char *texture_size = mp_tprintf(32, "texture_size%d", n);
char *texture_rot = mp_tprintf(32, "texture_rot%d", n); char *texture_rot = mp_tprintf(32, "texture_rot%d", n);
char *texture_off = mp_tprintf(32, "texture_off%d", n);
char *pixel_size = mp_tprintf(32, "pixel_size%d", n); char *pixel_size = mp_tprintf(32, "pixel_size%d", n);
if (gl_is_integer_format(s->gl_format)) { if (gl_is_integer_format(s->gl_format)) {
@ -1121,11 +1124,80 @@ static void pass_prepare_src_tex(struct gl_video *p)
} }
gl_sc_uniform_vec2(sc, texture_size, f); gl_sc_uniform_vec2(sc, texture_size, f);
gl_sc_uniform_mat2(sc, texture_rot, true, (float *)s->transform.m); gl_sc_uniform_mat2(sc, texture_rot, true, (float *)s->transform.m);
gl_sc_uniform_vec2(sc, texture_off, (float *)s->transform.t);
gl_sc_uniform_vec2(sc, pixel_size, (GLfloat[]){1.0f / f[0], gl_sc_uniform_vec2(sc, pixel_size, (GLfloat[]){1.0f / f[0],
1.0f / f[1]}); 1.0f / f[1]});
} }
} }
// Raise the compute work group size requirement for the current shader to at
// least bw x bh. Since all shaders are assumed to cope with bigger working
// groups (just never smaller ones), this effectively tracks the maximum of
// all size requirements registered for the pass.
static void compute_size_minimum(struct gl_video *p, int bw, int bh)
{
    if (p->compute_w < bw)
        p->compute_w = bw;
    if (p->compute_h < bh)
        p->compute_h = bh;
}
// Generate and run the current pass as a compute shader.
// w/h: the width/height of the compute shader's operating domain (e.g. the
// target texture that needs to be written, or the source texture that needs to
// be reduced)
// bw/bh: the width/height of the block (working group), which is tiled over
// w/h as necessary
static void dispatch_compute(struct gl_video *p, int w, int h, int bw, int bh)
{
    GL *gl = p->gl;

    // Fix the work group size in the shader prelude.
    PRELUDE("layout (local_size_x = %d, local_size_y = %d) in;\n", bw, bh);

    pass_prepare_src_tex(p);
    gl_sc_set_vertex_format(p->sc, vertex_vao, sizeof(struct vertex));

    // Since we don't actually have vertices, we pretend for convenience
    // reasons that we do and calculate the right texture coordinates based on
    // the output sample ID (gl_GlobalInvocationID).
    gl_sc_uniform_vec2(p->sc, "out_scale", (GLfloat[2]){ 1.0 / w, 1.0 / h });
    PRELUDE("#define outcoord(id) (out_scale * (vec2(id) + vec2(0.5)))\n");

    // Emit per-texture texcoord helpers mirroring what the vertex/fragment
    // pipeline would normally interpolate for us.
    for (int n = 0; n < TEXUNIT_VIDEO_NUM; n++) {
        struct img_tex *s = &p->pass_tex[n];
        if (!s->gl_tex)
            continue;

        // We need to rescale the coordinates to the true texture size,
        // since the logical size (w/h) may be smaller than the allocation.
        char tex_scale[32];
        snprintf(tex_scale, sizeof(tex_scale), "tex_scale%d", n);
        gl_sc_uniform_vec2(p->sc, tex_scale, (GLfloat[2]){
            (float)s->w / s->tex_w,
            (float)s->h / s->tex_h,
        });

        PRELUDE("#define texcoord%d_raw(id) (tex_scale%d * outcoord(id))\n", n, n);
        PRELUDE("#define texcoord%d_rot(id) (texture_rot%d * texcoord%d_raw(id) + "
                "pixel_size%d * texture_off%d)\n", n, n, n, n, n);
        // Clamp the texture coordinates to prevent sampling out-of-bounds in
        // threads that exceed the requested width/height
        PRELUDE("#define texmap%d(id) min(texcoord%d_rot(id), vec2(1.0))\n", n, n);
        PRELUDE("const vec2 texcoord%d = texmap%d(gl_GlobalInvocationID);\n", n, n);
    }

    pass_record(p, gl_sc_generate(p->sc, GL_COMPUTE_SHADER));

    // always round up when dividing to make sure we don't leave off a part of
    // the image
    int num_x = (w + bw - 1) / bw,
        num_y = (h + bh - 1) / bh;

    gl->DispatchCompute(num_x, num_y, 1);
    gl_sc_reset(p->sc);

    debug_check_gl(p, "after dispatching compute shader");

    // Clear the per-pass texture bindings, same as a fragment pass would.
    memset(&p->pass_tex, 0, sizeof(p->pass_tex));
    p->pass_tex_num = 0;
}
static void render_pass_quad(struct gl_video *p, int vp_w, int vp_h, static void render_pass_quad(struct gl_video *p, int vp_w, int vp_h,
const struct mp_rect *dst) const struct mp_rect *dst)
{ {
@ -1169,7 +1241,7 @@ static void finish_pass_direct(struct gl_video *p, GLint fbo, int vp_w, int vp_h
GL *gl = p->gl; GL *gl = p->gl;
pass_prepare_src_tex(p); pass_prepare_src_tex(p);
gl_sc_set_vertex_format(p->sc, vertex_vao, sizeof(struct vertex)); gl_sc_set_vertex_format(p->sc, vertex_vao, sizeof(struct vertex));
pass_record(p, gl_sc_generate(p->sc)); pass_record(p, gl_sc_generate(p->sc, GL_FRAGMENT_SHADER));
gl->BindFramebuffer(GL_FRAMEBUFFER, fbo); gl->BindFramebuffer(GL_FRAMEBUFFER, fbo);
render_pass_quad(p, vp_w, vp_h, dst); render_pass_quad(p, vp_w, vp_h, dst);
gl->BindFramebuffer(GL_FRAMEBUFFER, 0); gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
@ -1187,10 +1259,23 @@ static void finish_pass_direct(struct gl_video *p, GLint fbo, int vp_w, int vp_h
static void finish_pass_fbo(struct gl_video *p, struct fbotex *dst_fbo, static void finish_pass_fbo(struct gl_video *p, struct fbotex *dst_fbo,
int w, int h, int flags) int w, int h, int flags)
{ {
bool use_compute = p->compute_w > 0 && p->compute_h > 0;
if (use_compute)
flags |= FBOTEX_COMPUTE;
fbotex_change(dst_fbo, p->gl, p->log, w, h, p->opts.fbo_format, flags); fbotex_change(dst_fbo, p->gl, p->log, w, h, p->opts.fbo_format, flags);
finish_pass_direct(p, dst_fbo->fbo, dst_fbo->rw, dst_fbo->rh, if (use_compute) {
&(struct mp_rect){0, 0, w, h}); gl_sc_uniform_image2D(p->sc, "out_image", dst_fbo->texture,
dst_fbo->iformat, GL_WRITE_ONLY);
GLSL(imageStore(out_image, ivec2(gl_GlobalInvocationID), color);)
dispatch_compute(p, w, h, p->compute_w, p->compute_h);
} else {
finish_pass_direct(p, dst_fbo->fbo, dst_fbo->rw, dst_fbo->rh,
&(struct mp_rect){0, 0, w, h});
}
p->compute_w = p->compute_h = 0;
} }
static const char *get_tex_swizzle(struct img_tex *img) static const char *get_tex_swizzle(struct img_tex *img)
@ -2479,7 +2564,7 @@ static void pass_draw_osd(struct gl_video *p, int draw_flags, double pts,
pass_colormanage(p, csp_srgb, true); pass_colormanage(p, csp_srgb, true);
} }
pass_record(p, gl_sc_generate(p->sc)); pass_record(p, gl_sc_generate(p->sc, GL_FRAGMENT_SHADER));
mpgl_osd_draw_finish(p->osd, vp_w, vp_h, n, p->sc); mpgl_osd_draw_finish(p->osd, vp_w, vp_h, n, p->sc);
gl_sc_reset(p->sc); gl_sc_reset(p->sc);
} }