2015-01-28 18:40:46 +00:00
|
|
|
/*
|
|
|
|
* This file is part of mpv.
|
|
|
|
* Parts based on MPlayer code by Reimar Döffinger.
|
|
|
|
*
|
2016-01-19 17:36:34 +00:00
|
|
|
* mpv is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
2015-01-28 18:40:46 +00:00
|
|
|
*
|
|
|
|
* mpv is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
2016-01-19 17:36:34 +00:00
|
|
|
* GNU Lesser General Public License for more details.
|
2015-01-28 18:40:46 +00:00
|
|
|
*
|
2016-01-19 17:36:34 +00:00
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
* License along with mpv. If not, see <http://www.gnu.org/licenses/>.
|
2015-01-28 18:40:46 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef MP_GL_UTILS_
|
|
|
|
#define MP_GL_UTILS_
|
|
|
|
|
2015-08-29 02:12:56 +00:00
|
|
|
#include "common.h"
|
2015-01-28 18:40:46 +00:00
|
|
|
|
|
|
|
struct mp_log;
|
|
|
|
|
|
|
|
void glCheckError(GL *gl, struct mp_log *log, const char *info);
|
|
|
|
|
|
|
|
void glUploadTex(GL *gl, GLenum target, GLenum format, GLenum type,
|
|
|
|
const void *dataptr, int stride,
|
|
|
|
int x, int y, int w, int h, int slice);
|
|
|
|
void glClearTex(GL *gl, GLenum target, GLenum format, GLenum type,
|
|
|
|
int x, int y, int w, int h, uint8_t val, void **scratch);
|
|
|
|
|
|
|
|
mp_image_t *glGetWindowScreenshot(GL *gl);
|
|
|
|
|
2015-09-10 18:52:50 +00:00
|
|
|
// Maps a GL texture target to a sampler type name.
// NOTE(review): presumably a GLSL keyword such as "sampler2D"; the mapping
// itself lives in the .c file -- confirm there.
const char* mp_sampler_type(GLenum texture_target);
|
|
|
|
|
2015-01-28 18:40:46 +00:00
|
|
|
// Print a multi-line string with line numbers (e.g. for shader sources).
// log, lev: module and log level, as in mp_msg()
// src: the text to print
void mp_log_source(struct mp_log *log, int lev, const char *src);
|
|
|
|
|
2015-01-28 21:22:29 +00:00
|
|
|
struct gl_vao_entry {
|
2015-01-29 16:19:01 +00:00
|
|
|
// used for shader / glBindAttribLocation
|
2015-01-28 21:22:29 +00:00
|
|
|
const char *name;
|
2015-01-29 16:19:01 +00:00
|
|
|
// glVertexAttribPointer() arguments
|
|
|
|
int num_elems; // size (number of elements)
|
2015-01-28 21:22:29 +00:00
|
|
|
GLenum type;
|
|
|
|
bool normalized;
|
|
|
|
int offset;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct gl_vao {
|
|
|
|
GL *gl;
|
2015-01-29 16:19:01 +00:00
|
|
|
GLuint vao; // the VAO object, or 0 if unsupported by driver
|
|
|
|
GLuint buffer; // GL_ARRAY_BUFFER used for the data
|
|
|
|
int stride; // size of each element (interleaved elements are assumed)
|
2015-01-28 21:22:29 +00:00
|
|
|
const struct gl_vao_entry *entries;
|
|
|
|
};
|
|
|
|
|
|
|
|
void gl_vao_init(struct gl_vao *vao, GL *gl, int stride,
|
|
|
|
const struct gl_vao_entry *entries);
|
|
|
|
void gl_vao_uninit(struct gl_vao *vao);
|
|
|
|
void gl_vao_bind(struct gl_vao *vao);
|
|
|
|
void gl_vao_unbind(struct gl_vao *vao);
|
2015-01-29 16:19:01 +00:00
|
|
|
void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num);
|
2015-01-28 21:22:29 +00:00
|
|
|
|
2015-01-29 13:58:26 +00:00
|
|
|
struct fbotex {
|
|
|
|
GL *gl;
|
|
|
|
GLuint fbo;
|
|
|
|
GLuint texture;
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
GLenum iformat;
|
|
|
|
GLenum tex_filter;
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c doesn't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
int rw, rh; // real (texture) size
|
|
|
|
int lw, lh; // logical (configured) size
|
2015-01-29 13:58:26 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
bool fbotex_init(struct fbotex *fbo, GL *gl, struct mp_log *log, int w, int h,
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
GLenum iformat);
|
2015-01-29 13:58:26 +00:00
|
|
|
// Counterpart of fbotex_init().
// NOTE(review): presumably releases fbo->fbo and fbo->texture -- confirm.
void fbotex_uninit(struct fbotex *fbo);
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
performing the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
bool fbotex_change(struct fbotex *fbo, GL *gl, struct mp_log *log, int w, int h,
|
|
|
|
GLenum iformat, int flags);
|
|
|
|
#define FBOTEX_FUZZY_W 1
|
|
|
|
#define FBOTEX_FUZZY_H 2
|
2015-03-25 22:06:46 +00:00
|
|
|
#define FBOTEX_FUZZY (FBOTEX_FUZZY_W | FBOTEX_FUZZY_H)
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
performing the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
// Set the texture filter used for fbo's texture.
// NOTE(review): presumably also updates fbo->tex_filter -- confirm.
void fbotex_set_filter(struct fbotex *fbo, GLenum gl_filter);
|
2015-01-29 13:58:26 +00:00
|
|
|
|
2015-03-13 20:14:18 +00:00
|
|
|
// A 3x2 matrix, with the translation part separate.
struct gl_transform {
    // row-major, e.g. in mathematical notation:
    //  | m[0][0] m[0][1] |
    //  | m[1][0] m[1][1] |
    float m[2][2];
    // translation, added after the matrix multiply (see gl_transform_vec())
    float t[2];
};
|
|
|
|
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c don't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
static const struct gl_transform identity_trans = {
|
|
|
|
.m = {{1.0, 0.0}, {0.0, 1.0}},
|
|
|
|
.t = {0.0, 0.0},
|
|
|
|
};
|
|
|
|
|
2015-03-13 20:14:18 +00:00
|
|
|
// Fill *t from the ranges [x0, x1] and [y0, y1].
// NOTE(review): presumably an orthographic projection mapping those ranges
// to clip space; the exact mapping is defined in the .c file -- confirm.
void gl_transform_ortho(struct gl_transform *t, float x0, float x1,
                        float y0, float y1);
|
2015-01-29 13:58:26 +00:00
|
|
|
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seem
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
// This treats m as an affine transformation, in other words m[2][n] gets
|
|
|
|
// added to the output.
|
2015-03-13 20:14:18 +00:00
|
|
|
static inline void gl_transform_vec(struct gl_transform t, float *x, float *y)
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
{
|
|
|
|
float vx = *x, vy = *y;
|
2016-03-28 14:16:09 +00:00
|
|
|
*x = vx * t.m[0][0] + vy * t.m[0][1] + t.t[0];
|
|
|
|
*y = vx * t.m[1][0] + vy * t.m[1][1] + t.t[1];
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
perfoming the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
}
|
|
|
|
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seem
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
// A rectangle given by two corner points, (x0, y0) and (x1, y1), in floats.
struct mp_rect_f {
    float x0, y0, x1, y1;
};
|
|
|
|
|
2015-03-13 20:14:18 +00:00
|
|
|
static inline void gl_transform_rect(struct gl_transform t, struct mp_rect_f *r)
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
{
|
2015-03-13 20:14:18 +00:00
|
|
|
gl_transform_vec(t, &r->x0, &r->y0);
|
|
|
|
gl_transform_vec(t, &r->x1, &r->y1);
|
vo_opengl: refactor shader generation (part 2)
This adds stuff related to gamma, linear light, sigmoid, BT.2020-CL,
etc, as well as color management. Also adds a new gamma function (gamma22).
This adds new parameters to configure the CMS settings, in particular
letting us target simple colorspaces without requiring usage of a 3DLUT.
This adds smoothmotion. Mostly working, but it's still sensitive to
timing issues. It's based on an actual queue now, but the queue size
is kept small to avoid larger amounts of latency.
Also makes “upscale before blending” the default strategy.
This is justified because the "render after blending" thing doesn't seme
to work consistently any way (introduces stutter due to the way vsync
timing works, or something), so this behavior is a bit closer to master
and makes pausing/unpausing less weird/jumpy.
This adds the remaining scalers, including bicubic_fast, sharpen3,
sharpen5, polar filters and antiringing. Apparently, sharpen3/5 also
consult scale-param1, which was undocumented in master.
This also implements cropping and chroma transformation, plus
rotation/flipping. These are inherently part of the same logic, although
it's a bit rough around the edges in some case, mainly due to the fallback
code paths (for bilinear scaling without indirection).
2015-03-12 21:18:16 +00:00
|
|
|
}
|
|
|
|
|
vo_opengl: refactor pass_read_video and texture binding
This is a pretty major rewrite of the internal texture binding
mechanic, which makes it more flexible.
In general, the difference between the old and current approaches is
that now, all texture description is held in a struct img_tex and only
explicitly bound with pass_bind. (Once bound, a texture unit is assumed
to be set in stone and no longer tied to the img_tex)
This approach makes the code inside pass_read_video significantly more
flexible and cuts down on the number of weird special cases and
spaghetti logic.
It also has some improvements, e.g. cutting down greatly on the number
of unnecessary conversion passes inside pass_read_video (which was
previously mostly done to cope with the fact that the alternative would
have resulted in a combinatorial explosion of code complexity).
Some other notable changes (and potential improvements):
- texture expansion is now *always* handled in pass_read_video, and the
colormatrix never does this anymore. (Which means the code could
probably be removed from the colormatrix generation logic, modulo some
other VOs)
- struct fbo_tex now stores both its "physical" and "logical"
(configured) size, which cuts down on the amount of width/height
baggage on some function calls
- vo_opengl can now technically support textures with different bit
depths (e.g. 10 bit luma, 8 bit chroma) - but the APIs it queries
inside img_format.c don't export this (nor does ffmpeg support it,
really) so the status quo of using the same tex_mul for all planes is
kept.
- dumb_mode is now only needed because of the indirect_fbo being in the
main rendering pipeline. If we reintroduce p->use_indirect and thread
a transform through the entire program this could be skipped where
unnecessary, allowing for the removal of dumb_mode. But I'm not sure
how to do this in a clean way. (Which is part of why it got introduced
to begin with)
- It would be trivial to resurrect source-shader now (it would just be
one extra 'if' inside pass_read_video).
2016-03-05 10:29:19 +00:00
|
|
|
// Returns true if a and b have identical matrix and translation components.
// The comparison is exact (operator !=/== on floats), with no tolerance.
static inline bool gl_transform_eq(struct gl_transform a, struct gl_transform b)
{
    for (int row = 0; row < 2; row++) {
        for (int col = 0; col < 2; col++) {
            if (a.m[row][col] != b.m[row][col])
                return false;
        }
    }

    return a.t[0] == b.t[0] && a.t[1] == b.t[1];
}
|
|
|
|
|
2015-10-26 22:43:48 +00:00
|
|
|
// Combine the transform t into *x (x is modified in place).
// NOTE(review): the order of composition is not visible in this header --
// confirm against the implementation before relying on it.
void gl_transform_trans(struct gl_transform t, struct gl_transform *x);
|
|
|
|
|
2015-01-29 14:50:21 +00:00
|
|
|
// Direct GL debug output for `gl` to `log`.
// NOTE(review): whether a NULL log disables debug output is not visible
// here -- confirm against the implementation.
void gl_set_debug_logger(GL *gl, struct mp_log *log);
|
|
|
|
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
performing the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
// Opaque in this header: only declared here and used through pointers by
// the gl_sc_*() functions.
struct gl_shader_cache;
|
|
|
|
|
2015-09-23 20:13:03 +00:00
|
|
|
// Create a shader cache for the given GL context.
// NOTE(review): presumably released with gl_sc_destroy() -- confirm.
struct gl_shader_cache *gl_sc_create(GL *gl, struct mp_log *log);
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
performing the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
void gl_sc_destroy(struct gl_shader_cache *sc);
// Add shader source text. The *f variants take a format string plus
// variadic arguments (NOTE(review): presumably printf-style -- confirm).
void gl_sc_add(struct gl_shader_cache *sc, const char *text);
void gl_sc_addf(struct gl_shader_cache *sc, const char *textf, ...);
// NOTE(review): the "h" variants presumably add to a separate header/prelude
// part of the shader rather than the main body -- confirm in the .c file.
void gl_sc_hadd(struct gl_shader_cache *sc, const char *text);
void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...);
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
performing the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
// Set the sampler uniform `name` to texture unit `unit`; `target` is the GL
// texture target the sampler refers to.
// NOTE(review): the sampler's GLSL type is presumably derived from `target`
// (cf. mp_sampler_type()) -- confirm against the implementation.
void gl_sc_uniform_sampler(struct gl_shader_cache *sc, char *name, GLenum target,
                           int unit);
// NOTE(review): presumably the unsigned-integer sampler counterpart of
// gl_sc_uniform_sampler() -- confirm against the implementation.
void gl_sc_uniform_sampler_ui(struct gl_shader_cache *sc, char *name, int unit);
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
performing the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, GLfloat f);
|
2015-03-27 12:27:40 +00:00
|
|
|
void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, GLint f);
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
performing the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
// Set the vector uniform `name` from 2 / 3 floats respectively.
void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, GLfloat f[2]);
void gl_sc_uniform_vec3(struct gl_shader_cache *sc, char *name, GLfloat f[3]);
// Set the matrix uniform `name` from the floats at `v`.
// NOTE(review): `transpose` is presumably forwarded as the transpose
// argument of glUniformMatrix*fv, and `v` presumably holds 4 (mat2) or 9
// (mat3) values -- confirm against the implementation.
void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name,
                        bool transpose, GLfloat *v);
void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name,
                        bool transpose, GLfloat *v);
|
vo_opengl: implement NNEDI3 prescaler
Implement NNEDI3, a neural network based deinterlacer.
The shader is reimplemented in GLSL and supports both 8x4 and 8x6
sampling window now. This allows the shader to be licensed
under LGPL2.1 so that it can be used in mpv.
The current implementation supports uploading the NN weights (up to
51kb with placebo setting) in two different ways, via uniform buffer
object or hard coding into shader source. UBO requires OpenGL 3.1,
which only guarantees 16kb per block. But I find that 64kb seems to be
a default setting for recent card/driver (which nnedi3 is targeting),
so I think we're fine here (with default nnedi3 setting the size of
weights is 9kb). Hard-coding into shader requires OpenGL 3.3, for the
"intBitsToFloat()" built-in function. This is necessary to precisely
represent these weights in GLSL. I tried several human readable
floating point number format (with really high precision as for
single precision float), but for some reason they are not working
nicely, bad pixels (with NaN value) could be produced with some
weights set.
We could also add support to upload these weights with texture, just
for compatibility reasons (e.g. upscaling a still image with a low end
graphics card). But as I tested, it's rather slow even with 1D
texture (we probably had to use 2D texture due to dimension size
limitation). Since there is always better choice to do NNEDI3
upscaling for still image (vapoursynth plugin), it's not implemented
in this commit. If this turns out to be a popular demand from the
user, it should be easy to add it later.
For those who wants to optimize the performance a bit further, the
bottleneck seems to be:
1. overhead to upload and access these weights, (in particular,
the shader code will be regenerated for each frame, it's on CPU
though).
2. "dot()" performance in the main loop.
3. "exp()" performance in the main loop, there are various fast
implementation with some bit tricks (probably with the help of the
intBitsToFloat function).
The code is tested with nvidia card and driver (355.11), on Linux.
Closes #2230
2015-10-28 01:37:55 +00:00
|
|
|
// Declare a uniform buffer named `name` at binding point `binding`.
// NOTE(review): `text` is presumably the GLSL source for the block's
// contents -- confirm against the implementation.
void gl_sc_uniform_buffer(struct gl_shader_cache *sc, char *name,
                          const char *text, int binding);
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
performing the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
// Associate `vao` with the shader being built.
// NOTE(review): presumably its entries supply the vertex attribute
// declarations -- confirm against the implementation.
void gl_sc_set_vao(struct gl_shader_cache *sc, struct gl_vao *vao);
// Request the extension `name` for the shader being built.
// NOTE(review): presumably emitted as a GLSL #extension directive -- confirm.
void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name);
|
vo_opengl: refactor shader generation (part 1)
The basic idea is to use dynamically generated shaders instead of a
single monolithic file + a ton of ifdefs. Instead of having to setup
every aspect of it separately (like compiling shaders, setting uniforms,
performing the actual rendering steps, the GLSL parts), we generate the
GLSL on the fly, and perform the rendering at the same time. The GLSL
is regenerated every frame, but the actual compiled OpenGL-level shaders
are cached, which makes it fast again. Almost all logic can be in a
single place.
The new code is significantly more flexible, which allows us to improve
the code clarity, performance and add more features easily.
This commit is incomplete. It drops almost all previous code, and
readds only the most important things (some of them actually buggy).
The next commit will complete it - it's separate to preserve authorship
information.
2015-03-12 20:57:54 +00:00
|
|
|
// Generate the shader from the state accumulated via the gl_sc_*() calls,
// then reset that state.
// NOTE(review): presumably reuses an already compiled program when the
// generated source matches a cached one -- confirm against the .c file.
void gl_sc_gen_shader_and_reset(struct gl_shader_cache *sc);
// Discard the accumulated shader state without generating a shader.
void gl_sc_reset(struct gl_shader_cache *sc);
|
|
|
|
|
2015-01-28 18:40:46 +00:00
|
|
|
#endif
|