mirror of https://github.com/mpv-player/mpv
vo_gpu: attempt to avoid UBOs for dynamic variables
This makes the radeon driver stop complaining about frequently updated STATIC_DRAW UBOs (visible with --opengl-debug), and also reduces the amount of synchronization necessary for Vulkan uniform buffers. Also add some extra debugging/tracing code paths. I went with a flags-based approach in case we ever want to extend this.
This commit is contained in:
parent
ca85a153b4
commit
47af509e1f
|
@ -76,6 +76,7 @@ struct gl_shader_cache {
|
||||||
// Next binding point (texture unit, image unit, buffer binding, etc.)
|
// Next binding point (texture unit, image unit, buffer binding, etc.)
|
||||||
// In OpenGL these are separate for each input type
|
// In OpenGL these are separate for each input type
|
||||||
int next_binding[RA_VARTYPE_COUNT];
|
int next_binding[RA_VARTYPE_COUNT];
|
||||||
|
bool next_uniform_dynamic;
|
||||||
|
|
||||||
struct ra_renderpass_params params;
|
struct ra_renderpass_params params;
|
||||||
|
|
||||||
|
@ -135,6 +136,7 @@ void gl_sc_reset(struct gl_shader_cache *sc)
|
||||||
sc->pushc_size = 0;
|
sc->pushc_size = 0;
|
||||||
for (int i = 0; i < RA_VARTYPE_COUNT; i++)
|
for (int i = 0; i < RA_VARTYPE_COUNT; i++)
|
||||||
sc->next_binding[i] = 0;
|
sc->next_binding[i] = 0;
|
||||||
|
sc->next_uniform_dynamic = false;
|
||||||
sc->current_shader = NULL;
|
sc->current_shader = NULL;
|
||||||
sc->params = (struct ra_renderpass_params){0};
|
sc->params = (struct ra_renderpass_params){0};
|
||||||
sc->needs_reset = false;
|
sc->needs_reset = false;
|
||||||
|
@ -259,14 +261,22 @@ static int gl_sc_next_binding(struct gl_shader_cache *sc, enum ra_vartype type)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Marks the next uniform registered on this shader cache as "dynamic", i.e.
// expected to change frequently. The flag is one-shot: update_uniform_params()
// reads it and immediately resets it to false, so this must be called again
// before each uniform that should receive the hint.
void gl_sc_uniform_dynamic(struct gl_shader_cache *sc)
|
||||||
|
{
|
||||||
|
sc->next_uniform_dynamic = true;
|
||||||
|
}
|
||||||
|
|
||||||
// Updates the metadata for the given sc_uniform. Assumes sc_uniform->input
|
// Updates the metadata for the given sc_uniform. Assumes sc_uniform->input
|
||||||
// and glsl_type/buffer_format are already set.
|
// and glsl_type/buffer_format are already set.
|
||||||
static void update_uniform_params(struct gl_shader_cache *sc, struct sc_uniform *u)
|
static void update_uniform_params(struct gl_shader_cache *sc, struct sc_uniform *u)
|
||||||
{
|
{
|
||||||
|
bool dynamic = sc->next_uniform_dynamic;
|
||||||
|
sc->next_uniform_dynamic = false;
|
||||||
|
|
||||||
// Try not using push constants for "large" values like matrices, since
|
// Try not using push constants for "large" values like matrices, since
|
||||||
// this is likely to both exceed the VGPR budget as well as the pushc size
|
// this is likely to both exceed the VGPR budget as well as the pushc size
|
||||||
// budget
|
// budget
|
||||||
bool try_pushc = u->input.dim_m == 1;
|
bool try_pushc = u->input.dim_m == 1 || dynamic;
|
||||||
|
|
||||||
// Attempt using push constants first
|
// Attempt using push constants first
|
||||||
if (try_pushc && sc->ra->glsl_vulkan && sc->ra->max_pushc_size) {
|
if (try_pushc && sc->ra->glsl_vulkan && sc->ra->max_pushc_size) {
|
||||||
|
@ -287,7 +297,10 @@ static void update_uniform_params(struct gl_shader_cache *sc, struct sc_uniform
|
||||||
// to explicit offsets on UBO entries. In theory we could leave away
|
// to explicit offsets on UBO entries. In theory we could leave away
|
||||||
// the offsets and support UBOs for older GL as well, but this is a nice
|
// the offsets and support UBOs for older GL as well, but this is a nice
|
||||||
// safety net for driver bugs (and also rules out potentially buggy drivers)
|
// safety net for driver bugs (and also rules out potentially buggy drivers)
|
||||||
if (sc->ra->glsl_version >= 440 && (sc->ra->caps & RA_CAP_BUF_RO)) {
|
// Also avoid UBOs for highly dynamic stuff since that requires synchronizing
|
||||||
|
// the UBO writes every frame
|
||||||
|
bool try_ubo = !(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM) || !dynamic;
|
||||||
|
if (try_ubo && sc->ra->glsl_version >= 440 && (sc->ra->caps & RA_CAP_BUF_RO)) {
|
||||||
u->type = SC_UNIFORM_TYPE_UBO;
|
u->type = SC_UNIFORM_TYPE_UBO;
|
||||||
u->layout = sc->ra->fns->uniform_layout(&u->input);
|
u->layout = sc->ra->fns->uniform_layout(&u->input);
|
||||||
u->offset = MP_ALIGN_UP(sc->ubo_size, u->layout.align);
|
u->offset = MP_ALIGN_UP(sc->ubo_size, u->layout.align);
|
||||||
|
@ -514,6 +527,13 @@ static void update_uniform(struct gl_shader_cache *sc, struct sc_entry *e,
|
||||||
un->v = u->v;
|
un->v = u->v;
|
||||||
un->set = true;
|
un->set = true;
|
||||||
|
|
||||||
|
static const char *desc[] = {
|
||||||
|
[SC_UNIFORM_TYPE_UBO] = "UBO",
|
||||||
|
[SC_UNIFORM_TYPE_PUSHC] = "PC",
|
||||||
|
[SC_UNIFORM_TYPE_GLOBAL] = "global",
|
||||||
|
};
|
||||||
|
MP_TRACE(sc, "Updating %s uniform '%s'\n", desc[u->type], u->input.name);
|
||||||
|
|
||||||
switch (u->type) {
|
switch (u->type) {
|
||||||
case SC_UNIFORM_TYPE_GLOBAL: {
|
case SC_UNIFORM_TYPE_GLOBAL: {
|
||||||
struct ra_renderpass_input_val value = {
|
struct ra_renderpass_input_val value = {
|
||||||
|
|
|
@ -25,6 +25,10 @@ void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...)
|
||||||
void gl_sc_hadd_bstr(struct gl_shader_cache *sc, struct bstr text);
|
void gl_sc_hadd_bstr(struct gl_shader_cache *sc, struct bstr text);
|
||||||
void gl_sc_paddf(struct gl_shader_cache *sc, const char *textf, ...)
|
void gl_sc_paddf(struct gl_shader_cache *sc, const char *textf, ...)
|
||||||
PRINTF_ATTRIBUTE(2, 3);
|
PRINTF_ATTRIBUTE(2, 3);
|
||||||
|
|
||||||
|
// A hint that the next data-type (i.e. non-binding) uniform is expected to
|
||||||
|
// change frequently. This refers to the _f, _i, _vecN etc. uniform types.
|
||||||
|
void gl_sc_uniform_dynamic(struct gl_shader_cache *sc);
|
||||||
void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name,
|
void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name,
|
||||||
struct ra_tex *tex);
|
struct ra_tex *tex);
|
||||||
void gl_sc_uniform_image2D_wo(struct gl_shader_cache *sc, const char *name,
|
void gl_sc_uniform_image2D_wo(struct gl_shader_cache *sc, const char *name,
|
||||||
|
|
|
@ -1500,7 +1500,9 @@ found:
|
||||||
static void load_shader(struct gl_video *p, struct bstr body)
|
static void load_shader(struct gl_video *p, struct bstr body)
|
||||||
{
|
{
|
||||||
gl_sc_hadd_bstr(p->sc, body);
|
gl_sc_hadd_bstr(p->sc, body);
|
||||||
|
gl_sc_uniform_dynamic(p->sc);
|
||||||
gl_sc_uniform_f(p->sc, "random", (double)av_lfg_get(&p->lfg) / UINT32_MAX);
|
gl_sc_uniform_f(p->sc, "random", (double)av_lfg_get(&p->lfg) / UINT32_MAX);
|
||||||
|
gl_sc_uniform_dynamic(p->sc);
|
||||||
gl_sc_uniform_i(p->sc, "frame", p->frames_uploaded);
|
gl_sc_uniform_i(p->sc, "frame", p->frames_uploaded);
|
||||||
gl_sc_uniform_vec2(p->sc, "input_size",
|
gl_sc_uniform_vec2(p->sc, "input_size",
|
||||||
(float[]){(p->src_rect.x1 - p->src_rect.x0) *
|
(float[]){(p->src_rect.x1 - p->src_rect.x0) *
|
||||||
|
@ -2579,6 +2581,7 @@ static void pass_dither(struct gl_video *p)
|
||||||
|
|
||||||
float matrix[2][2] = {{cos(r), -sin(r) },
|
float matrix[2][2] = {{cos(r), -sin(r) },
|
||||||
{sin(r) * m, cos(r) * m}};
|
{sin(r) * m, cos(r) * m}};
|
||||||
|
gl_sc_uniform_dynamic(p->sc);
|
||||||
gl_sc_uniform_mat2(p->sc, "dither_trafo", true, &matrix[0][0]);
|
gl_sc_uniform_mat2(p->sc, "dither_trafo", true, &matrix[0][0]);
|
||||||
|
|
||||||
GLSL(dither_pos = dither_trafo * dither_pos;)
|
GLSL(dither_pos = dither_trafo * dither_pos;)
|
||||||
|
@ -2957,11 +2960,13 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t,
|
||||||
|
|
||||||
// Blend the frames together
|
// Blend the frames together
|
||||||
if (oversample || linear) {
|
if (oversample || linear) {
|
||||||
|
gl_sc_uniform_dynamic(p->sc);
|
||||||
gl_sc_uniform_f(p->sc, "inter_coeff", mix);
|
gl_sc_uniform_f(p->sc, "inter_coeff", mix);
|
||||||
GLSL(color = mix(texture(texture0, texcoord0),
|
GLSL(color = mix(texture(texture0, texcoord0),
|
||||||
texture(texture1, texcoord1),
|
texture(texture1, texcoord1),
|
||||||
inter_coeff);)
|
inter_coeff);)
|
||||||
} else {
|
} else {
|
||||||
|
gl_sc_uniform_dynamic(p->sc);
|
||||||
gl_sc_uniform_f(p->sc, "fcoord", mix);
|
gl_sc_uniform_f(p->sc, "fcoord", mix);
|
||||||
pass_sample_separated_gen(p->sc, tscale, 0, 0);
|
pass_sample_separated_gen(p->sc, tscale, 0, 0);
|
||||||
}
|
}
|
||||||
|
|
|
@ -768,6 +768,7 @@ static void prng_init(struct gl_shader_cache *sc, AVLFG *lfg)
|
||||||
// Initialize the PRNG by hashing the position + a random uniform
|
// Initialize the PRNG by hashing the position + a random uniform
|
||||||
GLSL(vec3 _m = vec3(HOOKED_pos, random) + vec3(1.0);)
|
GLSL(vec3 _m = vec3(HOOKED_pos, random) + vec3(1.0);)
|
||||||
GLSL(float h = permute(permute(permute(_m.x)+_m.y)+_m.z);)
|
GLSL(float h = permute(permute(permute(_m.x)+_m.y)+_m.z);)
|
||||||
|
gl_sc_uniform_dynamic(sc);
|
||||||
gl_sc_uniform_f(sc, "random", (double)av_lfg_get(lfg) / UINT32_MAX);
|
gl_sc_uniform_f(sc, "random", (double)av_lfg_get(lfg) / UINT32_MAX);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue