mirror of
https://github.com/mpv-player/mpv
synced 2025-01-19 22:01:10 +00:00
vo_gpu: vulkan: add support for push constants
Can in theory avoid updating the uniform buffer every frame
This commit is contained in:
parent
ed345ffc2f
commit
ca85a153b4
@ -26,6 +26,9 @@ struct ra {
|
||||
// time.
|
||||
size_t max_shmem;
|
||||
|
||||
// Maximum push constant size. Set by the RA backend at init time.
|
||||
size_t max_pushc_size;
|
||||
|
||||
// Set of supported texture formats. Must be added by RA backend at init time.
|
||||
// If there are equivalent formats with different caveats, the preferred
|
||||
// formats should have a lower index. (E.g. GLES3 should put rg8 before la.)
|
||||
@ -245,6 +248,7 @@ struct ra_renderpass_params {
|
||||
// Uniforms, including texture/sampler inputs.
|
||||
struct ra_renderpass_input *inputs;
|
||||
int num_inputs;
|
||||
size_t push_constants_size; // must be <= ra.max_pushc_size and a multiple of 4
|
||||
|
||||
// Highly implementation-specific byte array storing a compiled version
|
||||
// of the program. Can be used to speed up shader compilation. A backend
|
||||
@ -317,6 +321,7 @@ struct ra_renderpass_run_params {
|
||||
// even if they do not change.
|
||||
struct ra_renderpass_input_val *values;
|
||||
int num_values;
|
||||
void *push_constants; // must be set if params.push_constants_size > 0
|
||||
|
||||
// --- pass->params.type==RA_RENDERPASS_TYPE_RASTER only
|
||||
|
||||
@ -387,6 +392,10 @@ struct ra_fns {
|
||||
// but must be implemented if RA_CAP_BUF_RO is supported.
|
||||
struct ra_layout (*uniform_layout)(struct ra_renderpass_input *inp);
|
||||
|
||||
// Returns the layout requirements of a push constant element. Optional,
|
||||
// but must be implemented if ra.max_pushc_size > 0.
|
||||
struct ra_layout (*push_constant_layout)(struct ra_renderpass_input *inp);
|
||||
|
||||
// Clear the dst with the given color (rgba) and within the given scissor.
|
||||
// dst must have dst->params.render_dst==true. Content outside of the
|
||||
// scissor is preserved.
|
||||
|
@ -29,6 +29,7 @@ union uniform_val {
|
||||
// Selects which mechanism carries an sc_uniform's value to the shader. The
// trailing note on each member names the RA capability or limit it relies on.
enum sc_uniform_type {
|
||||
SC_UNIFORM_TYPE_GLOBAL = 0, // global uniform (RA_CAP_GLOBAL_UNIFORM)
|
||||
SC_UNIFORM_TYPE_UBO = 1, // uniform buffer (RA_CAP_BUF_RO)
|
||||
SC_UNIFORM_TYPE_PUSHC = 2, // push constant (ra.max_pushc_size)
|
||||
};
|
||||
|
||||
struct sc_uniform {
|
||||
@ -37,7 +38,7 @@ struct sc_uniform {
|
||||
const char *glsl_type;
|
||||
union uniform_val v;
|
||||
char *buffer_format;
|
||||
// for SC_UNIFORM_TYPE_UBO:
|
||||
// for SC_UNIFORM_TYPE_UBO/PUSHC:
|
||||
struct ra_layout layout;
|
||||
size_t offset; // byte offset within the buffer
|
||||
};
|
||||
@ -56,6 +57,7 @@ struct sc_entry {
|
||||
struct timer_pool *timer;
|
||||
struct ra_buf *ubo;
|
||||
int ubo_index; // for ra_renderpass_input_val.index
|
||||
void *pushc;
|
||||
};
|
||||
|
||||
struct gl_shader_cache {
|
||||
@ -87,6 +89,7 @@ struct gl_shader_cache {
|
||||
|
||||
int ubo_binding;
|
||||
size_t ubo_size;
|
||||
size_t pushc_size;
|
||||
|
||||
struct ra_renderpass_input_val *values;
|
||||
int num_values;
|
||||
@ -129,6 +132,7 @@ void gl_sc_reset(struct gl_shader_cache *sc)
|
||||
sc->num_uniforms = 0;
|
||||
sc->ubo_binding = 0;
|
||||
sc->ubo_size = 0;
|
||||
sc->pushc_size = 0;
|
||||
for (int i = 0; i < RA_VARTYPE_COUNT; i++)
|
||||
sc->next_binding[i] = 0;
|
||||
sc->current_shader = NULL;
|
||||
@ -255,25 +259,45 @@ static int gl_sc_next_binding(struct gl_shader_cache *sc, enum ra_vartype type)
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE(review): this span shows two versions of one helper back to back
// (update_ubo_params, then update_uniform_params) sharing a single body, and
// both the UBO assignment and the GLSL 440 check appear twice. It looks like
// removed and added diff lines rendered without their +/- markers - confirm
// against the upstream commit before treating this as compilable code.
// Updates the UBO metadata for the given sc_uniform. Assumes sc_uniform->input
|
||||
// is already set. Also updates sc_uniform->type.
|
||||
static void update_ubo_params(struct gl_shader_cache *sc, struct sc_uniform *u)
|
||||
// Updates the metadata for the given sc_uniform. Assumes sc_uniform->input
|
||||
// and glsl_type/buffer_format are already set.
|
||||
static void update_uniform_params(struct gl_shader_cache *sc, struct sc_uniform *u)
|
||||
{
|
||||
// NOTE(review): this early return leaves u->type untouched when the RA lacks
// RA_CAP_BUF_RO; presumably part of the older UBO-only helper - confirm.
if (!(sc->ra->caps & RA_CAP_BUF_RO))
|
||||
return;
|
||||
// Try not using push constants for "large" values like matrices, since
|
||||
// this is likely to both exceed the VGPR budget as well as the pushc size
|
||||
// budget
|
||||
bool try_pushc = u->input.dim_m == 1;
|
||||
|
||||
// Using UBOs with explicit layout(offset) like we do requires GLSL version
|
||||
// 440 or higher. In theory the UBO code can also use older versions, but
|
||||
// just try and avoid potential headaches. This also ensures they're only
|
||||
// used on drivers that are probably modern enough to actually support them
|
||||
// correctly.
|
||||
if (sc->ra->glsl_version < 440)
|
||||
return;
|
||||
// Attempt using push constants first
|
||||
// Only taken when ra->glsl_vulkan is set and ra->max_pushc_size is non-zero.
if (try_pushc && sc->ra->glsl_vulkan && sc->ra->max_pushc_size) {
|
||||
struct ra_layout layout = sc->ra->fns->push_constant_layout(&u->input);
|
||||
// Candidate offset: the current push constant size passed through
// MP_ALIGN_UP with the layout's alignment (presumably rounding up).
size_t offset = MP_ALIGN_UP(sc->pushc_size, layout.align);
|
||||
// Push constants have limited size, so make sure we don't exceed this
|
||||
size_t new_size = offset + layout.size;
|
||||
if (new_size <= sc->ra->max_pushc_size) {
|
||||
u->type = SC_UNIFORM_TYPE_PUSHC;
|
||||
u->layout = layout;
|
||||
u->offset = offset;
|
||||
sc->pushc_size = new_size;
|
||||
// The uniform fits and is fully described; skip the UBO/global paths.
return;
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE(review): unconditional UBO assignment, repeated inside the guarded
// block further down; likely the pre-change body - confirm.
u->type = SC_UNIFORM_TYPE_UBO;
|
||||
u->layout = sc->ra->fns->uniform_layout(&u->input);
|
||||
u->offset = MP_ALIGN_UP(sc->ubo_size, u->layout.align);
|
||||
sc->ubo_size = u->offset + u->layout.size;
|
||||
// Attempt using uniform buffer next. The GLSL version 440 check is due
|
||||
// to explicit offsets on UBO entries. In theory we could leave away
|
||||
// the offsets and support UBOs for older GL as well, but this is a nice
|
||||
// safety net for driver bugs (and also rules out potentially buggy drivers)
|
||||
if (sc->ra->glsl_version >= 440 && (sc->ra->caps & RA_CAP_BUF_RO)) {
|
||||
u->type = SC_UNIFORM_TYPE_UBO;
|
||||
u->layout = sc->ra->fns->uniform_layout(&u->input);
|
||||
u->offset = MP_ALIGN_UP(sc->ubo_size, u->layout.align);
|
||||
sc->ubo_size = u->offset + u->layout.size;
|
||||
return;
|
||||
}
|
||||
|
||||
// If all else fails, use global uniforms
|
||||
assert(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM);
|
||||
u->type = SC_UNIFORM_TYPE_GLOBAL;
|
||||
}
|
||||
|
||||
void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name,
|
||||
@ -334,7 +358,7 @@ void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, float f)
|
||||
struct sc_uniform *u = find_uniform(sc, name);
|
||||
u->input.type = RA_VARTYPE_FLOAT;
|
||||
u->glsl_type = "float";
|
||||
update_ubo_params(sc, u);
|
||||
update_uniform_params(sc, u);
|
||||
u->v.f[0] = f;
|
||||
}
|
||||
|
||||
@ -343,7 +367,7 @@ void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, int i)
|
||||
struct sc_uniform *u = find_uniform(sc, name);
|
||||
u->input.type = RA_VARTYPE_INT;
|
||||
u->glsl_type = "int";
|
||||
update_ubo_params(sc, u);
|
||||
update_uniform_params(sc, u);
|
||||
u->v.i[0] = i;
|
||||
}
|
||||
|
||||
@ -353,7 +377,7 @@ void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, float f[2])
|
||||
u->input.type = RA_VARTYPE_FLOAT;
|
||||
u->input.dim_v = 2;
|
||||
u->glsl_type = "vec2";
|
||||
update_ubo_params(sc, u);
|
||||
update_uniform_params(sc, u);
|
||||
u->v.f[0] = f[0];
|
||||
u->v.f[1] = f[1];
|
||||
}
|
||||
@ -364,7 +388,7 @@ void gl_sc_uniform_vec3(struct gl_shader_cache *sc, char *name, float f[3])
|
||||
u->input.type = RA_VARTYPE_FLOAT;
|
||||
u->input.dim_v = 3;
|
||||
u->glsl_type = "vec3";
|
||||
update_ubo_params(sc, u);
|
||||
update_uniform_params(sc, u);
|
||||
u->v.f[0] = f[0];
|
||||
u->v.f[1] = f[1];
|
||||
u->v.f[2] = f[2];
|
||||
@ -383,7 +407,7 @@ void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name,
|
||||
u->input.dim_v = 2;
|
||||
u->input.dim_m = 2;
|
||||
u->glsl_type = "mat2";
|
||||
update_ubo_params(sc, u);
|
||||
update_uniform_params(sc, u);
|
||||
for (int n = 0; n < 4; n++)
|
||||
u->v.f[n] = v[n];
|
||||
if (transpose)
|
||||
@ -405,7 +429,7 @@ void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name,
|
||||
u->input.dim_v = 3;
|
||||
u->input.dim_m = 3;
|
||||
u->glsl_type = "mat3";
|
||||
update_ubo_params(sc, u);
|
||||
update_uniform_params(sc, u);
|
||||
for (int n = 0; n < 9; n++)
|
||||
u->v.f[n] = v[n];
|
||||
if (transpose)
|
||||
@ -465,6 +489,20 @@ static void update_ubo(struct ra *ra, struct ra_buf *ubo, struct sc_uniform *u)
|
||||
}
|
||||
}
|
||||
|
||||
static void update_pushc(struct ra *ra, void *pushc, struct sc_uniform *u)
|
||||
{
|
||||
uintptr_t src = (uintptr_t) &u->v;
|
||||
uintptr_t dst = (uintptr_t) pushc + (ptrdiff_t) u->offset;
|
||||
struct ra_layout src_layout = ra_renderpass_input_layout(&u->input);
|
||||
struct ra_layout dst_layout = u->layout;
|
||||
|
||||
for (int i = 0; i < u->input.dim_m; i++) {
|
||||
memcpy((void *)dst, (void *)src, src_layout.stride);
|
||||
src += src_layout.stride;
|
||||
dst += dst_layout.stride;
|
||||
}
|
||||
}
|
||||
|
||||
static void update_uniform(struct gl_shader_cache *sc, struct sc_entry *e,
|
||||
struct sc_uniform *u, int n)
|
||||
{
|
||||
@ -489,6 +527,10 @@ static void update_uniform(struct gl_shader_cache *sc, struct sc_entry *e,
|
||||
assert(e->ubo);
|
||||
update_ubo(sc->ra, e->ubo, u);
|
||||
break;
|
||||
case SC_UNIFORM_TYPE_PUSHC:
|
||||
assert(e->pushc);
|
||||
update_pushc(sc->ra, e->pushc, u);
|
||||
break;
|
||||
default: abort();
|
||||
}
|
||||
}
|
||||
@ -571,6 +613,11 @@ static bool create_pass(struct gl_shader_cache *sc, struct sc_entry *entry)
|
||||
MP_TARRAY_APPEND(sc, params.inputs, params.num_inputs, ubo_input);
|
||||
}
|
||||
|
||||
if (sc->pushc_size) {
|
||||
params.push_constants_size = MP_ALIGN_UP(sc->pushc_size, 4);
|
||||
entry->pushc = talloc_zero_size(entry, params.push_constants_size);
|
||||
}
|
||||
|
||||
if (sc->ubo_size) {
|
||||
struct ra_buf_params ubo_params = {
|
||||
.type = RA_BUF_TYPE_UNIFORM,
|
||||
@ -623,8 +670,22 @@ static void add_uniforms(struct gl_shader_cache *sc, bstr *dst)
|
||||
struct sc_uniform *u = &sc->uniforms[n];
|
||||
if (u->type != SC_UNIFORM_TYPE_UBO)
|
||||
continue;
|
||||
ADD(dst, "layout(offset=%zu) %s %s;\n", u->offset,
|
||||
u->glsl_type, u->input.name);
|
||||
ADD(dst, "layout(offset=%zu) %s %s;\n", u->offset, u->glsl_type,
|
||||
u->input.name);
|
||||
}
|
||||
ADD(dst, "};\n");
|
||||
}
|
||||
|
||||
// Ditto for push constants
|
||||
if (sc->pushc_size > 0) {
|
||||
ADD(dst, "layout(push_constant) uniform PushC {\n");
|
||||
for (int n = 0; n < sc->num_uniforms; n++) {
|
||||
struct sc_uniform *u = &sc->uniforms[n];
|
||||
if (u->type != SC_UNIFORM_TYPE_PUSHC)
|
||||
continue;
|
||||
// push constants don't support explicit offsets
|
||||
ADD(dst, "/*offset=%zu*/ %s %s;\n", u->offset, u->glsl_type,
|
||||
u->input.name);
|
||||
}
|
||||
ADD(dst, "};\n");
|
||||
}
|
||||
@ -911,6 +972,7 @@ struct mp_pass_perf gl_sc_dispatch_draw(struct gl_shader_cache *sc,
|
||||
.pass = sc->current_shader->pass,
|
||||
.values = sc->values,
|
||||
.num_values = sc->num_values,
|
||||
.push_constants = sc->current_shader->pushc,
|
||||
.target = target,
|
||||
.vertex_data = ptr,
|
||||
.vertex_count = num,
|
||||
@ -942,6 +1004,7 @@ struct mp_pass_perf gl_sc_dispatch_compute(struct gl_shader_cache *sc,
|
||||
.pass = sc->current_shader->pass,
|
||||
.values = sc->values,
|
||||
.num_values = sc->num_values,
|
||||
.push_constants = sc->current_shader->pushc,
|
||||
.compute_groups = {w, h, d},
|
||||
};
|
||||
|
||||
|
@ -191,6 +191,7 @@ struct ra *ra_create_vk(struct mpvk_ctx *vk, struct mp_log *log)
|
||||
ra->glsl_version = vk->spirv->glsl_version;
|
||||
ra->glsl_vulkan = true;
|
||||
ra->max_shmem = vk->limits.maxComputeSharedMemorySize;
|
||||
ra->max_pushc_size = vk->limits.maxPushConstantsSize;
|
||||
|
||||
if (vk->pool->props.queueFlags & VK_QUEUE_COMPUTE_BIT)
|
||||
ra->caps |= RA_CAP_COMPUTE;
|
||||
@ -1079,6 +1080,12 @@ static struct ra_renderpass *vk_renderpass_create(struct ra *ra,
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
||||
.setLayoutCount = 1,
|
||||
.pSetLayouts = &pass_vk->dsLayout,
|
||||
.pushConstantRangeCount = params->push_constants_size ? 1 : 0,
|
||||
.pPushConstantRanges = &(VkPushConstantRange){
|
||||
.stageFlags = stageFlags[params->type],
|
||||
.offset = 0,
|
||||
.size = params->push_constants_size,
|
||||
},
|
||||
};
|
||||
|
||||
VK(vkCreatePipelineLayout(vk->dev, &linfo, MPVK_ALLOCATOR,
|
||||
@ -1416,6 +1423,13 @@ static void vk_renderpass_run(struct ra *ra,
|
||||
vkCmdBindDescriptorSets(cmd->buf, bindPoint[pass->params.type],
|
||||
pass_vk->pipeLayout, 0, 1, &ds, 0, NULL);
|
||||
|
||||
if (pass->params.push_constants_size) {
|
||||
vkCmdPushConstants(cmd->buf, pass_vk->pipeLayout,
|
||||
stageFlags[pass->params.type], 0,
|
||||
pass->params.push_constants_size,
|
||||
params->push_constants);
|
||||
}
|
||||
|
||||
switch (pass->params.type) {
|
||||
case RA_RENDERPASS_TYPE_COMPUTE:
|
||||
vkCmdDispatch(cmd->buf, params->compute_groups[0],
|
||||
@ -1664,6 +1678,7 @@ static struct ra_fns ra_fns_vk = {
|
||||
.clear = vk_clear,
|
||||
.blit = vk_blit,
|
||||
.uniform_layout = std140_layout,
|
||||
.push_constant_layout = std430_layout,
|
||||
.renderpass_create = vk_renderpass_create,
|
||||
.renderpass_destroy = vk_renderpass_destroy_lazy,
|
||||
.renderpass_run = vk_renderpass_run,
|
||||
|
Loading…
Reference in New Issue
Block a user