1
0
mirror of https://github.com/mpv-player/mpv synced 2025-01-19 22:01:10 +00:00

vo_gpu: vulkan: add support for push constants

Can in theory avoid updating the uniform buffer every frame
This commit is contained in:
Niklas Haas 2017-09-08 06:13:55 +02:00
parent ed345ffc2f
commit ca85a153b4
3 changed files with 112 additions and 25 deletions

View File

@ -26,6 +26,9 @@ struct ra {
// time.
size_t max_shmem;
// Maximum push constant size. Set by the RA backend at init time.
size_t max_pushc_size;
// Set of supported texture formats. Must be added by RA backend at init time.
// If there are equivalent formats with different caveats, the preferred
// formats should have a lower index. (E.g. GLES3 should put rg8 before la.)
@ -245,6 +248,7 @@ struct ra_renderpass_params {
// Uniforms, including texture/sampler inputs.
struct ra_renderpass_input *inputs;
int num_inputs;
size_t push_constants_size; // must be <= ra.max_pushc_size and a multiple of 4
// Highly implementation-specific byte array storing a compiled version
// of the program. Can be used to speed up shader compilation. A backend
@ -317,6 +321,7 @@ struct ra_renderpass_run_params {
// even if they do not change.
struct ra_renderpass_input_val *values;
int num_values;
void *push_constants; // must be set if params.push_constants_size > 0
// --- pass->params.type==RA_RENDERPASS_TYPE_RASTER only
@ -387,6 +392,10 @@ struct ra_fns {
// but must be implemented if RA_CAP_BUF_RO is supported.
struct ra_layout (*uniform_layout)(struct ra_renderpass_input *inp);
// Returns the layout requirements of a push constant element. Optional,
// but must be implemented if ra.max_pushc_size > 0.
struct ra_layout (*push_constant_layout)(struct ra_renderpass_input *inp);
// Clear the dst with the given color (rgba) and within the given scissor.
// dst must have dst->params.render_dst==true. Content outside of the
// scissor is preserved.

View File

@ -29,6 +29,7 @@ union uniform_val {
enum sc_uniform_type {
SC_UNIFORM_TYPE_GLOBAL = 0, // global uniform (RA_CAP_GLOBAL_UNIFORM)
SC_UNIFORM_TYPE_UBO = 1, // uniform buffer (RA_CAP_BUF_RO)
SC_UNIFORM_TYPE_PUSHC = 2, // push constant (ra.max_pushc_size)
};
struct sc_uniform {
@ -37,7 +38,7 @@ struct sc_uniform {
const char *glsl_type;
union uniform_val v;
char *buffer_format;
// for SC_UNIFORM_TYPE_UBO:
// for SC_UNIFORM_TYPE_UBO/PUSHC:
struct ra_layout layout;
size_t offset; // byte offset within the buffer
};
@ -56,6 +57,7 @@ struct sc_entry {
struct timer_pool *timer;
struct ra_buf *ubo;
int ubo_index; // for ra_renderpass_input_val.index
void *pushc;
};
struct gl_shader_cache {
@ -87,6 +89,7 @@ struct gl_shader_cache {
int ubo_binding;
size_t ubo_size;
size_t pushc_size;
struct ra_renderpass_input_val *values;
int num_values;
@ -129,6 +132,7 @@ void gl_sc_reset(struct gl_shader_cache *sc)
sc->num_uniforms = 0;
sc->ubo_binding = 0;
sc->ubo_size = 0;
sc->pushc_size = 0;
for (int i = 0; i < RA_VARTYPE_COUNT; i++)
sc->next_binding[i] = 0;
sc->current_shader = NULL;
@ -255,25 +259,45 @@ static int gl_sc_next_binding(struct gl_shader_cache *sc, enum ra_vartype type)
}
}
// Updates the UBO metadata for the given sc_uniform. Assumes sc_uniform->input
// is already set. Also updates sc_uniform->type.
static void update_ubo_params(struct gl_shader_cache *sc, struct sc_uniform *u)
// Updates the metadata for the given sc_uniform. Assumes sc_uniform->input
// and glsl_type/buffer_format are already set.
static void update_uniform_params(struct gl_shader_cache *sc, struct sc_uniform *u)
{
if (!(sc->ra->caps & RA_CAP_BUF_RO))
return;
// Try not using push constants for "large" values like matrices, since
// this is likely to both exceed the VGPR budget as well as the pushc size
// budget
bool try_pushc = u->input.dim_m == 1;
// Using UBOs with explicit layout(offset) like we do requires GLSL version
// 440 or higher. In theory the UBO code can also use older versions, but
// just try and avoid potential headaches. This also ensures they're only
// used on drivers that are probably modern enough to actually support them
// correctly.
if (sc->ra->glsl_version < 440)
return;
// Attempt using push constants first
if (try_pushc && sc->ra->glsl_vulkan && sc->ra->max_pushc_size) {
struct ra_layout layout = sc->ra->fns->push_constant_layout(&u->input);
size_t offset = MP_ALIGN_UP(sc->pushc_size, layout.align);
// Push constants have limited size, so make sure we don't exceed this
size_t new_size = offset + layout.size;
if (new_size <= sc->ra->max_pushc_size) {
u->type = SC_UNIFORM_TYPE_PUSHC;
u->layout = layout;
u->offset = offset;
sc->pushc_size = new_size;
return;
}
}
u->type = SC_UNIFORM_TYPE_UBO;
u->layout = sc->ra->fns->uniform_layout(&u->input);
u->offset = MP_ALIGN_UP(sc->ubo_size, u->layout.align);
sc->ubo_size = u->offset + u->layout.size;
// Attempt using uniform buffer next. The GLSL version 440 check is due
// to explicit offsets on UBO entries. In theory we could leave away
// the offsets and support UBOs for older GL as well, but this is a nice
// safety net for driver bugs (and also rules out potentially buggy drivers)
if (sc->ra->glsl_version >= 440 && (sc->ra->caps & RA_CAP_BUF_RO)) {
u->type = SC_UNIFORM_TYPE_UBO;
u->layout = sc->ra->fns->uniform_layout(&u->input);
u->offset = MP_ALIGN_UP(sc->ubo_size, u->layout.align);
sc->ubo_size = u->offset + u->layout.size;
return;
}
// If all else fails, use global uniforms
assert(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM);
u->type = SC_UNIFORM_TYPE_GLOBAL;
}
void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name,
@ -334,7 +358,7 @@ void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, float f)
struct sc_uniform *u = find_uniform(sc, name);
u->input.type = RA_VARTYPE_FLOAT;
u->glsl_type = "float";
update_ubo_params(sc, u);
update_uniform_params(sc, u);
u->v.f[0] = f;
}
@ -343,7 +367,7 @@ void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, int i)
struct sc_uniform *u = find_uniform(sc, name);
u->input.type = RA_VARTYPE_INT;
u->glsl_type = "int";
update_ubo_params(sc, u);
update_uniform_params(sc, u);
u->v.i[0] = i;
}
@ -353,7 +377,7 @@ void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, float f[2])
u->input.type = RA_VARTYPE_FLOAT;
u->input.dim_v = 2;
u->glsl_type = "vec2";
update_ubo_params(sc, u);
update_uniform_params(sc, u);
u->v.f[0] = f[0];
u->v.f[1] = f[1];
}
@ -364,7 +388,7 @@ void gl_sc_uniform_vec3(struct gl_shader_cache *sc, char *name, float f[3])
u->input.type = RA_VARTYPE_FLOAT;
u->input.dim_v = 3;
u->glsl_type = "vec3";
update_ubo_params(sc, u);
update_uniform_params(sc, u);
u->v.f[0] = f[0];
u->v.f[1] = f[1];
u->v.f[2] = f[2];
@ -383,7 +407,7 @@ void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name,
u->input.dim_v = 2;
u->input.dim_m = 2;
u->glsl_type = "mat2";
update_ubo_params(sc, u);
update_uniform_params(sc, u);
for (int n = 0; n < 4; n++)
u->v.f[n] = v[n];
if (transpose)
@ -405,7 +429,7 @@ void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name,
u->input.dim_v = 3;
u->input.dim_m = 3;
u->glsl_type = "mat3";
update_ubo_params(sc, u);
update_uniform_params(sc, u);
for (int n = 0; n < 9; n++)
u->v.f[n] = v[n];
if (transpose)
@ -465,6 +489,20 @@ static void update_ubo(struct ra *ra, struct ra_buf *ubo, struct sc_uniform *u)
}
}
static void update_pushc(struct ra *ra, void *pushc, struct sc_uniform *u)
{
uintptr_t src = (uintptr_t) &u->v;
uintptr_t dst = (uintptr_t) pushc + (ptrdiff_t) u->offset;
struct ra_layout src_layout = ra_renderpass_input_layout(&u->input);
struct ra_layout dst_layout = u->layout;
for (int i = 0; i < u->input.dim_m; i++) {
memcpy((void *)dst, (void *)src, src_layout.stride);
src += src_layout.stride;
dst += dst_layout.stride;
}
}
static void update_uniform(struct gl_shader_cache *sc, struct sc_entry *e,
struct sc_uniform *u, int n)
{
@ -489,6 +527,10 @@ static void update_uniform(struct gl_shader_cache *sc, struct sc_entry *e,
assert(e->ubo);
update_ubo(sc->ra, e->ubo, u);
break;
case SC_UNIFORM_TYPE_PUSHC:
assert(e->pushc);
update_pushc(sc->ra, e->pushc, u);
break;
default: abort();
}
}
@ -571,6 +613,11 @@ static bool create_pass(struct gl_shader_cache *sc, struct sc_entry *entry)
MP_TARRAY_APPEND(sc, params.inputs, params.num_inputs, ubo_input);
}
if (sc->pushc_size) {
params.push_constants_size = MP_ALIGN_UP(sc->pushc_size, 4);
entry->pushc = talloc_zero_size(entry, params.push_constants_size);
}
if (sc->ubo_size) {
struct ra_buf_params ubo_params = {
.type = RA_BUF_TYPE_UNIFORM,
@ -623,8 +670,22 @@ static void add_uniforms(struct gl_shader_cache *sc, bstr *dst)
struct sc_uniform *u = &sc->uniforms[n];
if (u->type != SC_UNIFORM_TYPE_UBO)
continue;
ADD(dst, "layout(offset=%zu) %s %s;\n", u->offset,
u->glsl_type, u->input.name);
ADD(dst, "layout(offset=%zu) %s %s;\n", u->offset, u->glsl_type,
u->input.name);
}
ADD(dst, "};\n");
}
// Ditto for push constants
if (sc->pushc_size > 0) {
ADD(dst, "layout(push_constant) uniform PushC {\n");
for (int n = 0; n < sc->num_uniforms; n++) {
struct sc_uniform *u = &sc->uniforms[n];
if (u->type != SC_UNIFORM_TYPE_PUSHC)
continue;
// push constants don't support explicit offsets
ADD(dst, "/*offset=%zu*/ %s %s;\n", u->offset, u->glsl_type,
u->input.name);
}
ADD(dst, "};\n");
}
@ -911,6 +972,7 @@ struct mp_pass_perf gl_sc_dispatch_draw(struct gl_shader_cache *sc,
.pass = sc->current_shader->pass,
.values = sc->values,
.num_values = sc->num_values,
.push_constants = sc->current_shader->pushc,
.target = target,
.vertex_data = ptr,
.vertex_count = num,
@ -942,6 +1004,7 @@ struct mp_pass_perf gl_sc_dispatch_compute(struct gl_shader_cache *sc,
.pass = sc->current_shader->pass,
.values = sc->values,
.num_values = sc->num_values,
.push_constants = sc->current_shader->pushc,
.compute_groups = {w, h, d},
};

View File

@ -191,6 +191,7 @@ struct ra *ra_create_vk(struct mpvk_ctx *vk, struct mp_log *log)
ra->glsl_version = vk->spirv->glsl_version;
ra->glsl_vulkan = true;
ra->max_shmem = vk->limits.maxComputeSharedMemorySize;
ra->max_pushc_size = vk->limits.maxPushConstantsSize;
if (vk->pool->props.queueFlags & VK_QUEUE_COMPUTE_BIT)
ra->caps |= RA_CAP_COMPUTE;
@ -1079,6 +1080,12 @@ static struct ra_renderpass *vk_renderpass_create(struct ra *ra,
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = &pass_vk->dsLayout,
.pushConstantRangeCount = params->push_constants_size ? 1 : 0,
.pPushConstantRanges = &(VkPushConstantRange){
.stageFlags = stageFlags[params->type],
.offset = 0,
.size = params->push_constants_size,
},
};
VK(vkCreatePipelineLayout(vk->dev, &linfo, MPVK_ALLOCATOR,
@ -1416,6 +1423,13 @@ static void vk_renderpass_run(struct ra *ra,
vkCmdBindDescriptorSets(cmd->buf, bindPoint[pass->params.type],
pass_vk->pipeLayout, 0, 1, &ds, 0, NULL);
if (pass->params.push_constants_size) {
vkCmdPushConstants(cmd->buf, pass_vk->pipeLayout,
stageFlags[pass->params.type], 0,
pass->params.push_constants_size,
params->push_constants);
}
switch (pass->params.type) {
case RA_RENDERPASS_TYPE_COMPUTE:
vkCmdDispatch(cmd->buf, params->compute_groups[0],
@ -1664,6 +1678,7 @@ static struct ra_fns ra_fns_vk = {
.clear = vk_clear,
.blit = vk_blit,
.uniform_layout = std140_layout,
.push_constant_layout = std430_layout,
.renderpass_create = vk_renderpass_create,
.renderpass_destroy = vk_renderpass_destroy_lazy,
.renderpass_run = vk_renderpass_run,