mirror of
https://github.com/mpv-player/mpv
synced 2025-01-04 14:12:10 +00:00
vo_gpu: aggressively prefer async compute
On AMD devices, we only get one graphics pipe but several compute pipes, which can (in theory) run independently. As such, we should prefer compute shaders over fragment shaders in scenarios where we expect them to be better for parallelism. This is amusingly trivial to do, and actually improves performance even in a single-queue scenario.
This commit is contained in:
parent
bded247fb5
commit
dcda8bd36a
@ -53,6 +53,7 @@ enum {
|
|||||||
RA_CAP_GLOBAL_UNIFORM = 1 << 8, // supports using "naked" uniforms (not UBO)
|
RA_CAP_GLOBAL_UNIFORM = 1 << 8, // supports using "naked" uniforms (not UBO)
|
||||||
RA_CAP_GATHER = 1 << 9, // supports textureGather in GLSL
|
RA_CAP_GATHER = 1 << 9, // supports textureGather in GLSL
|
||||||
RA_CAP_FRAGCOORD = 1 << 10, // supports reading from gl_FragCoord
|
RA_CAP_FRAGCOORD = 1 << 10, // supports reading from gl_FragCoord
|
||||||
|
RA_CAP_PARALLEL_COMPUTE = 1 << 11, // supports parallel compute shaders
|
||||||
};
|
};
|
||||||
|
|
||||||
enum ra_ctype {
|
enum ra_ctype {
|
||||||
|
@ -1237,6 +1237,11 @@ static void finish_pass_tex(struct gl_video *p, struct ra_tex **dst_tex,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If RA_CAP_PARALLEL_COMPUTE is set, try to prefer compute shaders
|
||||||
|
// over fragment shaders wherever possible.
|
||||||
|
if (!p->pass_compute.active && (p->ra->caps & RA_CAP_PARALLEL_COMPUTE))
|
||||||
|
pass_is_compute(p, 16, 16);
|
||||||
|
|
||||||
if (p->pass_compute.active) {
|
if (p->pass_compute.active) {
|
||||||
gl_sc_uniform_image2D_wo(p->sc, "out_image", *dst_tex);
|
gl_sc_uniform_image2D_wo(p->sc, "out_image", *dst_tex);
|
||||||
if (!p->pass_compute.directly_writes)
|
if (!p->pass_compute.directly_writes)
|
||||||
|
@ -208,8 +208,13 @@ struct ra *ra_create_vk(struct mpvk_ctx *vk, struct mp_log *log)
|
|||||||
ra->max_shmem = vk->limits.maxComputeSharedMemorySize;
|
ra->max_shmem = vk->limits.maxComputeSharedMemorySize;
|
||||||
ra->max_pushc_size = vk->limits.maxPushConstantsSize;
|
ra->max_pushc_size = vk->limits.maxPushConstantsSize;
|
||||||
|
|
||||||
if (vk->pool_compute)
|
if (vk->pool_compute) {
|
||||||
ra->caps |= RA_CAP_COMPUTE;
|
ra->caps |= RA_CAP_COMPUTE;
|
||||||
|
// If we have more compute queues than graphics queues, we probably
|
||||||
|
// want to be using them. (This seems mostly relevant for AMD)
|
||||||
|
if (vk->pool_compute->num_queues > vk->pool_graphics->num_queues)
|
||||||
|
ra->caps |= RA_CAP_PARALLEL_COMPUTE;
|
||||||
|
}
|
||||||
|
|
||||||
if (!vk_setup_formats(ra))
|
if (!vk_setup_formats(ra))
|
||||||
goto error;
|
goto error;
|
||||||
|
Loading…
Reference in New Issue
Block a user