diff --git a/video/out/gpu/ra.h b/video/out/gpu/ra.h
index ffb010960a..08ccdaee70 100644
--- a/video/out/gpu/ra.h
+++ b/video/out/gpu/ra.h
@@ -53,6 +53,7 @@ enum {
     RA_CAP_GLOBAL_UNIFORM = 1 << 8, // supports using "naked" uniforms (not UBO)
     RA_CAP_GATHER = 1 << 9, // supports textureGather in GLSL
     RA_CAP_FRAGCOORD = 1 << 10, // supports reading from gl_FragCoord
+    RA_CAP_PARALLEL_COMPUTE = 1 << 11, // supports parallel compute shaders
 };
 
 enum ra_ctype {
diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
index 3f0959931d..1b50166dc4 100644
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@@ -1237,6 +1237,11 @@ static void finish_pass_tex(struct gl_video *p, struct ra_tex **dst_tex,
         return;
     }
 
+    // If RA_CAP_PARALLEL_COMPUTE is set, try to prefer compute shaders
+    // over fragment shaders wherever possible.
+    if (!p->pass_compute.active && (p->ra->caps & RA_CAP_PARALLEL_COMPUTE))
+        pass_is_compute(p, 16, 16);
+
     if (p->pass_compute.active) {
         gl_sc_uniform_image2D_wo(p->sc, "out_image", *dst_tex);
         if (!p->pass_compute.directly_writes)
diff --git a/video/out/vulkan/ra_vk.c b/video/out/vulkan/ra_vk.c
index 905fc89596..f0353629e6 100644
--- a/video/out/vulkan/ra_vk.c
+++ b/video/out/vulkan/ra_vk.c
@@ -208,8 +208,13 @@ struct ra *ra_create_vk(struct mpvk_ctx *vk, struct mp_log *log)
     ra->max_shmem = vk->limits.maxComputeSharedMemorySize;
     ra->max_pushc_size = vk->limits.maxPushConstantsSize;
 
-    if (vk->pool_compute)
+    if (vk->pool_compute) {
         ra->caps |= RA_CAP_COMPUTE;
+        // If we have more compute queues than graphics queues, we probably
+        // want to be using them. (This seems mostly relevant for AMD)
+        if (vk->pool_compute->num_queues > vk->pool_graphics->num_queues)
+            ra->caps |= RA_CAP_PARALLEL_COMPUTE;
+    }
 
     if (!vk_setup_formats(ra))
         goto error;