From f705e9ea0567c8dcf800ae1ee0647fca157c6199 Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 10 Nov 2021 03:50:54 +0100 Subject: [PATCH] lavfi/vulkan: refactor, fix and fully implement multiple queues --- libavfilter/vf_avgblur_vulkan.c | 27 +-- libavfilter/vf_chromaber_vulkan.c | 25 ++- libavfilter/vf_overlay_vulkan.c | 30 +-- libavfilter/vf_scale_vulkan.c | 27 +-- libavfilter/vulkan.c | 340 ++++++++++++++++++------------ libavfilter/vulkan.h | 103 +++++---- 6 files changed, 336 insertions(+), 216 deletions(-) diff --git a/libavfilter/vf_avgblur_vulkan.c b/libavfilter/vf_avgblur_vulkan.c index bf02dab1db..4795e482a9 100644 --- a/libavfilter/vf_avgblur_vulkan.c +++ b/libavfilter/vf_avgblur_vulkan.c @@ -24,12 +24,13 @@ #define CGS 32 typedef struct AvgBlurVulkanContext { - VulkanFilterContext vkctx; + FFVulkanContext vkctx; int initialized; + FFVkQueueFamilyCtx qf; FFVkExecContext *exec; - VulkanPipeline *pl_hor; - VulkanPipeline *pl_ver; + FFVulkanPipeline *pl_hor; + FFVulkanPipeline *pl_ver; /* Shader updators, must be in the main filter struct */ VkDescriptorImageInfo input_images[3]; @@ -73,16 +74,14 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) FFSPIRVShader *shd; AvgBlurVulkanContext *s = ctx->priv; const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - VkSampler *sampler = ff_vk_init_sampler(ctx, 1, VK_FILTER_LINEAR); - VulkanDescriptorSetBinding desc_i[2] = { + FFVulkanDescriptorSetBinding desc_i[2] = { { .name = "input_img", .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .dimensions = 2, .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .samplers = DUP_SAMPLER_ARRAY4(*sampler), }, { .name = "output_img", @@ -95,17 +94,17 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) }, }; - if (!sampler) - return AVERROR_EXTERNAL; + ff_vk_qf_init(ctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0); - s->vkctx.queue_family_idx = s->vkctx.hwctx->queue_family_comp_index; - s->vkctx.queue_count = s->vkctx.hwctx->nb_comp_queues; + desc_i[0].sampler = ff_vk_init_sampler(ctx, 1, VK_FILTER_LINEAR); + if (!desc_i[0].sampler) + return AVERROR_EXTERNAL; { /* Create shader for the horizontal pass */ desc_i[0].updater = s->input_images; desc_i[1].updater = s->tmp_images; - s->pl_hor = ff_vk_create_pipeline(ctx); + s->pl_hor = ff_vk_create_pipeline(ctx, &s->qf); if (!s->pl_hor) return AVERROR(ENOMEM); @@ -148,7 +147,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) desc_i[0].updater = s->tmp_images; desc_i[1].updater = s->output_images; - s->pl_ver = ff_vk_create_pipeline(ctx); + s->pl_ver = ff_vk_create_pipeline(ctx, &s->qf); if (!s->pl_ver) return AVERROR(ENOMEM); @@ -188,7 +187,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) } /* Execution context */ - RET(ff_vk_create_exec_ctx(ctx, &s->exec)); + RET(ff_vk_create_exec_ctx(ctx, &s->exec, &s->qf)); s->initialized = 1; @@ -311,6 +310,8 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *tmp_f if (err) return err; + ff_vk_qf_rotate(&s->qf); + return err; fail: diff --git a/libavfilter/vf_chromaber_vulkan.c b/libavfilter/vf_chromaber_vulkan.c index 25ef20fe73..83ab72f716 100644 --- a/libavfilter/vf_chromaber_vulkan.c +++ b/libavfilter/vf_chromaber_vulkan.c @@ -24,11 +24,12 @@ #define CGROUPS (int [3]){ 32, 32, 1 } typedef struct ChromaticAberrationVulkanContext { - VulkanFilterContext vkctx; + FFVulkanContext vkctx; int initialized; + FFVkQueueFamilyCtx qf; FFVkExecContext *exec; - VulkanPipeline *pl; + FFVulkanPipeline *pl; /* Shader updators, must be in the main filter struct */ VkDescriptorImageInfo input_images[3]; @@ -67,17 +68,18 @@ static const char distort_chroma_kernel[] = { static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) { int err; + FFVkSampler *sampler; ChromaticAberrationVulkanContext *s = ctx->priv; + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + + ff_vk_qf_init(ctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0); /* Create a sampler */ - VkSampler *sampler = ff_vk_init_sampler(ctx, 0, VK_FILTER_LINEAR); + sampler = ff_vk_init_sampler(ctx, 0, VK_FILTER_LINEAR); if (!sampler) return AVERROR_EXTERNAL; - s->vkctx.queue_family_idx = s->vkctx.hwctx->queue_family_comp_index; - s->vkctx.queue_count = s->vkctx.hwctx->nb_comp_queues; - - s->pl = ff_vk_create_pipeline(ctx); + s->pl = ff_vk_create_pipeline(ctx, &s->qf); if (!s->pl) return AVERROR(ENOMEM); @@ -86,8 +88,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) s->opts.dist[1] = (s->opts.dist[1] / 100.0f) + 1.0f; { /* Create the shader */ - const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - VulkanDescriptorSetBinding desc_i[2] = { + FFVulkanDescriptorSetBinding desc_i[2] = { { .name = "input_img", .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, @@ -95,7 +96,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, .updater = s->input_images, - .samplers = DUP_SAMPLER_ARRAY4(*sampler), + .sampler = sampler, }, { .name = "output_img", @@ -158,7 +159,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) RET(ff_vk_init_compute_pipeline(ctx, s->pl)); /* Execution context */ - RET(ff_vk_create_exec_ctx(ctx, &s->exec)); + RET(ff_vk_create_exec_ctx(ctx, &s->exec, &s->qf)); s->initialized = 1; @@ -256,6 +257,8 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f) if (err) return err; + ff_vk_qf_rotate(&s->qf); + return err; fail: diff --git a/libavfilter/vf_overlay_vulkan.c b/libavfilter/vf_overlay_vulkan.c index f08800af2c..b902ad83f5 100644 --- a/libavfilter/vf_overlay_vulkan.c +++ b/libavfilter/vf_overlay_vulkan.c @@ -25,11 +25,12 @@ #define CGROUPS (int [3]){ 32, 32, 1 } typedef struct OverlayVulkanContext { - VulkanFilterContext vkctx; + FFVulkanContext vkctx; int initialized; - VulkanPipeline *pl; + FFVkQueueFamilyCtx qf; FFVkExecContext *exec; + FFVulkanPipeline *pl; FFFrameSync fs; FFVkBuffer params_buf; @@ -79,23 +80,24 @@ static const char overlay_alpha[] = { static av_cold int init_filter(AVFilterContext *ctx) { int err; + FFVkSampler *sampler; OverlayVulkanContext *s = ctx->priv; - VkSampler *sampler = ff_vk_init_sampler(ctx, 1, VK_FILTER_NEAREST); + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + + ff_vk_qf_init(ctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0); + + sampler = ff_vk_init_sampler(ctx, 1, VK_FILTER_NEAREST); if (!sampler) return AVERROR_EXTERNAL; - s->pl = ff_vk_create_pipeline(ctx); + s->pl = ff_vk_create_pipeline(ctx, &s->qf); if (!s->pl) return AVERROR(ENOMEM); - s->vkctx.queue_family_idx = s->vkctx.hwctx->queue_family_comp_index; - s->vkctx.queue_count = s->vkctx.hwctx->nb_comp_queues; - { /* Create the shader */ - const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); const int ialpha = av_pix_fmt_desc_get(s->vkctx.input_format)->flags & AV_PIX_FMT_FLAG_ALPHA; - VulkanDescriptorSetBinding desc_i[3] = { + FFVulkanDescriptorSetBinding desc_i[3] = { { .name = "main_img", .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, @@ -103,7 +105,7 @@ static av_cold int init_filter(AVFilterContext *ctx) .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, .updater = s->main_images, - .samplers = DUP_SAMPLER_ARRAY4(*sampler), + .sampler = sampler, }, { .name = "overlay_img", @@ -112,7 +114,7 @@ static av_cold int init_filter(AVFilterContext *ctx) .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, .updater = s->overlay_images, - .samplers = DUP_SAMPLER_ARRAY4(*sampler), + .sampler = sampler, }, { .name = "output_img", @@ -126,7 +128,7 @@ static av_cold int init_filter(AVFilterContext *ctx) }, }; - VulkanDescriptorSetBinding desc_b = { + FFVulkanDescriptorSetBinding desc_b = { .name = "params", .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .mem_quali = "readonly", @@ -215,7 +217,7 @@ static av_cold int init_filter(AVFilterContext *ctx) } /* Execution context */ - RET(ff_vk_create_exec_ctx(ctx, &s->exec)); + RET(ff_vk_create_exec_ctx(ctx, &s->exec, &s->qf)); s->initialized = 1; @@ -339,6 +341,8 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, if (err) return err; + ff_vk_qf_rotate(&s->qf); + return err; fail: diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c index 680e9c2f4f..3a2251f8df 100644 --- a/libavfilter/vf_scale_vulkan.c +++ b/libavfilter/vf_scale_vulkan.c @@ -33,11 +33,12 @@ enum ScalerFunc { }; typedef struct ScaleVulkanContext { - VulkanFilterContext vkctx; + FFVulkanContext vkctx; int initialized; + FFVkQueueFamilyCtx qf; FFVkExecContext *exec; - VulkanPipeline *pl; + FFVulkanPipeline *pl; FFVkBuffer params_buf; /* Shader updators, must be in the main filter struct */ @@ -107,7 +108,7 @@ static const char write_444[] = { static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) { int err; - VkSampler *sampler; + FFVkSampler *sampler; VkFilter sampler_mode; ScaleVulkanContext *s = ctx->priv; @@ -115,9 +116,9 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) int crop_y = in->crop_top; int crop_w = in->width - (in->crop_left + in->crop_right); int crop_h = in->height - (in->crop_top + in->crop_bottom); + int in_planes = av_pix_fmt_count_planes(s->vkctx.input_format); - s->vkctx.queue_family_idx = s->vkctx.hwctx->queue_family_comp_index; - s->vkctx.queue_count = s->vkctx.hwctx->nb_comp_queues; + ff_vk_qf_init(ctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0); switch (s->scaler) { case F_NEAREST: @@ -133,20 +134,20 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) if (!sampler) return AVERROR_EXTERNAL; - s->pl = ff_vk_create_pipeline(ctx); + s->pl = ff_vk_create_pipeline(ctx, &s->qf); if (!s->pl) return AVERROR(ENOMEM); { /* Create the shader */ - VulkanDescriptorSetBinding desc_i[2] = { + FFVulkanDescriptorSetBinding desc_i[2] = { { .name = "input_img", .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .dimensions = 2, - .elems = av_pix_fmt_count_planes(s->vkctx.input_format), + .elems = in_planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, .updater = s->input_images, - .samplers = DUP_SAMPLER_ARRAY4(*sampler), + .sampler = sampler, }, { .name = "output_img", @@ -160,7 +161,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) }, }; - VulkanDescriptorSetBinding desc_b = { + FFVulkanDescriptorSetBinding desc_b = { .name = "params", .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .mem_quali = "readonly", @@ -178,7 +179,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) ff_vk_set_compute_shader_sizes(ctx, shd, CGROUPS); RET(ff_vk_add_descriptor_set(ctx, s->pl, shd, desc_i, 2, 0)); /* set 0 */ - RET(ff_vk_add_descriptor_set(ctx, s->pl, shd, &desc_b, 1, 0)); /* set 0 */ + RET(ff_vk_add_descriptor_set(ctx, s->pl, shd, &desc_b, 1, 0)); /* set 1 */ GLSLD( scale_bilinear ); @@ -280,7 +281,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) } /* Execution context */ - RET(ff_vk_create_exec_ctx(ctx, &s->exec)); + RET(ff_vk_create_exec_ctx(ctx, &s->exec, &s->qf)); s->initialized = 1; @@ -384,6 +385,8 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f) if (err) return err; + ff_vk_qf_rotate(&s->qf); + return err; fail: diff --git a/libavfilter/vulkan.c b/libavfilter/vulkan.c index c9a2ae4593..a30699963e 100644 --- a/libavfilter/vulkan.c +++ b/libavfilter/vulkan.c @@ -20,6 +20,7 @@ #include "vulkan.h" #include "glslang.h" +#include "libavutil/avassert.h" #include "libavutil/vulkan_loader.h" /* Generic macro for creating contexts which need to keep their addresses @@ -88,15 +89,54 @@ const char *ff_vk_ret2str(VkResult res) #undef CASE } +void ff_vk_qf_init(AVFilterContext *avctx, FFVkQueueFamilyCtx *qf, + VkQueueFlagBits dev_family, int queue_limit) +{ + FFVulkanContext *s = avctx->priv; + + if (!queue_limit) + queue_limit = INT32_MAX; + + switch (dev_family) { + case VK_QUEUE_GRAPHICS_BIT: + qf->queue_family = s->hwctx->queue_family_index; + qf->nb_queues = FFMIN(s->hwctx->nb_graphics_queues, queue_limit); + return; + case VK_QUEUE_COMPUTE_BIT: + qf->queue_family = s->hwctx->queue_family_comp_index; + qf->nb_queues = FFMIN(s->hwctx->nb_comp_queues, queue_limit); + return; + case VK_QUEUE_TRANSFER_BIT: + qf->queue_family = s->hwctx->queue_family_tx_index; + qf->nb_queues = FFMIN(s->hwctx->nb_tx_queues, queue_limit); + return; + case VK_QUEUE_VIDEO_ENCODE_BIT_KHR: + qf->queue_family = s->hwctx->queue_family_encode_index; + qf->nb_queues = FFMIN(s->hwctx->nb_encode_queues, queue_limit); + return; + case VK_QUEUE_VIDEO_DECODE_BIT_KHR: + qf->queue_family = s->hwctx->queue_family_decode_index; + qf->nb_queues = FFMIN(s->hwctx->nb_decode_queues, queue_limit); + return; + default: + av_assert0(0); /* Should never happen */ + } + + return; +} + +void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf) +{ + qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues; +} + static int vk_alloc_mem(AVFilterContext *avctx, VkMemoryRequirements *req, VkMemoryPropertyFlagBits req_flags, void *alloc_extension, VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem) { VkResult ret; int index = -1; - VkPhysicalDeviceProperties props; - VkPhysicalDeviceMemoryProperties mprops; - VulkanFilterContext *s = avctx->priv; + FFVulkanContext *s = avctx->priv; FFVulkanFunctions *vk = &s->vkfn; VkMemoryAllocateInfo alloc_info = { @@ -104,24 +144,21 @@ static int vk_alloc_mem(AVFilterContext *avctx, VkMemoryRequirements *req, .pNext = alloc_extension, }; - vk->GetPhysicalDeviceProperties(s->hwctx->phys_dev, &props); - vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &mprops); - /* Align if we need to */ if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) - req->size = FFALIGN(req->size, props.limits.minMemoryMapAlignment); + req->size = FFALIGN(req->size, s->props.limits.minMemoryMapAlignment); alloc_info.allocationSize = req->size; /* The vulkan spec requires memory types to be sorted in the "optimal" * order, so the first matching type we find will be the best/fastest one */ - for (int i = 0; i < mprops.memoryTypeCount; i++) { + for (int i = 0; i < s->mprops.memoryTypeCount; i++) { /* The memory type must be supported by the requirements (bitfield) */ if (!(req->memoryTypeBits & (1 << i))) continue; /* The memory type flags must include our properties */ - if ((mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags) + if ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags) continue; /* Found a suitable memory type */ @@ -145,7 +182,7 @@ static int vk_alloc_mem(AVFilterContext *avctx, VkMemoryRequirements *req, return AVERROR(ENOMEM); } - *mem_flags |= mprops.memoryTypes[index].propertyFlags; + *mem_flags |= s->mprops.memoryTypes[index].propertyFlags; return 0; } @@ -156,7 +193,7 @@ int ff_vk_create_buf(AVFilterContext *avctx, FFVkBuffer *buf, size_t size, int err; VkResult ret; int use_ded_mem; - VulkanFilterContext *s = avctx->priv; + FFVulkanContext *s = avctx->priv; FFVulkanFunctions *vk = &s->vkfn; VkBufferCreateInfo buf_spawn = { @@ -220,7 +257,7 @@ int ff_vk_map_buffers(AVFilterContext *avctx, FFVkBuffer *buf, uint8_t *mem[], int nb_buffers, int invalidate) { VkResult ret; - VulkanFilterContext *s = avctx->priv; + FFVulkanContext *s = avctx->priv; FFVulkanFunctions *vk = &s->vkfn; VkMappedMemoryRange *inval_list = NULL; int inval_count = 0; @@ -271,7 +308,7 @@ int ff_vk_unmap_buffers(AVFilterContext *avctx, FFVkBuffer *buf, int nb_buffers, { int err = 0; VkResult ret; - VulkanFilterContext *s = avctx->priv; + FFVulkanContext *s = avctx->priv; FFVulkanFunctions *vk = &s->vkfn; VkMappedMemoryRange *flush_list = NULL; int flush_count = 0; @@ -311,7 +348,7 @@ int ff_vk_unmap_buffers(AVFilterContext *avctx, FFVkBuffer *buf, int nb_buffers, void ff_vk_free_buf(AVFilterContext *avctx, FFVkBuffer *buf) { - VulkanFilterContext *s = avctx->priv; + FFVulkanContext *s = avctx->priv; FFVulkanFunctions *vk = &s->vkfn; if (!buf) @@ -323,7 +360,7 @@ void ff_vk_free_buf(AVFilterContext *avctx, FFVkBuffer *buf) vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc); } -int ff_vk_add_push_constant(AVFilterContext *avctx, VulkanPipeline *pl, +int ff_vk_add_push_constant(AVFilterContext *avctx, FFVulkanPipeline *pl, int offset, int size, VkShaderStageFlagBits stage) { VkPushConstantRange *pc; @@ -343,37 +380,37 @@ int ff_vk_add_push_constant(AVFilterContext *avctx, VulkanPipeline *pl, return 0; } -FN_CREATING(VulkanFilterContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num) -int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx) +FN_CREATING(FFVulkanContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num) +int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx, + FFVkQueueFamilyCtx *qf) { VkResult ret; FFVkExecContext *e; - VulkanFilterContext *s = avctx->priv; + FFVulkanContext *s = avctx->priv; FFVulkanFunctions *vk = &s->vkfn; - int queue_family = s->queue_family_idx; - int nb_queues = s->queue_count; - VkCommandPoolCreateInfo cqueue_create = { .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, - .queueFamilyIndex = queue_family, + .queueFamilyIndex = qf->queue_family, }; VkCommandBufferAllocateInfo cbuf_create = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, - .commandBufferCount = nb_queues, + .commandBufferCount = qf->nb_queues, }; e = create_exec_ctx(s); if (!e) return AVERROR(ENOMEM); - e->queues = av_mallocz(nb_queues * sizeof(*e->queues)); + e->qf = qf; + + e->queues = av_mallocz(qf->nb_queues * sizeof(*e->queues)); if (!e->queues) return AVERROR(ENOMEM); - e->bufs = av_mallocz(nb_queues * sizeof(*e->bufs)); + e->bufs = av_mallocz(qf->nb_queues * sizeof(*e->bufs)); if (!e->bufs) return AVERROR(ENOMEM); @@ -396,9 +433,9 @@ int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx) return AVERROR_EXTERNAL; } - for (int i = 0; i < nb_queues; i++) { + for (int i = 0; i < qf->nb_queues; i++) { FFVkQueueCtx *q = &e->queues[i]; - vk->GetDeviceQueue(s->hwctx->act_dev, queue_family, i, &q->queue); + vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family, i, &q->queue); } *ctx = e; @@ -408,8 +445,7 @@ int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx) void ff_vk_discard_exec_deps(AVFilterContext *avctx, FFVkExecContext *e) { - VulkanFilterContext *s = avctx->priv; - FFVkQueueCtx *q = &e->queues[s->cur_queue_idx]; + FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; for (int j = 0; j < q->nb_buf_deps; j++) av_buffer_unref(&q->buf_deps[j]); @@ -426,9 +462,9 @@ void ff_vk_discard_exec_deps(AVFilterContext *avctx, FFVkExecContext *e) int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e) { VkResult ret; - VulkanFilterContext *s = avctx->priv; + FFVulkanContext *s = avctx->priv; FFVulkanFunctions *vk = &s->vkfn; - FFVkQueueCtx *q = &e->queues[s->cur_queue_idx]; + FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; VkCommandBufferBeginInfo cmd_start = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, @@ -455,7 +491,7 @@ int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e) /* Discard queue dependencies */ ff_vk_discard_exec_deps(avctx, e); - ret = vk->BeginCommandBuffer(e->bufs[s->cur_queue_idx], &cmd_start); + ret = vk->BeginCommandBuffer(e->bufs[e->qf->cur_queue], &cmd_start); if (ret != VK_SUCCESS) { av_log(avctx, AV_LOG_ERROR, "Failed to start command recoding: %s\n", ff_vk_ret2str(ret)); @@ -467,17 +503,15 @@ int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e) VkCommandBuffer ff_vk_get_exec_buf(AVFilterContext *avctx, FFVkExecContext *e) { - VulkanFilterContext *s = avctx->priv; - return e->bufs[s->cur_queue_idx]; + return e->bufs[e->qf->cur_queue]; } int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e, AVFrame *frame, VkPipelineStageFlagBits in_wait_dst_flag) { AVFrame **dst; - VulkanFilterContext *s = avctx->priv; AVVkFrame *f = (AVVkFrame *)frame->data[0]; - FFVkQueueCtx *q = &e->queues[s->cur_queue_idx]; + FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data; int planes = av_pix_fmt_count_planes(fc->sw_format); @@ -517,16 +551,21 @@ int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e, return AVERROR(ENOMEM); } + e->sem_sig_val_dst = av_fast_realloc(e->sem_sig_val_dst, &e->sem_sig_val_dst_alloc, + (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val_dst)); + if (!e->sem_sig_val_dst) { + ff_vk_discard_exec_deps(avctx, e); + return AVERROR(ENOMEM); + } + e->sem_wait[e->sem_wait_cnt] = f->sem[i]; e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag; e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i]; e->sem_wait_cnt++; - /* TODO: fix this in case execution fails */ - f->sem_value[i]++; - e->sem_sig[e->sem_sig_cnt] = f->sem[i]; - e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i]; + e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i] + 1; + e->sem_sig_val_dst[e->sem_sig_cnt] = &f->sem_value[i]; e->sem_sig_cnt++; } @@ -551,9 +590,9 @@ int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e, int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e) { VkResult ret; - VulkanFilterContext *s = avctx->priv; + FFVulkanContext *s = avctx->priv; FFVulkanFunctions *vk = &s->vkfn; - FFVkQueueCtx *q = &e->queues[s->cur_queue_idx]; + FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = { .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, @@ -568,7 +607,7 @@ int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e) .pNext = &s_timeline_sem_info, .commandBufferCount = 1, - .pCommandBuffers = &e->bufs[s->cur_queue_idx], + .pCommandBuffers = &e->bufs[e->qf->cur_queue], .pWaitSemaphores = e->sem_wait, .pWaitDstStageMask = e->sem_wait_dst, @@ -578,7 +617,7 @@ int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e) .signalSemaphoreCount = e->sem_sig_cnt, }; - ret = vk->EndCommandBuffer(e->bufs[s->cur_queue_idx]); + ret = vk->EndCommandBuffer(e->bufs[e->qf->cur_queue]); if (ret != VK_SUCCESS) { av_log(avctx, AV_LOG_ERROR, "Unable to finish command buffer: %s\n", ff_vk_ret2str(ret)); @@ -592,8 +631,8 @@ int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e) return AVERROR_EXTERNAL; } - /* Rotate queues */ - s->cur_queue_idx = (s->cur_queue_idx + 1) % s->queue_count; + for (int i = 0; i < e->sem_sig_cnt; i++) + *e->sem_sig_val_dst[i] += 1; return 0; } @@ -602,8 +641,7 @@ int ff_vk_add_dep_exec_ctx(AVFilterContext *avctx, FFVkExecContext *e, AVBufferRef **deps, int nb_deps) { AVBufferRef **dst; - VulkanFilterContext *s = avctx->priv; - FFVkQueueCtx *q = &e->queues[s->cur_queue_idx]; + FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; if (!deps || !nb_deps) return 0; @@ -632,7 +670,7 @@ err: static int vulkan_filter_set_device(AVFilterContext *avctx, AVBufferRef *device) { - VulkanFilterContext *s = avctx->priv; + FFVulkanContext *s = avctx->priv; av_buffer_unref(&s->device_ref); @@ -649,7 +687,7 @@ static int vulkan_filter_set_device(AVFilterContext *avctx, static int vulkan_filter_set_frames(AVFilterContext *avctx, AVBufferRef *frames) { - VulkanFilterContext *s = avctx->priv; + FFVulkanContext *s = avctx->priv; av_buffer_unref(&s->frames_ref); @@ -664,7 +702,8 @@ int ff_vk_filter_config_input(AVFilterLink *inlink) { int err; AVFilterContext *avctx = inlink->dst; - VulkanFilterContext *s = avctx->priv; + FFVulkanContext *s = avctx->priv; + FFVulkanFunctions *vk = &s->vkfn; AVHWFramesContext *input_frames; if (!inlink->hw_frames_ctx) { @@ -695,6 +734,9 @@ int ff_vk_filter_config_input(AVFilterLink *inlink) if (err < 0) return err; + vk->GetPhysicalDeviceProperties(s->hwctx->phys_dev, &s->props); + vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops); + /* Default output parameters match input parameters. */ s->input_format = input_frames->sw_format; if (s->output_format == AV_PIX_FMT_NONE) @@ -711,7 +753,7 @@ int ff_vk_filter_config_output_inplace(AVFilterLink *outlink) { int err; AVFilterContext *avctx = outlink->src; - VulkanFilterContext *s = avctx->priv; + FFVulkanContext *s = avctx->priv; av_buffer_unref(&outlink->hw_frames_ctx); @@ -741,7 +783,7 @@ int ff_vk_filter_config_output(AVFilterLink *outlink) { int err; AVFilterContext *avctx = outlink->src; - VulkanFilterContext *s = avctx->priv; + FFVulkanContext *s = avctx->priv; AVBufferRef *output_frames_ref; AVHWFramesContext *output_frames; @@ -790,7 +832,7 @@ fail: int ff_vk_filter_init(AVFilterContext *avctx) { - VulkanFilterContext *s = avctx->priv; + FFVulkanContext *s = avctx->priv; s->output_format = AV_PIX_FMT_NONE; @@ -800,12 +842,12 @@ int ff_vk_filter_init(AVFilterContext *avctx) return 0; } -FN_CREATING(VulkanFilterContext, VkSampler, sampler, samplers, samplers_num) -VkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords, - VkFilter filt) +FN_CREATING(FFVulkanContext, FFVkSampler, sampler, samplers, samplers_num) +FFVkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords, + VkFilter filt) { VkResult ret; - VulkanFilterContext *s = avctx->priv; + FFVulkanContext *s = avctx->priv; FFVulkanFunctions *vk = &s->vkfn; VkSamplerCreateInfo sampler_info = { @@ -823,19 +865,22 @@ VkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords, .unnormalizedCoordinates = unnorm_coords, }; - VkSampler *sampler = create_sampler(s); - if (!sampler) + FFVkSampler *sctx = create_sampler(s); + if (!sctx) return NULL; ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info, - s->hwctx->alloc, sampler); + s->hwctx->alloc, &sctx->sampler[0]); if (ret != VK_SUCCESS) { av_log(avctx, AV_LOG_ERROR, "Unable to init sampler: %s\n", ff_vk_ret2str(ret)); return NULL; } - return sampler; + for (int i = 1; i < 4; i++) + sctx->sampler[i] = sctx->sampler[0]; + + return sctx; } int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt) @@ -863,7 +908,7 @@ typedef struct ImageViewCtx { static void destroy_imageview(void *opaque, uint8_t *data) { - VulkanFilterContext *s = opaque; + FFVulkanContext *s = opaque; FFVulkanFunctions *vk = &s->vkfn; ImageViewCtx *iv = (ImageViewCtx *)data; @@ -877,7 +922,7 @@ int ff_vk_create_imageview(AVFilterContext *avctx, FFVkExecContext *e, { int err; AVBufferRef *buf; - VulkanFilterContext *s = avctx->priv; + FFVulkanContext *s = avctx->priv; FFVulkanFunctions *vk = &s->vkfn; VkImageViewCreateInfo imgview_spawn = { @@ -924,8 +969,8 @@ int ff_vk_create_imageview(AVFilterContext *avctx, FFVkExecContext *e, return 0; } -FN_CREATING(VulkanPipeline, FFSPIRVShader, shader, shaders, shaders_num) -FFSPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, VulkanPipeline *pl, +FN_CREATING(FFVulkanPipeline, FFSPIRVShader, shader, shaders, shaders_num) +FFSPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, FFVulkanPipeline *pl, const char *name, VkShaderStageFlags stage) { FFSPIRVShader *shd = create_shader(pl); @@ -984,7 +1029,7 @@ int ff_vk_compile_shader(AVFilterContext *avctx, FFSPIRVShader *shd, { int err; VkResult ret; - VulkanFilterContext *s = avctx->priv; + FFVulkanContext *s = avctx->priv; FFVulkanFunctions *vk = &s->vkfn; VkShaderModuleCreateInfo shader_create; uint8_t *spirv; @@ -1043,25 +1088,24 @@ static const struct descriptor_props { [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER] = { sizeof(VkBufferView), "imageBuffer", 1, 0, 0, 0, }, }; -int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl, - FFSPIRVShader *shd, VulkanDescriptorSetBinding *desc, +int ff_vk_add_descriptor_set(AVFilterContext *avctx, FFVulkanPipeline *pl, + FFSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc, int num, int only_print_to_shader) { VkResult ret; VkDescriptorSetLayout *layout; - VulkanFilterContext *s = avctx->priv; + FFVulkanContext *s = avctx->priv; FFVulkanFunctions *vk = &s->vkfn; if (only_print_to_shader) goto print; pl->desc_layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout), - pl->desc_layout_num + 1); + pl->desc_layout_num + pl->qf->nb_queues); if (!pl->desc_layout) return AVERROR(ENOMEM); layout = &pl->desc_layout[pl->desc_layout_num]; - memset(layout, 0, sizeof(*layout)); { /* Create descriptor set layout descriptions */ VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 }; @@ -1076,21 +1120,27 @@ int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl, desc_binding[i].descriptorType = desc[i].type; desc_binding[i].descriptorCount = FFMAX(desc[i].elems, 1); desc_binding[i].stageFlags = desc[i].stages; - desc_binding[i].pImmutableSamplers = desc[i].samplers; + desc_binding[i].pImmutableSamplers = desc[i].sampler ? + desc[i].sampler->sampler : + NULL; } desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; desc_create_layout.pBindings = desc_binding; desc_create_layout.bindingCount = num; - ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout, - s->hwctx->alloc, layout); - av_free(desc_binding); - if (ret != VK_SUCCESS) { - av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set " - "layout: %s\n", ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; + for (int i = 0; i < pl->qf->nb_queues; i++) { + ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout, + s->hwctx->alloc, &layout[i]); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set " + "layout: %s\n", ff_vk_ret2str(ret)); + av_free(desc_binding); + return AVERROR_EXTERNAL; + } } + + av_free(desc_binding); } { /* Pool each descriptor by type and update pool counts */ @@ -1108,7 +1158,7 @@ int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl, memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize)); } pl->pool_size_desc[j].type = desc[i].type; - pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1); + pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1)*pl->qf->nb_queues; } } @@ -1132,27 +1182,32 @@ int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl, pl->desc_template_info = av_realloc_array(pl->desc_template_info, sizeof(*pl->desc_template_info), - pl->desc_layout_num + 1); + pl->total_descriptor_sets + pl->qf->nb_queues); if (!pl->desc_template_info) return AVERROR(ENOMEM); - dt = &pl->desc_template_info[pl->desc_layout_num]; - memset(dt, 0, sizeof(*dt)); + dt = &pl->desc_template_info[pl->total_descriptor_sets]; + memset(dt, 0, sizeof(*dt)*pl->qf->nb_queues); - dt->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO; - dt->templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET; - dt->descriptorSetLayout = *layout; - dt->pDescriptorUpdateEntries = des_entries; - dt->descriptorUpdateEntryCount = num; + for (int i = 0; i < pl->qf->nb_queues; i++) { + dt[i].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO; + dt[i].templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET; + dt[i].descriptorSetLayout = layout[i]; + dt[i].pDescriptorUpdateEntries = des_entries; + dt[i].descriptorUpdateEntryCount = num; + } } - pl->desc_layout_num++; + pl->descriptor_sets_num++; + + pl->desc_layout_num += pl->qf->nb_queues; + pl->total_descriptor_sets += pl->qf->nb_queues; print: /* Write shader info */ for (int i = 0; i < num; i++) { const struct descriptor_props *prop = &descriptor_props[desc[i].type]; - GLSLA("layout (set = %i, binding = %i", pl->desc_layout_num - 1, i); + GLSLA("layout (set = %i, binding = %i", pl->descriptor_sets_num - 1, i); if (desc[i].mem_layout) GLSLA(", %s", desc[i].mem_layout); @@ -1184,12 +1239,14 @@ print: return 0; } -void ff_vk_update_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl, +void ff_vk_update_descriptor_set(AVFilterContext *avctx, FFVulkanPipeline *pl, int set_id) { - VulkanFilterContext *s = avctx->priv; + FFVulkanContext *s = avctx->priv; FFVulkanFunctions *vk = &s->vkfn; + set_id = set_id*pl->qf->nb_queues + pl->qf->cur_queue; + vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev, pl->desc_set[set_id], pl->desc_template[set_id], @@ -1200,27 +1257,29 @@ void ff_vk_update_push_exec(AVFilterContext *avctx, FFVkExecContext *e, VkShaderStageFlagBits stage, int offset, size_t size, void *src) { - VulkanFilterContext *s = avctx->priv; + FFVulkanContext *s = avctx->priv; FFVulkanFunctions *vk = &s->vkfn; - vk->CmdPushConstants(e->bufs[s->cur_queue_idx], e->bound_pl->pipeline_layout, + vk->CmdPushConstants(e->bufs[e->qf->cur_queue], e->bound_pl->pipeline_layout, stage, offset, size, src); } -int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl) +int ff_vk_init_pipeline_layout(AVFilterContext *avctx, FFVulkanPipeline *pl) { VkResult ret; - VulkanFilterContext *s = avctx->priv; + FFVulkanContext *s = avctx->priv; FFVulkanFunctions *vk = &s->vkfn; - pl->descriptor_sets_num = pl->desc_layout_num * s->queue_count; + pl->desc_staging = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_staging)); + if (!pl->desc_staging) + return AVERROR(ENOMEM); { /* Init descriptor set pool */ VkDescriptorPoolCreateInfo pool_create_info = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, .poolSizeCount = pl->pool_size_desc_num, .pPoolSizes = pl->pool_size_desc, - .maxSets = pl->descriptor_sets_num, + .maxSets = pl->total_descriptor_sets, }; ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info, @@ -1237,11 +1296,11 @@ int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl) VkDescriptorSetAllocateInfo alloc_info = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, .descriptorPool = pl->desc_pool, - .descriptorSetCount = pl->descriptor_sets_num, + .descriptorSetCount = pl->total_descriptor_sets, .pSetLayouts = pl->desc_layout, }; - pl->desc_set = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_set)); + pl->desc_set = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_set)); if (!pl->desc_set) return AVERROR(ENOMEM); @@ -1257,12 +1316,14 @@ int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl) { /* Finally create the pipeline layout */ VkPipelineLayoutCreateInfo spawn_pipeline_layout = { .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .setLayoutCount = pl->desc_layout_num, - .pSetLayouts = pl->desc_layout, + .pSetLayouts = (VkDescriptorSetLayout *)pl->desc_staging, .pushConstantRangeCount = pl->push_consts_num, .pPushConstantRanges = pl->push_consts, }; + for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) + pl->desc_staging[spawn_pipeline_layout.setLayoutCount++] = pl->desc_layout[i]; + ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout, s->hwctx->alloc, &pl->pipeline_layout); av_freep(&pl->push_consts); @@ -1275,21 +1336,19 @@ int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl) } { /* Descriptor template (for tightly packed descriptors) */ - VkDescriptorUpdateTemplateCreateInfo *desc_template_info; + VkDescriptorUpdateTemplateCreateInfo *dt; - pl->desc_template = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_template)); + pl->desc_template = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_template)); if (!pl->desc_template) return AVERROR(ENOMEM); /* Create update templates for the descriptor sets */ - for (int i = 0; i < pl->descriptor_sets_num; i++) { - desc_template_info = &pl->desc_template_info[i % pl->desc_layout_num]; - desc_template_info->pipelineLayout = pl->pipeline_layout; + for (int i = 0; i < pl->total_descriptor_sets; i++) { + dt = &pl->desc_template_info[i]; + dt->pipelineLayout = pl->pipeline_layout; ret = vk->CreateDescriptorUpdateTemplate(s->hwctx->act_dev, - desc_template_info, - s->hwctx->alloc, + dt, s->hwctx->alloc, &pl->desc_template[i]); - av_free((void *)desc_template_info->pDescriptorUpdateEntries); if (ret != VK_SUCCESS) { av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor " "template: %s\n", ff_vk_ret2str(ret)); @@ -1297,23 +1356,34 @@ int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl) } } + /* Free the duplicated memory used for the template entries */ + for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) { + dt = &pl->desc_template_info[i]; + av_free((void *)dt->pDescriptorUpdateEntries); + } + av_freep(&pl->desc_template_info); } return 0; } -FN_CREATING(VulkanFilterContext, VulkanPipeline, pipeline, pipelines, pipelines_num) -VulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx) +FN_CREATING(FFVulkanContext, FFVulkanPipeline, pipeline, pipelines, pipelines_num) +FFVulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx, + FFVkQueueFamilyCtx *qf) { - return create_pipeline(avctx->priv); + FFVulkanPipeline *pl = create_pipeline(avctx->priv); + if (pl) + pl->qf = qf; + + return pl; } -int ff_vk_init_compute_pipeline(AVFilterContext *avctx, VulkanPipeline *pl) +int ff_vk_init_compute_pipeline(AVFilterContext *avctx, FFVulkanPipeline *pl) { int i; VkResult ret; - VulkanFilterContext *s = avctx->priv; + FFVulkanContext *s = avctx->priv; FFVulkanFunctions *vk = &s->vkfn; VkComputePipelineCreateInfo pipe = { @@ -1346,26 +1416,31 @@ int ff_vk_init_compute_pipeline(AVFilterContext *avctx, VulkanPipeline *pl) } void ff_vk_bind_pipeline_exec(AVFilterContext *avctx, FFVkExecContext *e, - VulkanPipeline *pl) + FFVulkanPipeline *pl) { - VulkanFilterContext *s = avctx->priv; + FFVulkanContext *s = avctx->priv; FFVulkanFunctions *vk = &s->vkfn; - vk->CmdBindPipeline(e->bufs[s->cur_queue_idx], pl->bind_point, pl->pipeline); + vk->CmdBindPipeline(e->bufs[e->qf->cur_queue], pl->bind_point, pl->pipeline); - vk->CmdBindDescriptorSets(e->bufs[s->cur_queue_idx], pl->bind_point, - pl->pipeline_layout, 0, pl->descriptor_sets_num, - pl->desc_set, 0, 0); + for (int i = 0; i < pl->descriptor_sets_num; i++) + pl->desc_staging[i] = pl->desc_set[i*pl->qf->nb_queues + pl->qf->cur_queue]; + + vk->CmdBindDescriptorSets(e->bufs[e->qf->cur_queue], pl->bind_point, + pl->pipeline_layout, 0, + pl->descriptor_sets_num, + (VkDescriptorSet *)pl->desc_staging, + 0, NULL); e->bound_pl = pl; } -static void free_exec_ctx(VulkanFilterContext *s, FFVkExecContext *e) +static void free_exec_ctx(FFVulkanContext *s, FFVkExecContext *e) { FFVulkanFunctions *vk = &s->vkfn; /* Make sure all queues have finished executing */ - for (int i = 0; i < s->queue_count; i++) { + for (int i = 0; i < e->qf->nb_queues; i++) { FFVkQueueCtx *q = &e->queues[i]; if (q->fence) { @@ -1389,7 +1464,7 @@ static void free_exec_ctx(VulkanFilterContext *s, FFVkExecContext *e) } if (e->bufs) - vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, s->queue_count, e->bufs); + vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, e->qf->nb_queues, e->bufs); if (e->pool) vk->DestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc); @@ -1397,13 +1472,14 @@ static void free_exec_ctx(VulkanFilterContext *s, FFVkExecContext *e) av_freep(&e->queues); av_freep(&e->sem_sig); av_freep(&e->sem_sig_val); + av_freep(&e->sem_sig_val_dst); av_freep(&e->sem_wait); av_freep(&e->sem_wait_dst); av_freep(&e->sem_wait_val); av_free(e); } -static void free_pipeline(VulkanFilterContext *s, VulkanPipeline *pl) +static void free_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl) { FFVulkanFunctions *vk = &s->vkfn; @@ -1433,6 +1509,7 @@ static void free_pipeline(VulkanFilterContext *s, VulkanPipeline *pl) vk->DestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool, s->hwctx->alloc); + av_freep(&pl->desc_staging); av_freep(&pl->desc_set); av_freep(&pl->shaders); av_freep(&pl->desc_layout); @@ -1443,8 +1520,10 @@ static void free_pipeline(VulkanFilterContext *s, VulkanPipeline *pl) /* Only freed in case of failure */ av_freep(&pl->pool_size_desc); if (pl->desc_template_info) { - for (int i = 0; i < pl->descriptor_sets_num; i++) - av_free((void *)pl->desc_template_info[i].pDescriptorUpdateEntries); + for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) { + VkDescriptorUpdateTemplateCreateInfo *dt = &pl->desc_template_info[i]; + av_free((void *)dt->pDescriptorUpdateEntries); + } av_freep(&pl->desc_template_info); } @@ -1453,7 +1532,7 @@ static void free_pipeline(VulkanFilterContext *s, VulkanPipeline *pl) void ff_vk_filter_uninit(AVFilterContext *avctx) { - VulkanFilterContext *s = avctx->priv; + FFVulkanContext *s = avctx->priv; FFVulkanFunctions *vk = &s->vkfn; ff_vk_glslang_uninit(); @@ -1463,7 +1542,8 @@ void ff_vk_filter_uninit(AVFilterContext *avctx) av_freep(&s->exec_ctx); for (int i = 0; i < s->samplers_num; i++) { - vk->DestroySampler(s->hwctx->act_dev, *s->samplers[i], s->hwctx->alloc); + vk->DestroySampler(s->hwctx->act_dev, s->samplers[i]->sampler[0], + s->hwctx->alloc); av_free(s->samplers[i]); } av_freep(&s->samplers); diff --git a/libavfilter/vulkan.h b/libavfilter/vulkan.h index 89b76ba355..9d17d2b14f 100644 --- a/libavfilter/vulkan.h +++ b/libavfilter/vulkan.h @@ -20,6 +20,7 @@ #define AVFILTER_VULKAN_H #define VK_NO_PROTOTYPES +#define VK_ENABLE_BETA_EXTENSIONS #include "avfilter.h" #include "libavutil/pixdesc.h" @@ -52,9 +53,6 @@ goto fail; \ } while (0) -/* Useful for attaching immutable samplers to arrays */ -#define DUP_SAMPLER_ARRAY4(x) (VkSampler []){ x, x, x, x, } - typedef struct FFSPIRVShader { const char *name; /* Name for id/debugging purposes */ AVBPrint src; @@ -62,7 +60,11 @@ typedef struct FFSPIRVShader { VkPipelineShaderStageCreateInfo shader; } FFSPIRVShader; -typedef struct VulkanDescriptorSetBinding { +typedef struct FFVkSampler { + VkSampler sampler[4]; +} FFVkSampler; + +typedef struct FFVulkanDescriptorSetBinding { const char *name; VkDescriptorType type; const char *mem_layout; /* Storage images (rgba8, etc.) and buffers (std430, etc.) */ @@ -71,9 +73,9 @@ typedef struct VulkanDescriptorSetBinding { uint32_t dimensions; /* Needed for e.g. sampler%iD */ uint32_t elems; /* 0 - scalar, 1 or more - vector */ VkShaderStageFlags stages; - const VkSampler *samplers; /* Immutable samplers, length - #elems */ + FFVkSampler *sampler; /* Sampler to use for all elems */ void *updater; /* Pointer to VkDescriptor*Info */ -} VulkanDescriptorSetBinding; +} FFVulkanDescriptorSetBinding; typedef struct FFVkBuffer { VkBuffer buf; @@ -81,7 +83,15 @@ typedef struct FFVkBuffer { VkMemoryPropertyFlagBits flags; } FFVkBuffer; -typedef struct VulkanPipeline { +typedef struct FFVkQueueFamilyCtx { + int queue_family; + int nb_queues; + int cur_queue; +} FFVkQueueFamilyCtx; + +typedef struct FFVulkanPipeline { + FFVkQueueFamilyCtx *qf; + VkPipelineBindPoint bind_point; /* Contexts */ @@ -97,18 +107,21 @@ typedef struct VulkanPipeline { int push_consts_num; /* Descriptors */ - VkDescriptorSetLayout *desc_layout; - VkDescriptorPool desc_pool; - VkDescriptorSet *desc_set; - VkDescriptorUpdateTemplate *desc_template; - int desc_layout_num; - int descriptor_sets_num; - int pool_size_desc_num; + VkDescriptorSetLayout *desc_layout; + VkDescriptorPool desc_pool; + VkDescriptorSet *desc_set; + void **desc_staging; + VkDescriptorSetLayoutBinding **desc_binding; + VkDescriptorUpdateTemplate *desc_template; + int desc_layout_num; + int descriptor_sets_num; + int total_descriptor_sets; + int pool_size_desc_num; /* Temporary, used to store data in between initialization stages */ VkDescriptorUpdateTemplateCreateInfo *desc_template_info; VkDescriptorPoolSize *pool_size_desc; -} VulkanPipeline; +} FFVulkanPipeline; typedef struct FFVkQueueCtx { VkFence fence; @@ -126,6 +139,8 @@ typedef struct FFVkQueueCtx { } FFVkQueueCtx; typedef struct FFVkExecContext { + FFVkQueueFamilyCtx *qf; + VkCommandPool pool; VkCommandBuffer *bufs; FFVkQueueCtx *queues; @@ -134,7 +149,7 @@ typedef struct FFVkExecContext { int *nb_deps; int *dep_alloc_size; - VulkanPipeline *bound_pl; + FFVulkanPipeline *bound_pl; VkSemaphore *sem_wait; int sem_wait_alloc; /* Allocated sem_wait */ @@ -152,23 +167,23 @@ typedef struct FFVkExecContext { uint64_t *sem_sig_val; int sem_sig_val_alloc; + + uint64_t **sem_sig_val_dst; + int sem_sig_val_dst_alloc; } FFVkExecContext; -typedef struct VulkanFilterContext { +typedef struct FFVulkanContext { const AVClass *class; FFVulkanFunctions vkfn; FFVulkanExtensions extensions; + VkPhysicalDeviceProperties props; + VkPhysicalDeviceMemoryProperties mprops; AVBufferRef *device_ref; AVBufferRef *frames_ref; /* For in-place filtering */ AVHWDeviceContext *device; AVVulkanDeviceContext *hwctx; - /* State - mirrored with the exec ctx */ - int cur_queue_idx; - int queue_family_idx; - int queue_count; - /* Properties */ int output_width; int output_height; @@ -176,7 +191,7 @@ typedef struct VulkanFilterContext { enum AVPixelFormat input_format; /* Samplers */ - VkSampler **samplers; + FFVkSampler **samplers; int samplers_num; /* Exec contexts */ @@ -184,12 +199,12 @@ typedef struct VulkanFilterContext { int exec_ctx_num; /* Pipelines (each can have 1 shader of each type) */ - VulkanPipeline **pipelines; + FFVulkanPipeline **pipelines; int pipelines_num; void *scratch; /* Scratch memory used only in functions */ unsigned int scratch_size; -} VulkanFilterContext; +} FFVulkanContext; /* Identity mapping - r = r, b = b, g = g, a = a */ extern const VkComponentMapping ff_comp_identity_map; @@ -218,11 +233,23 @@ int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt); */ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt); +/** + * Initialize a queue family. + * A queue limit of 0 means no limit. + */ +void ff_vk_qf_init(AVFilterContext *avctx, FFVkQueueFamilyCtx *qf, + VkQueueFlagBits dev_family, int queue_limit); + +/** + * Rotate through the queues in a queue family. + */ +void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf); + /** * Create a Vulkan sampler, will be auto-freed in ff_vk_filter_uninit() */ -VkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords, - VkFilter filt); +FFVkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords, + VkFilter filt); /** * Create an imageview. @@ -237,19 +264,20 @@ int ff_vk_create_imageview(AVFilterContext *avctx, FFVkExecContext *e, * Define a push constant for a given stage into a pipeline. * Must be called before the pipeline layout has been initialized. */ -int ff_vk_add_push_constant(AVFilterContext *avctx, VulkanPipeline *pl, +int ff_vk_add_push_constant(AVFilterContext *avctx, FFVulkanPipeline *pl, int offset, int size, VkShaderStageFlagBits stage); /** * Inits a pipeline. Everything in it will be auto-freed when calling * ff_vk_filter_uninit(). */ -VulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx); +FFVulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx, + FFVkQueueFamilyCtx *qf); /** * Inits a shader for a specific pipeline. Will be auto-freed on uninit. */ -FFSPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, VulkanPipeline *pl, +FFSPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, FFVulkanPipeline *pl, const char *name, VkShaderStageFlags stage); /** @@ -261,8 +289,8 @@ void ff_vk_set_compute_shader_sizes(AVFilterContext *avctx, FFSPIRVShader *shd, /** * Adds a descriptor set to the shader and registers them in the pipeline. */ -int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl, - FFSPIRVShader *shd, VulkanDescriptorSetBinding *desc, +int ff_vk_add_descriptor_set(AVFilterContext *avctx, FFVulkanPipeline *pl, + FFSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc, int num, int only_print_to_shader); /** @@ -280,27 +308,28 @@ void ff_vk_print_shader(AVFilterContext *avctx, FFSPIRVShader *shd, int prio); * Initializes the pipeline layout after all shaders and descriptor sets have * been finished. */ -int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl); +int ff_vk_init_pipeline_layout(AVFilterContext *avctx, FFVulkanPipeline *pl); /** * Initializes a compute pipeline. Will pick the first shader with the * COMPUTE flag set. */ -int ff_vk_init_compute_pipeline(AVFilterContext *avctx, VulkanPipeline *pl); +int ff_vk_init_compute_pipeline(AVFilterContext *avctx, FFVulkanPipeline *pl); /** * Updates a descriptor set via the updaters defined. * Can be called immediately after pipeline creation, but must be called * at least once before queue submission. */ -void ff_vk_update_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl, +void ff_vk_update_descriptor_set(AVFilterContext *avctx, FFVulkanPipeline *pl, int set_id); /** * Init an execution context for command recording and queue submission. * WIll be auto-freed on uninit. */ -int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx); +int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx, + FFVkQueueFamilyCtx *qf); /** * Begin recording to the command buffer. Previous execution must have been @@ -313,7 +342,7 @@ int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e); * Must be called after ff_vk_start_exec_recording() and before submission. */ void ff_vk_bind_pipeline_exec(AVFilterContext *avctx, FFVkExecContext *e, - VulkanPipeline *pl); + FFVulkanPipeline *pl); /** * Updates push constants.