From 97b526c192add6f252b327245fd9223546867352 Mon Sep 17 00:00:00 2001 From: Lynne Date: Mon, 6 Apr 2020 18:18:50 +0100 Subject: [PATCH] hwcontext_vulkan: only use one semaphore per image The idea was to allow separate planes to be filtered independently; however, in hindsight, literally nothing uses separate per-plane semaphores and it would only work when each plane is backed by separate device memory. --- libavfilter/vulkan.c | 38 +++++----- libavutil/hwcontext_vulkan.c | 138 ++++++++++++++++++----------------- libavutil/hwcontext_vulkan.h | 4 +- 3 files changed, 89 insertions(+), 91 deletions(-) diff --git a/libavfilter/vulkan.c b/libavfilter/vulkan.c index ff76ab15e9..c103440529 100644 --- a/libavfilter/vulkan.c +++ b/libavfilter/vulkan.c @@ -390,32 +390,28 @@ int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e, AVFrame *frame, VkPipelineStageFlagBits in_wait_dst_flag) { AVVkFrame *f = (AVVkFrame *)frame->data[0]; - AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data; - int planes = av_pix_fmt_count_planes(fc->sw_format); - for (int i = 0; i < planes; i++) { - e->sem_wait = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc, - (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait)); - if (!e->sem_wait) - return AVERROR(ENOMEM); + e->sem_wait = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc, + (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait)); + if (!e->sem_wait) + return AVERROR(ENOMEM); - e->sem_wait_dst = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc, - (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst)); - if (!e->sem_wait_dst) - return AVERROR(ENOMEM); + e->sem_wait_dst = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc, + (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst)); + if (!e->sem_wait_dst) + return AVERROR(ENOMEM); - e->sem_sig = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc, - (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig)); - if (!e->sem_sig) - return AVERROR(ENOMEM); + e->sem_sig = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc, 
+ (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig)); + if (!e->sem_sig) + return AVERROR(ENOMEM); - e->sem_wait[e->sem_wait_cnt] = f->sem[i]; - e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag; - e->sem_wait_cnt++; + e->sem_wait[e->sem_wait_cnt] = f->sem; + e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag; + e->sem_wait_cnt++; - e->sem_sig[e->sem_sig_cnt] = f->sem[i]; - e->sem_sig_cnt++; - } + e->sem_sig[e->sem_sig_cnt] = f->sem; + e->sem_sig_cnt++; return 0; } diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index ed88979d0d..e4546f67ca 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -81,7 +81,7 @@ typedef struct AVVkFrameInternal { CUexternalMemory ext_mem[AV_NUM_DATA_POINTERS]; CUmipmappedArray cu_mma[AV_NUM_DATA_POINTERS]; CUarray cu_array[AV_NUM_DATA_POINTERS]; - CUexternalSemaphore cu_sem[AV_NUM_DATA_POINTERS]; + CUexternalSemaphore cu_sem; #endif } AVVkFrameInternal; @@ -1042,9 +1042,10 @@ static void vulkan_free_internal(AVVkFrameInternal *internal) AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal; CudaFunctions *cu = cu_internal->cuda_dl; + if (internal->cu_sem) + CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem)); + for (int i = 0; i < planes; i++) { - if (internal->cu_sem[i]) - CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[i])); if (internal->cu_mma[i]) CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[i])); if (internal->ext_mem[i]) @@ -1070,9 +1071,10 @@ static void vulkan_frame_free(void *opaque, uint8_t *data) for (int i = 0; i < planes; i++) { vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); - vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc); } + vkDestroySemaphore(hwctx->act_dev, f->sem, hwctx->alloc); + av_free(f); } @@ -1166,8 +1168,8 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, .commandBufferCount = 1, .pCommandBuffers = &ectx->buf, - .pSignalSemaphores = 
frame->sem, - .signalSemaphoreCount = planes, + .pSignalSemaphores = &frame->sem, + .signalSemaphoreCount = 1, }; switch (pmode) { @@ -1288,19 +1290,19 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, goto fail; } - /* Create semaphore */ - ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn, - hwctx->alloc, &f->sem[i]); - if (ret != VK_SUCCESS) { - av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n", - vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - f->layout[i] = image_create_info.initialLayout; f->access[i] = 0x0; } + /* Create semaphore */ + ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn, + hwctx->alloc, &f->sem); + if (ret != VK_SUCCESS) { + av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + f->flags = 0x0; f->tiling = tiling; @@ -1622,9 +1624,10 @@ static void vulkan_unmap_from(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap) for (int i = 0; i < planes; i++) { vkDestroyImage(hwctx->act_dev, map->frame->img[i], hwctx->alloc); vkFreeMemory(hwctx->act_dev, map->frame->mem[i], hwctx->alloc); - vkDestroySemaphore(hwctx->act_dev, map->frame->sem[i], hwctx->alloc); } + vkDestroySemaphore(hwctx->act_dev, map->frame->sem, hwctx->alloc); + av_freep(&map->frame); } @@ -1668,6 +1671,9 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { 0 }; VkBindImagePlaneMemoryInfo plane_info[AV_NUM_DATA_POINTERS] = { 0 }; VkExternalMemoryHandleTypeFlagBits htype = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; + VkSemaphoreCreateInfo sem_spawn = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + }; VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdPropertiesKHR); @@ -1741,10 +1747,6 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f .handleTypes = htype, }; - VkSemaphoreCreateInfo sem_spawn = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, - }; - const int p_w 
= i > 0 ? AV_CEIL_RSHIFT(hwfc->width, fmt_desc->log2_chroma_w) : hwfc->width; const int p_h = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, fmt_desc->log2_chroma_h) : hwfc->height; @@ -1785,19 +1787,6 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f goto fail; } - ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn, - hwctx->alloc, &f->sem[i]); - if (ret != VK_SUCCESS) { - av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n", - vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - /* We'd import a semaphore onto the one we created using - * vkImportSemaphoreFdKHR but unfortunately neither DRM nor VAAPI - * offer us anything we could import and sync with, so instead - * just signal the semaphore we created. */ - f->layout[i] = image_create_info.initialLayout; f->access[i] = 0x0; @@ -1818,6 +1807,19 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f } } + ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn, + hwctx->alloc, &f->sem); + if (ret != VK_SUCCESS) { + av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + /* We'd import a semaphore onto the one we created using + * vkImportSemaphoreFdKHR but unfortunately neither DRM nor VAAPI + * offer us anything we could import and sync with, so instead + * just signal the semaphore we created. 
*/ + /* Bind the allocated memory to the images */ ret = vkBindImageMemory2(hwctx->act_dev, bind_counts, bind_info); if (ret != VK_SUCCESS) { @@ -1838,12 +1840,11 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f return 0; fail: - for (int i = 0; i < desc->nb_layers; i++) { + for (int i = 0; i < desc->nb_layers; i++) vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); - vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc); - } for (int i = 0; i < desc->nb_objects; i++) vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); + vkDestroySemaphore(hwctx->act_dev, f->sem, hwctx->alloc); av_free(f); @@ -1953,6 +1954,15 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc, dst_int = dst_f->internal; if (!dst_int || !dst_int->cuda_fc_ref) { + VkSemaphoreGetFdInfoKHR sem_export = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR, + .semaphore = dst_f->sem, + .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, + }; + CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = { + .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD, + }; + if (!dst_f->internal) dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal)); @@ -1991,14 +2001,6 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc, .memory = dst_f->mem[i], .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR, }; - VkSemaphoreGetFdInfoKHR sem_export = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR, - .semaphore = dst_f->sem[i], - .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, - }; - CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = { - .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD, - }; ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info, &ext_desc.handle.fd); @@ -2028,22 +2030,22 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc, err = AVERROR_EXTERNAL; goto fail; } + } - ret = pfn_vkGetSemaphoreFdKHR(hwctx->act_dev, &sem_export, - &ext_sem_desc.handle.fd); - if (ret != VK_SUCCESS) { - 
av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n", - vk_ret2str(ret)); - err = AVERROR_EXTERNAL; - goto fail; - } + ret = pfn_vkGetSemaphoreFdKHR(hwctx->act_dev, &sem_export, + &ext_sem_desc.handle.fd); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n", + vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } - ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[i], - &ext_sem_desc)); - if (ret < 0) { - err = AVERROR_EXTERNAL; - goto fail; - } + ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem, + &ext_sem_desc)); + if (ret < 0) { + err = AVERROR_EXTERNAL; + goto fail; } } @@ -2069,8 +2071,8 @@ static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc, AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx; AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal; CudaFunctions *cu = cu_internal->cuda_dl; - CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 }; - CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 }; + CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par = { 0 }; + CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par = { 0 }; ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx)); if (ret < 0) { @@ -2086,8 +2088,8 @@ static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc, } dst_int = dst_f->internal; - ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par, - planes, cuda_dev->stream)); + ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(&dst_int->cu_sem, &s_w_par, + 1, cuda_dev->stream)); if (ret < 0) { err = AVERROR_EXTERNAL; goto fail; @@ -2115,8 +2117,8 @@ static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc, } } - ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par, - planes, cuda_dev->stream)); + ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(&dst_int->cu_sem, &s_s_par, + 1, cuda_dev->stream)); if (ret < 0) { err = AVERROR_EXTERNAL; goto fail; @@ -2492,11 +2494,11 
@@ static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame, .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .commandBufferCount = 1, .pCommandBuffers = &s->cmd.buf, - .pSignalSemaphores = frame->sem, - .pWaitSemaphores = frame->sem, + .pSignalSemaphores = &frame->sem, + .pWaitSemaphores = &frame->sem, .pWaitDstStageMask = sem_wait_dst, - .signalSemaphoreCount = planes, - .waitSemaphoreCount = planes, + .signalSemaphoreCount = 1, + .waitSemaphoreCount = 1, }; ret = vkBeginCommandBuffer(s->cmd.buf, &cmd_start); diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h index 01d6d98b01..ebc28916f3 100644 --- a/libavutil/hwcontext_vulkan.h +++ b/libavutil/hwcontext_vulkan.h @@ -133,10 +133,10 @@ typedef struct AVVkFrame { VkImageLayout layout[AV_NUM_DATA_POINTERS]; /** - * Per-image semaphores. Must not be freed manually. Must be waited on + * Per-frame semaphore. Must not be freed manually. Must be waited on * and signalled at every queue submission. */ - VkSemaphore sem[AV_NUM_DATA_POINTERS]; + VkSemaphore sem; /** * Internal data.