mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2024-12-27 09:52:17 +00:00
hwcontext_vulkan: only use one semaphore per image
The idea was to allow separate planes to be filtered independently; however, in hindsight, literally nothing uses separate per-plane semaphores, and it would only work when each plane is backed by separate device memory.
This commit is contained in:
parent
3f9fd9dcfd
commit
97b526c192
@ -390,32 +390,28 @@ int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e,
|
||||
AVFrame *frame, VkPipelineStageFlagBits in_wait_dst_flag)
|
||||
{
|
||||
AVVkFrame *f = (AVVkFrame *)frame->data[0];
|
||||
AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data;
|
||||
int planes = av_pix_fmt_count_planes(fc->sw_format);
|
||||
|
||||
for (int i = 0; i < planes; i++) {
|
||||
e->sem_wait = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc,
|
||||
(e->sem_wait_cnt + 1)*sizeof(*e->sem_wait));
|
||||
if (!e->sem_wait)
|
||||
return AVERROR(ENOMEM);
|
||||
e->sem_wait = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc,
|
||||
(e->sem_wait_cnt + 1)*sizeof(*e->sem_wait));
|
||||
if (!e->sem_wait)
|
||||
return AVERROR(ENOMEM);
|
||||
|
||||
e->sem_wait_dst = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc,
|
||||
(e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst));
|
||||
if (!e->sem_wait_dst)
|
||||
return AVERROR(ENOMEM);
|
||||
e->sem_wait_dst = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc,
|
||||
(e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst));
|
||||
if (!e->sem_wait_dst)
|
||||
return AVERROR(ENOMEM);
|
||||
|
||||
e->sem_sig = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc,
|
||||
(e->sem_sig_cnt + 1)*sizeof(*e->sem_sig));
|
||||
if (!e->sem_sig)
|
||||
return AVERROR(ENOMEM);
|
||||
e->sem_sig = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc,
|
||||
(e->sem_sig_cnt + 1)*sizeof(*e->sem_sig));
|
||||
if (!e->sem_sig)
|
||||
return AVERROR(ENOMEM);
|
||||
|
||||
e->sem_wait[e->sem_wait_cnt] = f->sem[i];
|
||||
e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
|
||||
e->sem_wait_cnt++;
|
||||
e->sem_wait[e->sem_wait_cnt] = f->sem;
|
||||
e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
|
||||
e->sem_wait_cnt++;
|
||||
|
||||
e->sem_sig[e->sem_sig_cnt] = f->sem[i];
|
||||
e->sem_sig_cnt++;
|
||||
}
|
||||
e->sem_sig[e->sem_sig_cnt] = f->sem;
|
||||
e->sem_sig_cnt++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -81,7 +81,7 @@ typedef struct AVVkFrameInternal {
|
||||
CUexternalMemory ext_mem[AV_NUM_DATA_POINTERS];
|
||||
CUmipmappedArray cu_mma[AV_NUM_DATA_POINTERS];
|
||||
CUarray cu_array[AV_NUM_DATA_POINTERS];
|
||||
CUexternalSemaphore cu_sem[AV_NUM_DATA_POINTERS];
|
||||
CUexternalSemaphore cu_sem;
|
||||
#endif
|
||||
} AVVkFrameInternal;
|
||||
|
||||
@ -1042,9 +1042,10 @@ static void vulkan_free_internal(AVVkFrameInternal *internal)
|
||||
AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
|
||||
CudaFunctions *cu = cu_internal->cuda_dl;
|
||||
|
||||
if (internal->cu_sem)
|
||||
CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem));
|
||||
|
||||
for (int i = 0; i < planes; i++) {
|
||||
if (internal->cu_sem[i])
|
||||
CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[i]));
|
||||
if (internal->cu_mma[i])
|
||||
CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[i]));
|
||||
if (internal->ext_mem[i])
|
||||
@ -1070,9 +1071,10 @@ static void vulkan_frame_free(void *opaque, uint8_t *data)
|
||||
for (int i = 0; i < planes; i++) {
|
||||
vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
|
||||
vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
|
||||
vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
|
||||
}
|
||||
|
||||
vkDestroySemaphore(hwctx->act_dev, f->sem, hwctx->alloc);
|
||||
|
||||
av_free(f);
|
||||
}
|
||||
|
||||
@ -1166,8 +1168,8 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
|
||||
.commandBufferCount = 1,
|
||||
.pCommandBuffers = &ectx->buf,
|
||||
|
||||
.pSignalSemaphores = frame->sem,
|
||||
.signalSemaphoreCount = planes,
|
||||
.pSignalSemaphores = &frame->sem,
|
||||
.signalSemaphoreCount = 1,
|
||||
};
|
||||
|
||||
switch (pmode) {
|
||||
@ -1288,19 +1290,19 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* Create semaphore */
|
||||
ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
|
||||
hwctx->alloc, &f->sem[i]);
|
||||
if (ret != VK_SUCCESS) {
|
||||
av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
|
||||
vk_ret2str(ret));
|
||||
return AVERROR_EXTERNAL;
|
||||
}
|
||||
|
||||
f->layout[i] = image_create_info.initialLayout;
|
||||
f->access[i] = 0x0;
|
||||
}
|
||||
|
||||
/* Create semaphore */
|
||||
ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
|
||||
hwctx->alloc, &f->sem);
|
||||
if (ret != VK_SUCCESS) {
|
||||
av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
|
||||
vk_ret2str(ret));
|
||||
return AVERROR_EXTERNAL;
|
||||
}
|
||||
|
||||
f->flags = 0x0;
|
||||
f->tiling = tiling;
|
||||
|
||||
@ -1622,9 +1624,10 @@ static void vulkan_unmap_from(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
|
||||
for (int i = 0; i < planes; i++) {
|
||||
vkDestroyImage(hwctx->act_dev, map->frame->img[i], hwctx->alloc);
|
||||
vkFreeMemory(hwctx->act_dev, map->frame->mem[i], hwctx->alloc);
|
||||
vkDestroySemaphore(hwctx->act_dev, map->frame->sem[i], hwctx->alloc);
|
||||
}
|
||||
|
||||
vkDestroySemaphore(hwctx->act_dev, map->frame->sem, hwctx->alloc);
|
||||
|
||||
av_freep(&map->frame);
|
||||
}
|
||||
|
||||
@ -1668,6 +1671,9 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
|
||||
VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { 0 };
|
||||
VkBindImagePlaneMemoryInfo plane_info[AV_NUM_DATA_POINTERS] = { 0 };
|
||||
VkExternalMemoryHandleTypeFlagBits htype = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
|
||||
VkSemaphoreCreateInfo sem_spawn = {
|
||||
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
|
||||
};
|
||||
|
||||
VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdPropertiesKHR);
|
||||
|
||||
@ -1741,10 +1747,6 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
|
||||
.handleTypes = htype,
|
||||
};
|
||||
|
||||
VkSemaphoreCreateInfo sem_spawn = {
|
||||
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
|
||||
};
|
||||
|
||||
const int p_w = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, fmt_desc->log2_chroma_w) : hwfc->width;
|
||||
const int p_h = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, fmt_desc->log2_chroma_h) : hwfc->height;
|
||||
|
||||
@ -1785,19 +1787,6 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
|
||||
goto fail;
|
||||
}
|
||||
|
||||
ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
|
||||
hwctx->alloc, &f->sem[i]);
|
||||
if (ret != VK_SUCCESS) {
|
||||
av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
|
||||
vk_ret2str(ret));
|
||||
return AVERROR_EXTERNAL;
|
||||
}
|
||||
|
||||
/* We'd import a semaphore onto the one we created using
|
||||
* vkImportSemaphoreFdKHR but unfortunately neither DRM nor VAAPI
|
||||
* offer us anything we could import and sync with, so instead
|
||||
* just signal the semaphore we created. */
|
||||
|
||||
f->layout[i] = image_create_info.initialLayout;
|
||||
f->access[i] = 0x0;
|
||||
|
||||
@ -1818,6 +1807,19 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
|
||||
}
|
||||
}
|
||||
|
||||
ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
|
||||
hwctx->alloc, &f->sem);
|
||||
if (ret != VK_SUCCESS) {
|
||||
av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
|
||||
vk_ret2str(ret));
|
||||
return AVERROR_EXTERNAL;
|
||||
}
|
||||
|
||||
/* We'd import a semaphore onto the one we created using
|
||||
* vkImportSemaphoreFdKHR but unfortunately neither DRM nor VAAPI
|
||||
* offer us anything we could import and sync with, so instead
|
||||
* just signal the semaphore we created. */
|
||||
|
||||
/* Bind the allocated memory to the images */
|
||||
ret = vkBindImageMemory2(hwctx->act_dev, bind_counts, bind_info);
|
||||
if (ret != VK_SUCCESS) {
|
||||
@ -1838,12 +1840,11 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
for (int i = 0; i < desc->nb_layers; i++) {
|
||||
for (int i = 0; i < desc->nb_layers; i++)
|
||||
vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
|
||||
vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
|
||||
}
|
||||
for (int i = 0; i < desc->nb_objects; i++)
|
||||
vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
|
||||
vkDestroySemaphore(hwctx->act_dev, f->sem, hwctx->alloc);
|
||||
|
||||
av_free(f);
|
||||
|
||||
@ -1953,6 +1954,15 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
|
||||
|
||||
dst_int = dst_f->internal;
|
||||
if (!dst_int || !dst_int->cuda_fc_ref) {
|
||||
VkSemaphoreGetFdInfoKHR sem_export = {
|
||||
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
|
||||
.semaphore = dst_f->sem,
|
||||
.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
|
||||
};
|
||||
CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
|
||||
.type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD,
|
||||
};
|
||||
|
||||
if (!dst_f->internal)
|
||||
dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal));
|
||||
|
||||
@ -1991,14 +2001,6 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
|
||||
.memory = dst_f->mem[i],
|
||||
.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
|
||||
};
|
||||
VkSemaphoreGetFdInfoKHR sem_export = {
|
||||
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
|
||||
.semaphore = dst_f->sem[i],
|
||||
.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
|
||||
};
|
||||
CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
|
||||
.type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD,
|
||||
};
|
||||
|
||||
ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
|
||||
&ext_desc.handle.fd);
|
||||
@ -2028,22 +2030,22 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
|
||||
err = AVERROR_EXTERNAL;
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
ret = pfn_vkGetSemaphoreFdKHR(hwctx->act_dev, &sem_export,
|
||||
&ext_sem_desc.handle.fd);
|
||||
if (ret != VK_SUCCESS) {
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n",
|
||||
vk_ret2str(ret));
|
||||
err = AVERROR_EXTERNAL;
|
||||
goto fail;
|
||||
}
|
||||
ret = pfn_vkGetSemaphoreFdKHR(hwctx->act_dev, &sem_export,
|
||||
&ext_sem_desc.handle.fd);
|
||||
if (ret != VK_SUCCESS) {
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n",
|
||||
vk_ret2str(ret));
|
||||
err = AVERROR_EXTERNAL;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[i],
|
||||
&ext_sem_desc));
|
||||
if (ret < 0) {
|
||||
err = AVERROR_EXTERNAL;
|
||||
goto fail;
|
||||
}
|
||||
ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem,
|
||||
&ext_sem_desc));
|
||||
if (ret < 0) {
|
||||
err = AVERROR_EXTERNAL;
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2069,8 +2071,8 @@ static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
|
||||
AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
|
||||
AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
|
||||
CudaFunctions *cu = cu_internal->cuda_dl;
|
||||
CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 };
|
||||
CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 };
|
||||
CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par = { 0 };
|
||||
CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par = { 0 };
|
||||
|
||||
ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
|
||||
if (ret < 0) {
|
||||
@ -2086,8 +2088,8 @@ static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
|
||||
}
|
||||
dst_int = dst_f->internal;
|
||||
|
||||
ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
|
||||
planes, cuda_dev->stream));
|
||||
ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(&dst_int->cu_sem, &s_w_par,
|
||||
1, cuda_dev->stream));
|
||||
if (ret < 0) {
|
||||
err = AVERROR_EXTERNAL;
|
||||
goto fail;
|
||||
@ -2115,8 +2117,8 @@ static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
|
||||
}
|
||||
}
|
||||
|
||||
ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
|
||||
planes, cuda_dev->stream));
|
||||
ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(&dst_int->cu_sem, &s_s_par,
|
||||
1, cuda_dev->stream));
|
||||
if (ret < 0) {
|
||||
err = AVERROR_EXTERNAL;
|
||||
goto fail;
|
||||
@ -2492,11 +2494,11 @@ static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame,
|
||||
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
|
||||
.commandBufferCount = 1,
|
||||
.pCommandBuffers = &s->cmd.buf,
|
||||
.pSignalSemaphores = frame->sem,
|
||||
.pWaitSemaphores = frame->sem,
|
||||
.pSignalSemaphores = &frame->sem,
|
||||
.pWaitSemaphores = &frame->sem,
|
||||
.pWaitDstStageMask = sem_wait_dst,
|
||||
.signalSemaphoreCount = planes,
|
||||
.waitSemaphoreCount = planes,
|
||||
.signalSemaphoreCount = 1,
|
||||
.waitSemaphoreCount = 1,
|
||||
};
|
||||
|
||||
ret = vkBeginCommandBuffer(s->cmd.buf, &cmd_start);
|
||||
|
@ -133,10 +133,10 @@ typedef struct AVVkFrame {
|
||||
VkImageLayout layout[AV_NUM_DATA_POINTERS];
|
||||
|
||||
/**
|
||||
* Per-image semaphores. Must not be freed manually. Must be waited on
|
||||
* Per-frame semaphore. Must not be freed manually. Must be waited on
|
||||
* and signalled at every queue submission.
|
||||
*/
|
||||
VkSemaphore sem[AV_NUM_DATA_POINTERS];
|
||||
VkSemaphore sem;
|
||||
|
||||
/**
|
||||
* Internal data.
|
||||
|
Loading…
Reference in New Issue
Block a user