hwcontext_vulkan: support threadsafe queue and frame operations

This commit is contained in:
Lynne 2022-03-15 23:00:32 +01:00
parent 2a1fd2814f
commit 46a77c6496
No known key found for this signature in database
GPG Key ID: A2FEA5F03F034464
2 changed files with 180 additions and 49 deletions

View File

@ -27,6 +27,7 @@
#include <dlfcn.h>
#endif
#include "thread.h"
#include <unistd.h>
#include "config.h"
@ -92,8 +93,10 @@ typedef struct VulkanDevicePriv {
VkPhysicalDeviceVulkan13Features device_features_1_3;
/* Queues */
uint32_t qfs[5];
int num_qfs;
pthread_mutex_t **qf_mutex;
uint32_t nb_tot_qfs;
uint32_t img_qfs[5];
uint32_t nb_img_qfs;
/* Debug callback */
VkDebugUtilsMessengerEXT debug_ctx;
@ -127,6 +130,8 @@ typedef struct VulkanFramesPriv {
} VulkanFramesPriv;
typedef struct AVVkFrameInternal {
pthread_mutex_t update_mutex;
#if CONFIG_CUDA
/* Importing external memory into cuda is really expensive so we keep the
* memory imported all the time */
@ -1305,6 +1310,12 @@ static void vulkan_device_free(AVHWDeviceContext *ctx)
if (p->libvulkan)
dlclose(p->libvulkan);
for (uint32_t i = 0; i < p->nb_tot_qfs; i++) {
pthread_mutex_destroy(p->qf_mutex[i]);
av_freep(&p->qf_mutex[i]);
}
av_freep(&p->qf_mutex);
RELEASE_PROPS(hwctx->enabled_inst_extensions, hwctx->nb_enabled_inst_extensions);
RELEASE_PROPS(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions);
}
@ -1437,13 +1448,26 @@ end:
return err;
}
static void lock_queue(AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index)
{
VulkanDevicePriv *p = ctx->internal->priv;
pthread_mutex_lock(&p->qf_mutex[queue_family][index]);
}
static void unlock_queue(AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index)
{
VulkanDevicePriv *p = ctx->internal->priv;
pthread_mutex_unlock(&p->qf_mutex[queue_family][index]);
}
static int vulkan_device_init(AVHWDeviceContext *ctx)
{
int err;
uint32_t queue_num;
uint32_t qf_num;
AVVulkanDeviceContext *hwctx = ctx->hwctx;
VulkanDevicePriv *p = ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
VkQueueFamilyProperties *qf;
int graph_index, comp_index, tx_index, enc_index, dec_index;
/* Set device extension flags */
@ -1482,12 +1506,37 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de);
p->dev_is_intel = (p->props.properties.vendorID == 0x8086);
vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
if (!queue_num) {
vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, NULL);
if (!qf_num) {
av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
return AVERROR_EXTERNAL;
}
qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties));
if (!qf)
return AVERROR(ENOMEM);
vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, qf);
p->qf_mutex = av_calloc(qf_num, sizeof(*p->qf_mutex));
if (!p->qf_mutex)
return AVERROR(ENOMEM);
p->nb_tot_qfs = qf_num;
for (uint32_t i = 0; i < qf_num; i++) {
p->qf_mutex[i] = av_calloc(qf[i].queueCount, sizeof(**p->qf_mutex));
if (!p->qf_mutex[i])
return AVERROR(ENOMEM);
for (uint32_t j = 0; j < qf[i].queueCount; j++) {
err = pthread_mutex_init(&p->qf_mutex[i][j], NULL);
if (err != 0) {
av_log(ctx, AV_LOG_ERROR, "pthread_mutex_init failed : %s\n",
av_err2str(err));
return AVERROR(err);
}
}
}
graph_index = hwctx->queue_family_index;
comp_index = hwctx->queue_family_comp_index;
tx_index = hwctx->queue_family_tx_index;
@ -1502,9 +1551,9 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
return AVERROR(EINVAL); \
} else if (fidx < 0 || ctx_qf < 0) { \
break; \
} else if (ctx_qf >= queue_num) { \
} else if (ctx_qf >= qf_num) { \
av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \
type, ctx_qf, queue_num); \
type, ctx_qf, qf_num); \
return AVERROR(EINVAL); \
} \
\
@ -1521,7 +1570,7 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
tx_index = (ctx_qf == tx_index) ? -1 : tx_index; \
enc_index = (ctx_qf == enc_index) ? -1 : enc_index; \
dec_index = (ctx_qf == dec_index) ? -1 : dec_index; \
p->qfs[p->num_qfs++] = ctx_qf; \
p->img_qfs[p->nb_img_qfs++] = ctx_qf; \
} while (0)
CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index, hwctx->nb_graphics_queues);
@ -1532,6 +1581,11 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
#undef CHECK_QUEUE
if (!hwctx->lock_queue)
hwctx->lock_queue = lock_queue;
if (!hwctx->unlock_queue)
hwctx->unlock_queue = unlock_queue;
/* Get device capabilities */
vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
@ -1733,9 +1787,6 @@ static void vulkan_free_internal(AVVkFrame *f)
{
AVVkFrameInternal *internal = f->internal;
if (!internal)
return;
#if CONFIG_CUDA
if (internal->cuda_fc_ref) {
AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
@ -1764,6 +1815,7 @@ static void vulkan_free_internal(AVVkFrame *f)
}
#endif
pthread_mutex_destroy(&internal->update_mutex);
av_freep(&f->internal);
}
@ -1924,9 +1976,11 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
uint32_t src_qf, dst_qf;
VkImageLayout new_layout;
VkAccessFlags new_access;
AVVulkanFramesContext *vkfc = hwfc->hwctx;
const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
AVFrame tmp = { .data[0] = (uint8_t *)frame };
uint64_t sem_sig_val[AV_NUM_DATA_POINTERS];
VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
@ -1945,6 +1999,12 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
};
VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS];
if ((err = wait_start_exec_ctx(hwfc, ectx)))
return err;
vkfc->lock_frame(hwfc, frame);
for (int i = 0; i < planes; i++) {
wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
sem_sig_val[i] = frame->sem_value[i] + 1;
@ -1981,9 +2041,6 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
break;
}
if ((err = wait_start_exec_ctx(hwfc, ectx)))
return err;
/* Change the image layout to something more optimal for writes.
* This also signals the newly created semaphore, making it usable
* for synchronization */
@ -2009,7 +2066,10 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
VK_PIPELINE_STAGE_TRANSFER_BIT,
0, 0, NULL, 0, NULL, planes, img_bar);
return submit_exec_ctx(hwfc, ectx, &s_info, frame, 0);
err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 0);
vkfc->unlock_frame(hwfc, frame);
return err;
}
static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format,
@ -2091,10 +2151,10 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.usage = usage,
.samples = VK_SAMPLE_COUNT_1_BIT,
.pQueueFamilyIndices = p->qfs,
.queueFamilyIndexCount = p->num_qfs,
.sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
VK_SHARING_MODE_EXCLUSIVE,
.pQueueFamilyIndices = p->img_qfs,
.queueFamilyIndexCount = p->nb_img_qfs,
.sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
VK_SHARING_MODE_EXCLUSIVE,
};
get_plane_wh(&create_info.extent.width, &create_info.extent.height,
@ -2118,6 +2178,7 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
return AVERROR_EXTERNAL;
}
f->queue_family[i] = p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0];
f->layout[i] = create_info.initialLayout;
f->access[i] = 0x0;
f->sem_value[i] = 0;
@ -2162,10 +2223,10 @@ static void try_export_flags(AVHWFramesContext *hwfc,
VkPhysicalDeviceImageDrmFormatModifierInfoEXT phy_dev_mod_info = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
.pNext = NULL,
.pQueueFamilyIndices = p->qfs,
.queueFamilyIndexCount = p->num_qfs,
.sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
VK_SHARING_MODE_EXCLUSIVE,
.pQueueFamilyIndices = p->img_qfs,
.queueFamilyIndexCount = p->nb_img_qfs,
.sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
VK_SHARING_MODE_EXCLUSIVE,
};
VkPhysicalDeviceExternalImageFormatInfo enext = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
@ -2260,6 +2321,16 @@ fail:
return NULL;
}
static void lock_frame(AVHWFramesContext *fc, AVVkFrame *vkf)
{
pthread_mutex_lock(&vkf->internal->update_mutex);
}
static void unlock_frame(AVHWFramesContext *fc, AVVkFrame *vkf)
{
pthread_mutex_unlock(&vkf->internal->update_mutex);
}
static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
{
VulkanFramesPriv *fp = hwfc->internal->priv;
@ -2422,6 +2493,11 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
return AVERROR(ENOMEM);
}
if (!hwctx->lock_frame)
hwctx->lock_frame = lock_frame;
if (!hwctx->unlock_frame)
hwctx->unlock_frame = unlock_frame;
return 0;
}
@ -2728,10 +2804,10 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
.usage = VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
.samples = VK_SAMPLE_COUNT_1_BIT,
.pQueueFamilyIndices = p->qfs,
.queueFamilyIndexCount = p->num_qfs,
.sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
VK_SHARING_MODE_EXCLUSIVE,
.pQueueFamilyIndices = p->img_qfs,
.queueFamilyIndexCount = p->nb_img_qfs,
.sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
VK_SHARING_MODE_EXCLUSIVE,
};
/* Image format verification */
@ -2810,6 +2886,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
* offer us anything we could import and sync with, so instead
* just signal the semaphore we created. */
f->queue_family[i] = p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0];
f->layout[i] = create_info.initialLayout;
f->access[i] = 0x0;
f->sem_value[i] = 0;
@ -3018,20 +3095,12 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
CU_AD_FORMAT_UNSIGNED_INT8;
dst_f = (AVVkFrame *)frame->data[0];
dst_int = dst_f->internal;
if (!dst_int || !dst_int->cuda_fc_ref) {
if (!dst_f->internal)
dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal));
if (!dst_int)
return AVERROR(ENOMEM);
if (!dst_int->cuda_fc_ref) {
dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc);
if (!dst_int->cuda_fc_ref) {
av_freep(&dst_f->internal);
if (!dst_int->cuda_fc_ref)
return AVERROR(ENOMEM);
}
for (int i = 0; i < planes; i++) {
CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
@ -3705,13 +3774,14 @@ static int unmap_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs,
return err;
}
static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
AVBufferRef **bufs, size_t *buf_offsets,
const int *buf_stride, int w,
int h, enum AVPixelFormat pix_fmt, int to_buf)
{
int err;
AVVkFrame *frame = (AVVkFrame *)f->data[0];
AVVulkanFramesContext *vkfc = hwfc->hwctx;
VulkanFramesPriv *fp = hwfc->internal->priv;
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
@ -3746,11 +3816,13 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
.waitSemaphoreCount = planes,
};
for (int i = 0; i < planes; i++)
sem_signal_values[i] = frame->sem_value[i] + 1;
vkfc->lock_frame(hwfc, frame);
if ((err = wait_start_exec_ctx(hwfc, ectx)))
return err;
goto end;
for (int i = 0; i < planes; i++)
sem_signal_values[i] = frame->sem_value[i] + 1;
/* Change the image layout to something more optimal for transfers */
for (int i = 0; i < planes; i++) {
@ -3825,14 +3897,18 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
if (!f->buf[ref])
break;
if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1)))
return err;
goto end;
}
if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, planes)))
return err;
return submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref);
goto end;
err = submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref);
} else {
return submit_exec_ctx(hwfc, ectx, &s_info, frame, 1);
err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 1);
}
end:
vkfc->unlock_frame(hwfc, frame);
return err;
}
static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
@ -3961,8 +4037,9 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
}
/* Copy buffers into/from image */
err = transfer_image_buf(hwfc, vkf, bufs, buf_offsets, tmp.linesize,
swf->width, swf->height, swf->format, from);
err = transfer_image_buf(hwfc, (AVFrame *)vkf, bufs, buf_offsets,
tmp.linesize, swf->width, swf->height, swf->format,
from);
if (from) {
/* Map, copy buffer (which came FROM the VkImage) to the frame, unmap */
@ -4143,7 +4220,25 @@ static int vulkan_frames_derive_to(AVHWFramesContext *dst_fc,
AVVkFrame *av_vk_frame_alloc(void)
{
return av_mallocz(sizeof(AVVkFrame));
int err;
AVVkFrame *f = av_mallocz(sizeof(AVVkFrame));
if (!f)
return NULL;
f->internal = av_mallocz(sizeof(*f->internal));
if (!f->internal) {
av_free(f);
return NULL;
}
err = pthread_mutex_init(&f->internal->update_mutex, NULL);
if (err != 0) {
av_free(f->internal);
av_free(f);
return NULL;
}
return f;
}
const HWContextType ff_hwcontext_type_vulkan = {

View File

@ -27,6 +27,8 @@
#include "pixfmt.h"
#include "frame.h"
typedef struct AVVkFrame AVVkFrame;
/**
* @file
* API-specific header for AV_HWDEVICE_TYPE_VULKAN.
@ -135,6 +137,19 @@ typedef struct AVVulkanDeviceContext {
*/
int queue_family_decode_index;
int nb_decode_queues;
/**
* Locks a queue, preventing other threads from submitting any command
* buffers to this queue.
* If set to NULL, will be set to lavu-internal functions that utilize a
* mutex.
*/
void (*lock_queue)(struct AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index);
/**
* Similar to lock_queue(), unlocks a queue. Must only be called after locking.
*/
void (*unlock_queue)(struct AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index);
} AVVulkanDeviceContext;
/**
@ -195,6 +210,21 @@ typedef struct AVVulkanFramesContext {
* av_hwframe_ctx_init().
*/
AVVkFrameFlags flags;
/**
* Locks a frame, preventing other threads from changing frame properties.
* Users SHOULD only ever lock just before command submission in order
* to get accurate frame properties, and unlock immediately after command
* submission without waiting for it to finish.
*
* If unset, will be set to lavu-internal functions that utilize a mutex.
*/
void (*lock_frame)(struct AVHWFramesContext *fc, AVVkFrame *vkf);
/**
* Similar to lock_frame(), unlocks a frame. Must only be called after locking.
*/
void (*unlock_frame)(struct AVHWFramesContext *fc, AVVkFrame *vkf);
} AVVulkanFramesContext;
/*
@ -210,7 +240,7 @@ typedef struct AVVulkanFramesContext {
* @note the size of this structure is not part of the ABI, to allocate
* you must use @av_vk_frame_alloc().
*/
typedef struct AVVkFrame {
struct AVVkFrame {
/**
* Vulkan images to which the memory is bound to.
*/
@ -264,6 +294,12 @@ typedef struct AVVkFrame {
* Describes the binding offset of each plane to the VkDeviceMemory.
*/
ptrdiff_t offset[AV_NUM_DATA_POINTERS];
/**
* Queue family of the images. Must be VK_QUEUE_FAMILY_IGNORED if
* the image was allocated with the CONCURRENT concurrency option.
*/
uint32_t queue_family[AV_NUM_DATA_POINTERS];
} AVVkFrame;
/**