mirror of https://github.com/mpv-player/mpv
vo_gpu: vulkan: Add support for exporting buffer memory
The CUDA/Vulkan interop works on the basis of memory being exported from Vulkan and then imported by CUDA. To enable this, we add a way to declare a buffer as being intended for export, and then add a function to do the export. For now, we support fd-based exports on Linux and HANDLE-based exports on Windows. There are other handle types, which we can support when the need arises. Also note that this is just for exporting buffers, rather than textures (VkImages). Image import on the CUDA side is supposed to work, but it is currently buggy and waiting for a new driver release. Finally, at least with my nvidia hardware and drivers, everything seems to work even if we don't initialise the buffer with the right exportability options. Nevertheless, I'm enforcing it so that we follow the spec.
This commit is contained in:
parent
6fbd933108
commit
93f800a00f
|
@ -188,6 +188,7 @@ enum ra_buf_type {
|
|||
RA_BUF_TYPE_SHADER_STORAGE, // shader buffer (SSBO), for RA_VARTYPE_BUF_RW
|
||||
RA_BUF_TYPE_UNIFORM, // uniform buffer (UBO), for RA_VARTYPE_BUF_RO
|
||||
RA_BUF_TYPE_VERTEX, // not publicly usable (RA-internal usage)
|
||||
RA_BUF_TYPE_SHARED_MEMORY, // device memory for sharing with external API
|
||||
};
|
||||
|
||||
struct ra_buf_params {
|
||||
|
|
|
@ -73,4 +73,8 @@ struct mpvk_ctx {
|
|||
// Cached capabilities
|
||||
VkPhysicalDeviceLimits limits;
|
||||
VkPhysicalDeviceFeatures features;
|
||||
|
||||
// Extension availability
|
||||
bool has_ext_external_memory;
|
||||
bool has_ext_external_memory_export;
|
||||
};
|
||||
|
|
|
@ -2,6 +2,10 @@
|
|||
#include "utils.h"
|
||||
#include "osdep/timer.h"
|
||||
|
||||
#if HAVE_WIN32_DESKTOP
|
||||
#include <versionhelpers.h>
|
||||
#endif
|
||||
|
||||
// Controls the multiplication factor for new slab allocations. The new slab
|
||||
// will always be allocated such that the size of the slab is this factor times
|
||||
// the previous slab. Higher values make it grow faster.
|
||||
|
@ -57,6 +61,7 @@ struct vk_heap {
|
|||
VkBufferUsageFlags usage; // the buffer usage type (or 0)
|
||||
VkMemoryPropertyFlags flags; // the memory type flags (or 0)
|
||||
uint32_t typeBits; // the memory type index requirements (or 0)
|
||||
bool exportable; // whether memory is exportable to other APIs
|
||||
struct vk_slab **slabs; // array of slabs sorted by size
|
||||
int num_slabs;
|
||||
};
|
||||
|
@ -126,8 +131,20 @@ static struct vk_slab *slab_alloc(struct mpvk_ctx *vk, struct vk_heap *heap,
|
|||
.end = slab->size,
|
||||
});
|
||||
|
||||
VkExportMemoryAllocateInfoKHR eminfo = {
|
||||
.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR,
|
||||
#if HAVE_WIN32_DESKTOP
|
||||
.handleTypes = IsWindows8OrGreater()
|
||||
? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR
|
||||
: VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
|
||||
#else
|
||||
.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
|
||||
#endif
|
||||
};
|
||||
|
||||
VkMemoryAllocateInfo minfo = {
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
|
||||
.pNext = heap->exportable ? &eminfo : NULL,
|
||||
.allocationSize = slab->size,
|
||||
};
|
||||
|
||||
|
@ -141,8 +158,14 @@ static struct vk_slab *slab_alloc(struct mpvk_ctx *vk, struct vk_heap *heap,
|
|||
for (int i = 0; i < vk->num_pools; i++)
|
||||
qfs[i] = vk->pools[i]->qf;
|
||||
|
||||
VkExternalMemoryBufferCreateInfo ebinfo = {
|
||||
.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
|
||||
.handleTypes = eminfo.handleTypes,
|
||||
};
|
||||
|
||||
VkBufferCreateInfo binfo = {
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = heap->exportable ? &ebinfo : NULL,
|
||||
.size = slab->size,
|
||||
.usage = heap->usage,
|
||||
.sharingMode = vk->num_pools > 1 ? VK_SHARING_MODE_CONCURRENT
|
||||
|
@ -292,7 +315,8 @@ void vk_free_memslice(struct mpvk_ctx *vk, struct vk_memslice slice)
|
|||
// reqs: can be NULL
|
||||
static struct vk_heap *find_heap(struct mpvk_ctx *vk, VkBufferUsageFlags usage,
|
||||
VkMemoryPropertyFlags flags,
|
||||
VkMemoryRequirements *reqs)
|
||||
VkMemoryRequirements *reqs,
|
||||
bool exportable)
|
||||
{
|
||||
struct vk_malloc *ma = vk->alloc;
|
||||
int typeBits = reqs ? reqs->memoryTypeBits : 0;
|
||||
|
@ -304,6 +328,8 @@ static struct vk_heap *find_heap(struct mpvk_ctx *vk, VkBufferUsageFlags usage,
|
|||
continue;
|
||||
if (ma->heaps[i].typeBits != typeBits)
|
||||
continue;
|
||||
if (ma->heaps[i].exportable != exportable)
|
||||
continue;
|
||||
return &ma->heaps[i];
|
||||
}
|
||||
|
||||
|
@ -314,6 +340,7 @@ static struct vk_heap *find_heap(struct mpvk_ctx *vk, VkBufferUsageFlags usage,
|
|||
.usage = usage,
|
||||
.flags = flags,
|
||||
.typeBits = typeBits,
|
||||
.exportable = exportable,
|
||||
};
|
||||
return heap;
|
||||
}
|
||||
|
@ -396,6 +423,7 @@ static bool slice_heap(struct mpvk_ctx *vk, struct vk_heap *heap, size_t size,
|
|||
.vkmem = slab->mem,
|
||||
.offset = MP_ALIGN_UP(reg.start, alignment),
|
||||
.size = size,
|
||||
.slab_size = slab->size,
|
||||
.priv = slab,
|
||||
};
|
||||
|
||||
|
@ -413,15 +441,24 @@ static bool slice_heap(struct mpvk_ctx *vk, struct vk_heap *heap, size_t size,
|
|||
bool vk_malloc_generic(struct mpvk_ctx *vk, VkMemoryRequirements reqs,
|
||||
VkMemoryPropertyFlags flags, struct vk_memslice *out)
|
||||
{
|
||||
struct vk_heap *heap = find_heap(vk, 0, flags, &reqs);
|
||||
struct vk_heap *heap = find_heap(vk, 0, flags, &reqs, false);
|
||||
return slice_heap(vk, heap, reqs.size, reqs.alignment, out);
|
||||
}
|
||||
|
||||
bool vk_malloc_buffer(struct mpvk_ctx *vk, VkBufferUsageFlags bufFlags,
|
||||
VkMemoryPropertyFlags memFlags, VkDeviceSize size,
|
||||
VkDeviceSize alignment, struct vk_bufslice *out)
|
||||
VkDeviceSize alignment, bool exportable,
|
||||
struct vk_bufslice *out)
|
||||
{
|
||||
struct vk_heap *heap = find_heap(vk, bufFlags, memFlags, NULL);
|
||||
if (exportable) {
|
||||
if (!vk->has_ext_external_memory_export) {
|
||||
MP_ERR(vk, "Exportable memory requires the %s extension\n",
|
||||
MP_VK_EXTERNAL_MEMORY_EXPORT_EXTENSION_NAME);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
struct vk_heap *heap = find_heap(vk, bufFlags, memFlags, NULL, exportable);
|
||||
if (!slice_heap(vk, heap, size, alignment, &out->mem))
|
||||
return false;
|
||||
|
||||
|
|
|
@ -11,6 +11,7 @@ struct vk_memslice {
|
|||
VkDeviceMemory vkmem;
|
||||
size_t offset;
|
||||
size_t size;
|
||||
size_t slab_size;
|
||||
void *priv;
|
||||
};
|
||||
|
||||
|
@ -32,4 +33,5 @@ struct vk_bufslice {
|
|||
// creating/destroying lots of (little) VkBuffers.
|
||||
bool vk_malloc_buffer(struct mpvk_ctx *vk, VkBufferUsageFlags bufFlags,
|
||||
VkMemoryPropertyFlags memFlags, VkDeviceSize size,
|
||||
VkDeviceSize alignment, struct vk_bufslice *out);
|
||||
VkDeviceSize alignment, bool exportable,
|
||||
struct vk_bufslice *out);
|
||||
|
|
|
@ -4,6 +4,10 @@
|
|||
#include "ra_vk.h"
|
||||
#include "malloc.h"
|
||||
|
||||
#if HAVE_WIN32_DESKTOP
|
||||
#include <versionhelpers.h>
|
||||
#endif
|
||||
|
||||
static struct ra_fns ra_fns_vk;
|
||||
|
||||
enum queue_type {
|
||||
|
@ -787,6 +791,7 @@ static struct ra_buf *vk_buf_create(struct ra *ra,
|
|||
VkBufferUsageFlags bufFlags = 0;
|
||||
VkMemoryPropertyFlags memFlags = 0;
|
||||
VkDeviceSize align = 4; // alignment 4 is needed for buf_update
|
||||
bool exportable = false;
|
||||
|
||||
switch (params->type) {
|
||||
case RA_BUF_TYPE_TEX_UPLOAD:
|
||||
|
@ -811,6 +816,11 @@ static struct ra_buf *vk_buf_create(struct ra *ra,
|
|||
bufFlags |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
|
||||
memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
|
||||
break;
|
||||
case RA_BUF_TYPE_SHARED_MEMORY:
|
||||
bufFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
|
||||
memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
|
||||
exportable = true;
|
||||
break;
|
||||
default: abort();
|
||||
}
|
||||
|
||||
|
@ -826,7 +836,7 @@ static struct ra_buf *vk_buf_create(struct ra *ra,
|
|||
}
|
||||
|
||||
if (!vk_malloc_buffer(vk, bufFlags, memFlags, params->size, align,
|
||||
&buf_vk->slice))
|
||||
exportable, &buf_vk->slice))
|
||||
{
|
||||
goto error;
|
||||
}
|
||||
|
@ -916,6 +926,64 @@ error:
|
|||
return false;
|
||||
}
|
||||
|
||||
static bool ra_vk_mem_get_external_info(struct ra *ra, struct vk_memslice *mem, struct vk_external_mem *ret)
|
||||
{
|
||||
struct mpvk_ctx *vk = ra_vk_get(ra);
|
||||
|
||||
#if HAVE_WIN32_DESKTOP
|
||||
HANDLE mem_handle;
|
||||
|
||||
VkMemoryGetWin32HandleInfoKHR info = {
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
|
||||
.pNext = NULL,
|
||||
.memory = mem->vkmem,
|
||||
.handleType = IsWindows8OrGreater()
|
||||
? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR
|
||||
: VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
|
||||
};
|
||||
|
||||
VK_LOAD_PFN(vkGetMemoryWin32HandleKHR);
|
||||
VK(pfn_vkGetMemoryWin32HandleKHR(vk->dev, &info, &mem_handle));
|
||||
|
||||
ret->mem_handle = mem_handle;
|
||||
#else
|
||||
int mem_fd;
|
||||
|
||||
VkMemoryGetFdInfoKHR info = {
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
|
||||
.pNext = NULL,
|
||||
.memory = mem->vkmem,
|
||||
.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
|
||||
};
|
||||
|
||||
VK_LOAD_PFN(vkGetMemoryFdKHR);
|
||||
VK(pfn_vkGetMemoryFdKHR(vk->dev, &info, &mem_fd));
|
||||
|
||||
ret->mem_fd = mem_fd;
|
||||
#endif
|
||||
ret->size = mem->size;
|
||||
ret->offset = mem->offset;
|
||||
ret->mem_size = mem->slab_size;
|
||||
|
||||
return true;
|
||||
|
||||
error:
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ra_vk_buf_get_external_info(struct ra *ra, struct ra_buf *buf, struct vk_external_mem *ret)
|
||||
{
|
||||
if (buf->params.type != RA_BUF_TYPE_SHARED_MEMORY) {
|
||||
MP_ERR(ra, "Buffer must be of TYPE_SHARED_MEMORY to be able to export it...");
|
||||
return false;
|
||||
}
|
||||
|
||||
struct ra_buf_vk *buf_vk = buf->priv;
|
||||
struct vk_memslice *mem = &buf_vk->slice.mem;
|
||||
|
||||
return ra_vk_mem_get_external_info(ra, mem, ret);
|
||||
}
|
||||
|
||||
#define MPVK_NUM_DS MPVK_MAX_STREAMING_DEPTH
|
||||
|
||||
// For ra_renderpass.priv
|
||||
|
|
|
@ -29,3 +29,17 @@ struct vk_cmd *ra_vk_submit(struct ra *ra, struct ra_tex *tex);
|
|||
// May be called on a struct ra of any type. Returns NULL if the ra is not
|
||||
// a vulkan ra.
|
||||
struct mpvk_ctx *ra_vk_get(struct ra *ra);
|
||||
|
||||
// Describes exported device memory, as filled in by
// ra_vk_buf_get_external_info():
//   mem_handle / mem_fd: the exported OS handle; a HANDLE when building for
//                        Windows desktop, a file descriptor otherwise
//   mem_size:            size of the slab the buffer's slice was cut from
//   size:                size of the buffer's slice
//   offset:              offset of the buffer's slice (presumably relative
//                        to the start of the exported memory - confirm
//                        against the importer)
struct vk_external_mem {
|
||||
#if HAVE_WIN32_DESKTOP
|
||||
HANDLE mem_handle;
|
||||
#else
|
||||
int mem_fd;
|
||||
#endif
|
||||
size_t mem_size;
|
||||
size_t size;
|
||||
size_t offset;
|
||||
};
|
||||
|
||||
// Export an ra_buf for importing by another api.
|
||||
bool ra_vk_buf_get_external_info(struct ra *ra, struct ra_buf *buf, struct vk_external_mem *ret);
|
||||
|
|
|
@ -438,6 +438,38 @@ static void add_qinfo(void *tactx, VkDeviceQueueCreateInfo **qinfos,
|
|||
MP_TARRAY_APPEND(tactx, *qinfos, *num_qinfos, qinfo);
|
||||
}
|
||||
|
||||
static bool detect_device_extensions(struct mpvk_ctx *vk)
|
||||
{
|
||||
bool ret = false;
|
||||
VkExtensionProperties *props = NULL;
|
||||
|
||||
uint32_t num_exts;
|
||||
VK(vkEnumerateDeviceExtensionProperties(vk->physd, NULL,
|
||||
&num_exts, NULL));
|
||||
|
||||
props = talloc_array(NULL, VkExtensionProperties, num_exts);
|
||||
VK(vkEnumerateDeviceExtensionProperties(vk->physd,
|
||||
NULL, &num_exts, props));
|
||||
|
||||
for (uint32_t i = 0; i < num_exts; i++) {
|
||||
if (!strcmp(VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME,
|
||||
props[i].extensionName)) {
|
||||
vk->has_ext_external_memory = true;
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(MP_VK_EXTERNAL_MEMORY_EXPORT_EXTENSION_NAME,
|
||||
props[i].extensionName)) {
|
||||
vk->has_ext_external_memory_export = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
ret = true;
|
||||
error:
|
||||
talloc_free(props);
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts)
|
||||
{
|
||||
assert(vk->physd);
|
||||
|
@ -493,9 +525,18 @@ bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts)
|
|||
add_qinfo(tmp, &qinfos, &num_qinfos, qfs, idx_comp, opts.queue_count);
|
||||
add_qinfo(tmp, &qinfos, &num_qinfos, qfs, idx_tf, opts.queue_count);
|
||||
|
||||
if (!detect_device_extensions(vk)) {
|
||||
MP_WARN(vk, "Failed to enumerate device extensions. "
|
||||
"Some features may be disabled.\n");
|
||||
}
|
||||
|
||||
const char **exts = NULL;
|
||||
int num_exts = 0;
|
||||
MP_TARRAY_APPEND(tmp, exts, num_exts, VK_KHR_SWAPCHAIN_EXTENSION_NAME);
|
||||
if (vk->has_ext_external_memory)
|
||||
MP_TARRAY_APPEND(tmp, exts, num_exts, VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME);
|
||||
if (vk->has_ext_external_memory_export)
|
||||
MP_TARRAY_APPEND(tmp, exts, num_exts, MP_VK_EXTERNAL_MEMORY_EXPORT_EXTENSION_NAME);
|
||||
if (vk->spirv->required_ext)
|
||||
MP_TARRAY_APPEND(tmp, exts, num_exts, vk->spirv->required_ext);
|
||||
|
||||
|
|
|
@ -10,6 +10,12 @@
|
|||
#define VK_LOAD_PFN(name) PFN_##name pfn_##name = (PFN_##name) \
|
||||
vkGetInstanceProcAddr(vk->inst, #name);
|
||||
|
||||
// Name of the platform-specific device extension used for exporting memory:
// the Win32 handle variant on Windows desktop builds, the fd variant
// everywhere else.
#if HAVE_WIN32_DESKTOP
|
||||
#define MP_VK_EXTERNAL_MEMORY_EXPORT_EXTENSION_NAME VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME
|
||||
#else
|
||||
#define MP_VK_EXTERNAL_MEMORY_EXPORT_EXTENSION_NAME VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME
|
||||
#endif
|
||||
|
||||
// Return a human-readable name for various struct mpvk_ctx enums
|
||||
const char* vk_err(VkResult res);
|
||||
|
||||
|
|
Loading…
Reference in New Issue