vo_gpu: vulkan: Add support for exporting buffer memory

The CUDA/Vulkan interop works on the basis of memory being exported
from Vulkan and then imported by CUDA. To enable this, we add a way
to declare a buffer as being intended for export, and then add a
function to do the export.

For now, we support the fd-based and HANDLE-based exports on Linux and
Windows respectively. There are other handle types, which we can
support when the need arises.

Also note that this is just for exporting buffers, rather than
textures (VkImages). Image import on the CUDA side is supposed to
work, but it is currently buggy and waiting for a new driver release.

Finally, at least with my nvidia hardware and drivers, everything
seems to work even if we don't initialise the buffer with the right
exportability options. Nevertheless, I'm enforcing them so that we
follow the spec.
This commit is contained in:
Philip Langdale 2018-09-29 17:56:07 -07:00 committed by sfan5
parent 6fbd933108
commit 93f800a00f
8 changed files with 179 additions and 6 deletions

View File

@ -188,6 +188,7 @@ enum ra_buf_type {
RA_BUF_TYPE_SHADER_STORAGE, // shader buffer (SSBO), for RA_VARTYPE_BUF_RW
RA_BUF_TYPE_UNIFORM, // uniform buffer (UBO), for RA_VARTYPE_BUF_RO
RA_BUF_TYPE_VERTEX, // not publicly usable (RA-internal usage)
RA_BUF_TYPE_SHARED_MEMORY, // device memory for sharing with external API
};
struct ra_buf_params {

View File

@ -73,4 +73,8 @@ struct mpvk_ctx {
// Cached capabilities
VkPhysicalDeviceLimits limits;
VkPhysicalDeviceFeatures features;
// Extension availability
bool has_ext_external_memory;
bool has_ext_external_memory_export;
};

View File

@ -2,6 +2,10 @@
#include "utils.h"
#include "osdep/timer.h"
#if HAVE_WIN32_DESKTOP
#include <versionhelpers.h>
#endif
// Controls the multiplication factor for new slab allocations. The new slab
// will always be allocated such that the size of the slab is this factor times
// the previous slab. Higher values make it grow faster.
@ -57,6 +61,7 @@ struct vk_heap {
VkBufferUsageFlags usage; // the buffer usage type (or 0)
VkMemoryPropertyFlags flags; // the memory type flags (or 0)
uint32_t typeBits; // the memory type index requirements (or 0)
bool exportable; // whether memory is exportable to other APIs
struct vk_slab **slabs; // array of slabs sorted by size
int num_slabs;
};
@ -126,8 +131,20 @@ static struct vk_slab *slab_alloc(struct mpvk_ctx *vk, struct vk_heap *heap,
.end = slab->size,
});
VkExportMemoryAllocateInfoKHR eminfo = {
.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR,
#if HAVE_WIN32_DESKTOP
.handleTypes = IsWindows8OrGreater()
? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR
: VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
#else
.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
#endif
};
VkMemoryAllocateInfo minfo = {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = heap->exportable ? &eminfo : NULL,
.allocationSize = slab->size,
};
@ -141,8 +158,14 @@ static struct vk_slab *slab_alloc(struct mpvk_ctx *vk, struct vk_heap *heap,
for (int i = 0; i < vk->num_pools; i++)
qfs[i] = vk->pools[i]->qf;
VkExternalMemoryBufferCreateInfo ebinfo = {
.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
.handleTypes = eminfo.handleTypes,
};
VkBufferCreateInfo binfo = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = heap->exportable ? &ebinfo : NULL,
.size = slab->size,
.usage = heap->usage,
.sharingMode = vk->num_pools > 1 ? VK_SHARING_MODE_CONCURRENT
@ -292,7 +315,8 @@ void vk_free_memslice(struct mpvk_ctx *vk, struct vk_memslice slice)
// reqs: can be NULL
static struct vk_heap *find_heap(struct mpvk_ctx *vk, VkBufferUsageFlags usage,
VkMemoryPropertyFlags flags,
VkMemoryRequirements *reqs)
VkMemoryRequirements *reqs,
bool exportable)
{
struct vk_malloc *ma = vk->alloc;
int typeBits = reqs ? reqs->memoryTypeBits : 0;
@ -304,6 +328,8 @@ static struct vk_heap *find_heap(struct mpvk_ctx *vk, VkBufferUsageFlags usage,
continue;
if (ma->heaps[i].typeBits != typeBits)
continue;
if (ma->heaps[i].exportable != exportable)
continue;
return &ma->heaps[i];
}
@ -314,6 +340,7 @@ static struct vk_heap *find_heap(struct mpvk_ctx *vk, VkBufferUsageFlags usage,
.usage = usage,
.flags = flags,
.typeBits = typeBits,
.exportable = exportable,
};
return heap;
}
@ -396,6 +423,7 @@ static bool slice_heap(struct mpvk_ctx *vk, struct vk_heap *heap, size_t size,
.vkmem = slab->mem,
.offset = MP_ALIGN_UP(reg.start, alignment),
.size = size,
.slab_size = slab->size,
.priv = slab,
};
@ -413,15 +441,24 @@ static bool slice_heap(struct mpvk_ctx *vk, struct vk_heap *heap, size_t size,
bool vk_malloc_generic(struct mpvk_ctx *vk, VkMemoryRequirements reqs,
VkMemoryPropertyFlags flags, struct vk_memslice *out)
{
struct vk_heap *heap = find_heap(vk, 0, flags, &reqs);
struct vk_heap *heap = find_heap(vk, 0, flags, &reqs, false);
return slice_heap(vk, heap, reqs.size, reqs.alignment, out);
}
bool vk_malloc_buffer(struct mpvk_ctx *vk, VkBufferUsageFlags bufFlags,
VkMemoryPropertyFlags memFlags, VkDeviceSize size,
VkDeviceSize alignment, struct vk_bufslice *out)
VkDeviceSize alignment, bool exportable,
struct vk_bufslice *out)
{
struct vk_heap *heap = find_heap(vk, bufFlags, memFlags, NULL);
if (exportable) {
if (!vk->has_ext_external_memory_export) {
MP_ERR(vk, "Exportable memory requires the %s extension\n",
MP_VK_EXTERNAL_MEMORY_EXPORT_EXTENSION_NAME);
return false;
}
}
struct vk_heap *heap = find_heap(vk, bufFlags, memFlags, NULL, exportable);
if (!slice_heap(vk, heap, size, alignment, &out->mem))
return false;

View File

@ -11,6 +11,7 @@ struct vk_memslice {
VkDeviceMemory vkmem;
size_t offset;
size_t size;
size_t slab_size;
void *priv;
};
@ -32,4 +33,5 @@ struct vk_bufslice {
// creating/destroying lots of (little) VkBuffers.
bool vk_malloc_buffer(struct mpvk_ctx *vk, VkBufferUsageFlags bufFlags,
VkMemoryPropertyFlags memFlags, VkDeviceSize size,
VkDeviceSize alignment, struct vk_bufslice *out);
VkDeviceSize alignment, bool exportable,
struct vk_bufslice *out);

View File

@ -4,6 +4,10 @@
#include "ra_vk.h"
#include "malloc.h"
#if HAVE_WIN32_DESKTOP
#include <versionhelpers.h>
#endif
static struct ra_fns ra_fns_vk;
enum queue_type {
@ -787,6 +791,7 @@ static struct ra_buf *vk_buf_create(struct ra *ra,
VkBufferUsageFlags bufFlags = 0;
VkMemoryPropertyFlags memFlags = 0;
VkDeviceSize align = 4; // alignment 4 is needed for buf_update
bool exportable = false;
switch (params->type) {
case RA_BUF_TYPE_TEX_UPLOAD:
@ -811,6 +816,11 @@ static struct ra_buf *vk_buf_create(struct ra *ra,
bufFlags |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
break;
case RA_BUF_TYPE_SHARED_MEMORY:
bufFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
exportable = true;
break;
default: abort();
}
@ -826,7 +836,7 @@ static struct ra_buf *vk_buf_create(struct ra *ra,
}
if (!vk_malloc_buffer(vk, bufFlags, memFlags, params->size, align,
&buf_vk->slice))
exportable, &buf_vk->slice))
{
goto error;
}
@ -916,6 +926,64 @@ error:
return false;
}
static bool ra_vk_mem_get_external_info(struct ra *ra, struct vk_memslice *mem, struct vk_external_mem *ret)
{
struct mpvk_ctx *vk = ra_vk_get(ra);
#if HAVE_WIN32_DESKTOP
HANDLE mem_handle;
VkMemoryGetWin32HandleInfoKHR info = {
.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
.pNext = NULL,
.memory = mem->vkmem,
.handleType = IsWindows8OrGreater()
? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR
: VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
};
VK_LOAD_PFN(vkGetMemoryWin32HandleKHR);
VK(pfn_vkGetMemoryWin32HandleKHR(vk->dev, &info, &mem_handle));
ret->mem_handle = mem_handle;
#else
int mem_fd;
VkMemoryGetFdInfoKHR info = {
.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
.pNext = NULL,
.memory = mem->vkmem,
.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
};
VK_LOAD_PFN(vkGetMemoryFdKHR);
VK(pfn_vkGetMemoryFdKHR(vk->dev, &info, &mem_fd));
ret->mem_fd = mem_fd;
#endif
ret->size = mem->size;
ret->offset = mem->offset;
ret->mem_size = mem->slab_size;
return true;
error:
return false;
}
// Export the memory behind an ra_buf so that another API can import it.
//
// ra:  the RA instance the buffer belongs to
// buf: the buffer to export; must have been created with
//      RA_BUF_TYPE_SHARED_MEMORY
// ret: receives the exported handle and the buffer's placement inside it
//
// Returns false (after logging an error) if the buffer has the wrong type,
// or if exporting the underlying memory fails.
bool ra_vk_buf_get_external_info(struct ra *ra, struct ra_buf *buf, struct vk_external_mem *ret)
{
    if (buf->params.type != RA_BUF_TYPE_SHARED_MEMORY) {
        // Log lines must be newline-terminated, like the other MP_* calls
        MP_ERR(ra, "Buffer must be of TYPE_SHARED_MEMORY to be able to export it...\n");
        return false;
    }

    struct ra_buf_vk *buf_vk = buf->priv;
    struct vk_memslice *mem = &buf_vk->slice.mem;

    return ra_vk_mem_get_external_info(ra, mem, ret);
}
#define MPVK_NUM_DS MPVK_MAX_STREAMING_DEPTH
// For ra_renderpass.priv

View File

@ -29,3 +29,17 @@ struct vk_cmd *ra_vk_submit(struct ra *ra, struct ra_tex *tex);
// May be called on a struct ra of any type. Returns NULL if the ra is not
// a vulkan ra.
struct mpvk_ctx *ra_vk_get(struct ra *ra);
// Describes a piece of Vulkan device memory that has been exported for use by
// another API, as filled in by ra_vk_buf_get_external_info().
struct vk_external_mem {
#if HAVE_WIN32_DESKTOP
// Win32 handle returned by vkGetMemoryWin32HandleKHR
HANDLE mem_handle;
#else
// File descriptor returned by vkGetMemoryFdKHR
int mem_fd;
#endif
// Total size of the exported memory allocation (the slab the buffer lives in)
size_t mem_size;
// Size of the buffer's own slice
size_t size;
// Offset of the buffer's slice within the exported memory allocation
size_t offset;
};
// Export an ra_buf for importing by another api. The buffer must be of type
// RA_BUF_TYPE_SHARED_MEMORY. On success, `ret` holds the exported memory
// handle together with the buffer's size and offset within that memory.
// Returns false if the buffer has the wrong type or the export fails.
bool ra_vk_buf_get_external_info(struct ra *ra, struct ra_buf *buf, struct vk_external_mem *ret);

View File

@ -438,6 +438,38 @@ static void add_qinfo(void *tactx, VkDeviceQueueCreateInfo **qinfos,
MP_TARRAY_APPEND(tactx, *qinfos, *num_qinfos, qinfo);
}
// Enumerate the extensions offered by vk->physd and record in `vk` whether
// the external-memory extensions are among them.
//
// Returns true if the extension list could be enumerated. Returns false if
// either enumeration call failed; in that case no availability flag is
// touched.
static bool detect_device_extensions(struct mpvk_ctx *vk)
{
    bool ok = false;
    uint32_t num_exts;
    VkExtensionProperties *props = NULL;

    // First pass: only ask how many extensions there are
    VK(vkEnumerateDeviceExtensionProperties(vk->physd, NULL,
                                            &num_exts, NULL));

    // Second pass: fetch the actual property records
    props = talloc_array(NULL, VkExtensionProperties, num_exts);
    VK(vkEnumerateDeviceExtensionProperties(vk->physd,
                                            NULL, &num_exts, props));

    for (uint32_t i = 0; i < num_exts; i++) {
        const char *name = props[i].extensionName;

        if (strcmp(VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME, name) == 0) {
            vk->has_ext_external_memory = true;
        } else if (strcmp(MP_VK_EXTERNAL_MEMORY_EXPORT_EXTENSION_NAME,
                          name) == 0) {
            vk->has_ext_external_memory_export = true;
        }
    }

    ok = true;

error:
    // props is still NULL here if the first enumeration call failed
    talloc_free(props);
    return ok;
}
bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts)
{
assert(vk->physd);
@ -493,9 +525,18 @@ bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts)
add_qinfo(tmp, &qinfos, &num_qinfos, qfs, idx_comp, opts.queue_count);
add_qinfo(tmp, &qinfos, &num_qinfos, qfs, idx_tf, opts.queue_count);
if (!detect_device_extensions(vk)) {
MP_WARN(vk, "Failed to enumerate device extensions. "
"Some features may be disabled.\n");
}
const char **exts = NULL;
int num_exts = 0;
MP_TARRAY_APPEND(tmp, exts, num_exts, VK_KHR_SWAPCHAIN_EXTENSION_NAME);
if (vk->has_ext_external_memory)
MP_TARRAY_APPEND(tmp, exts, num_exts, VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME);
if (vk->has_ext_external_memory_export)
MP_TARRAY_APPEND(tmp, exts, num_exts, MP_VK_EXTERNAL_MEMORY_EXPORT_EXTENSION_NAME);
if (vk->spirv->required_ext)
MP_TARRAY_APPEND(tmp, exts, num_exts, vk->spirv->required_ext);

View File

@ -10,6 +10,12 @@
// Declares a local function pointer named pfn_<name> of type PFN_<name> and
// initializes it with the result of vkGetInstanceProcAddr(vk->inst, "<name>").
// Expects a variable called `vk` with an `inst` member to be in scope. Note
// that the expansion already ends with a semicolon.
#define VK_LOAD_PFN(name) PFN_##name pfn_##name = (PFN_##name) \
vkGetInstanceProcAddr(vk->inst, #name);
// Name of the platform-specific Vulkan extension used for exporting memory:
// the Win32 variant on Windows desktop builds, the fd variant everywhere else.
#if HAVE_WIN32_DESKTOP
#define MP_VK_EXTERNAL_MEMORY_EXPORT_EXTENSION_NAME VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME
#else
#define MP_VK_EXTERNAL_MEMORY_EXPORT_EXTENSION_NAME VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME
#endif
// Return a human-readable name for various struct mpvk_ctx enums
const char* vk_err(VkResult res);