mirror of
https://github.com/mpv-player/mpv
synced 2025-01-23 16:13:39 +00:00
12c6700a3c
A vulkan validation layer update pointed out that this was wrong; we still need to use the access type corresponding to the stage mask, even if it means our code won't be able to skip the pipeline barrier (which would be wrong anyway). In addition to this, we're also not allowed to specify any source access mask when transitioning from top_of_pipe, which doesn't make any sense anyway.
1892 lines
64 KiB
C
#include "video/out/gpu/utils.h"
#include "video/out/gpu/spirv.h"

#include "ra_vk.h"
#include "malloc.h"

static struct ra_fns ra_fns_vk;

enum queue_type {
    GRAPHICS,
    COMPUTE,
    TRANSFER,
};

// For ra.priv
struct ra_vk {
    struct mpvk_ctx *vk;
    struct ra_tex *clear_tex; // stupid hack for clear()
    struct vk_cmd *cmd; // currently recording cmd
};

struct mpvk_ctx *ra_vk_get(struct ra *ra)
{
    if (ra->fns != &ra_fns_vk)
        return NULL;

    struct ra_vk *p = ra->priv;
    return p->vk;
}

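// Flush the currently recorded command buffer (if any) to its queue and
// reset the recording state.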
static void vk_submit(struct ra *ra)
{
    struct ra_vk *p = ra->priv;
    struct mpvk_ctx *vk = ra_vk_get(ra);

    if (p->cmd) {
        vk_cmd_queue(vk, p->cmd);
        p->cmd = NULL;
    }
}

// Returns a command buffer, or NULL on error
static struct vk_cmd *vk_require_cmd(struct ra *ra, enum queue_type type)
{
    struct ra_vk *p = ra->priv;
    struct mpvk_ctx *vk = ra_vk_get(ra);

    struct vk_cmdpool *pool;
    switch (type) {
    case GRAPHICS: pool = vk->pool_graphics; break;
    case COMPUTE:  pool = vk->pool_compute;  break;

    // GRAPHICS and COMPUTE also imply TRANSFER capability (vulkan spec)
    case TRANSFER:
        pool = vk->pool_transfer;
        if (!pool)
            pool = vk->pool_compute;
        if (!pool)
            pool = vk->pool_graphics;
        break;
    default: abort();
    }

    assert(pool);
    if (p->cmd && p->cmd->pool == pool)
        return p->cmd;

    vk_submit(ra);
    p->cmd = vk_cmd_begin(vk, pool);
    return p->cmd;
}

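// Generates a fun##_lazy wrapper that defers the real destructor until the
// currently recording command buffer (or, failing that, the device) is done
// with the object.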
#define MAKE_LAZY_DESTRUCTOR(fun, argtype)                      \
    static void fun##_lazy(struct ra *ra, argtype *arg) {      \
        struct ra_vk *p = ra->priv;                             \
        struct mpvk_ctx *vk = ra_vk_get(ra);                    \
        if (p->cmd) {                                           \
            vk_cmd_callback(p->cmd, (vk_cb) fun, ra, arg);      \
        } else {                                                \
            vk_dev_callback(vk, (vk_cb) fun, ra, arg);          \
        }                                                       \
    }

static void vk_destroy_ra(struct ra *ra)
{
    struct ra_vk *p = ra->priv;
    struct mpvk_ctx *vk = ra_vk_get(ra);

    vk_submit(ra);
    mpvk_flush_commands(vk);
    mpvk_poll_commands(vk, UINT64_MAX);
    ra_tex_free(ra, &p->clear_tex);

    talloc_free(ra);
}

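// Enumerate the formats supported by the device, fill in ra->formats, and
// derive some related capability bits (1D/3D textures, max size, blitting).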
static bool vk_setup_formats(struct ra *ra)
{
    struct mpvk_ctx *vk = ra_vk_get(ra);

    for (const struct vk_format *vk_fmt = vk_formats; vk_fmt->name; vk_fmt++) {
        VkFormatProperties prop;
        vkGetPhysicalDeviceFormatProperties(vk->physd, vk_fmt->iformat, &prop);

        // As a bare minimum, we need to sample from an allocated image
        VkFormatFeatureFlags flags = prop.optimalTilingFeatures;
        if (!(flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT))
            continue;

        VkFormatFeatureFlags linear_bits, render_bits;
        linear_bits = VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
        render_bits = VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
                      VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;

        struct ra_format *fmt = talloc_zero(ra, struct ra_format);
        *fmt = (struct ra_format) {
            .name = vk_fmt->name,
            .priv = (void *)vk_fmt,
            .ctype = vk_fmt->ctype,
            .ordered = !vk_fmt->fucked_order,
            .num_components = vk_fmt->components,
            .pixel_size = vk_fmt->bytes,
            .linear_filter = !!(flags & linear_bits),
            .renderable = !!(flags & render_bits),
        };

        for (int i = 0; i < 4; i++)
            fmt->component_size[i] = fmt->component_depth[i] = vk_fmt->bits[i];

        fmt->glsl_format = ra_fmt_glsl_format(fmt);

        MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, fmt);
    }

    // Populate some other capabilities related to formats while we're at it
    VkImageType imgType[3] = {
        VK_IMAGE_TYPE_1D,
        VK_IMAGE_TYPE_2D,
        VK_IMAGE_TYPE_3D
    };

    // R8_UNORM is supported on literally every single vulkan implementation
    const VkFormat testfmt = VK_FORMAT_R8_UNORM;

    for (int d = 0; d < 3; d++) {
        VkImageFormatProperties iprop;
        VkResult res = vkGetPhysicalDeviceImageFormatProperties(vk->physd,
                testfmt, imgType[d], VK_IMAGE_TILING_OPTIMAL,
                VK_IMAGE_USAGE_SAMPLED_BIT, 0, &iprop);

        switch (imgType[d]) {
        case VK_IMAGE_TYPE_1D:
            if (res == VK_SUCCESS)
                ra->caps |= RA_CAP_TEX_1D;
            break;
        case VK_IMAGE_TYPE_2D:
            // 2D formats must be supported by RA, so ensure this is the case
            VK_ASSERT(res, "Querying 2D format limits");
            ra->max_texture_wh = MPMIN(iprop.maxExtent.width, iprop.maxExtent.height);
            break;
        case VK_IMAGE_TYPE_3D:
            if (res == VK_SUCCESS)
                ra->caps |= RA_CAP_TEX_3D;
            break;
        }
    }

    // RA_CAP_BLIT implies both blitting between images as well as blitting
    // directly to the swapchain image, so check for all three operations
    bool blittable = true;
    VkFormatProperties prop;
    vkGetPhysicalDeviceFormatProperties(vk->physd, testfmt, &prop);
    if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_SRC_BIT))
        blittable = false;
    if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT))
        blittable = false;

    vkGetPhysicalDeviceFormatProperties(vk->physd, vk->surf_format.format, &prop);
    if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT))
        blittable = false;

    if (blittable)
        ra->caps |= RA_CAP_BLIT;

    return true;

error:
    return false;
}

static struct ra_fns ra_fns_vk;

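// Create an RA instance on top of an already-initialized mpvk_ctx.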
struct ra *ra_create_vk(struct mpvk_ctx *vk, struct mp_log *log)
{
    assert(vk->dev);
    assert(vk->alloc);

    struct ra *ra = talloc_zero(NULL, struct ra);
    ra->log = log;
    ra->fns = &ra_fns_vk;

    struct ra_vk *p = ra->priv = talloc_zero(ra, struct ra_vk);
    p->vk = vk;

    ra->caps |= vk->spirv->ra_caps;
    ra->glsl_version = vk->spirv->glsl_version;
    ra->glsl_vulkan = true;
    ra->max_shmem = vk->limits.maxComputeSharedMemorySize;
    ra->max_pushc_size = vk->limits.maxPushConstantsSize;

    if (vk->pool_compute) {
        ra->caps |= RA_CAP_COMPUTE;
        // If we have more compute queues than graphics queues, we probably
        // want to be using them. (This seems mostly relevant for AMD)
        if (vk->pool_compute->num_queues > vk->pool_graphics->num_queues)
            ra->caps |= RA_CAP_PARALLEL_COMPUTE;
    }

    if (!vk_setup_formats(ra))
        goto error;

    // UBO support is required
    ra->caps |= RA_CAP_BUF_RO | RA_CAP_FRAGCOORD;

    // textureGather is only supported in GLSL 400+
    if (ra->glsl_version >= 400)
        ra->caps |= RA_CAP_GATHER;

    // Try creating a shader storage buffer
    struct ra_buf_params ssbo_params = {
        .type = RA_BUF_TYPE_SHADER_STORAGE,
        .size = 16,
    };

    struct ra_buf *ssbo = ra_buf_create(ra, &ssbo_params);
    if (ssbo) {
        ra->caps |= RA_CAP_BUF_RW;
        ra_buf_free(ra, &ssbo);
    }

    // To support clear() by region, we need to allocate a dummy 1x1 image that
    // will be used as the source of blit operations
    struct ra_tex_params clear_params = {
        .dimensions = 1, // no point in using a 2D image if height = 1
        .w = 1,
        .h = 1,
        .d = 1,
        .format = ra_find_float16_format(ra, 4),
        .blit_src = 1,
        .host_mutable = 1,
    };

    p->clear_tex = ra_tex_create(ra, &clear_params);
    if (!p->clear_tex) {
        MP_ERR(ra, "Failed creating 1x1 dummy texture for clear()!\n");
        goto error;
    }

    return ra;

error:
    vk_destroy_ra(ra);
    return NULL;
}

// Boilerplate wrapper around vkCreateRenderPass to ensure passes remain
// compatible. The renderpass will automatically transition the image out of
// initialLayout and into finalLayout.
static VkResult vk_create_render_pass(VkDevice dev, const struct ra_format *fmt,
                                      VkAttachmentLoadOp loadOp,
                                      VkImageLayout initialLayout,
                                      VkImageLayout finalLayout,
                                      VkRenderPass *out)
{
    struct vk_format *vk_fmt = fmt->priv;
    assert(fmt->renderable);

    VkRenderPassCreateInfo rinfo = {
        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
        .attachmentCount = 1,
        .pAttachments = &(VkAttachmentDescription) {
            .format = vk_fmt->iformat,
            .samples = VK_SAMPLE_COUNT_1_BIT,
            .loadOp = loadOp,
            .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
            .initialLayout = initialLayout,
            .finalLayout = finalLayout,
        },
        .subpassCount = 1,
        .pSubpasses = &(VkSubpassDescription) {
            .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
            .colorAttachmentCount = 1,
            .pColorAttachments = &(VkAttachmentReference) {
                .attachment = 0,
                .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
            },
        },
    };

    return vkCreateRenderPass(dev, &rinfo, MPVK_ALLOCATOR, out);
}

// For ra_tex.priv
struct ra_tex_vk {
    bool external_img;
    enum queue_type upload_queue;
    VkImageType type;
    VkImage img;
    struct vk_memslice mem;
    // for sampling
    VkImageView view;
    VkSampler sampler;
    // for rendering
    VkFramebuffer framebuffer;
    VkRenderPass dummyPass;
    // for uploading
    struct ra_buf_pool pbo;
    // "current" metadata, can change during the course of execution
    VkImageLayout current_layout;
    VkAccessFlags current_access;
    // the signal guards reuse, and can be NULL
    struct vk_signal *sig;
    VkPipelineStageFlags sig_stage;
    VkSemaphore ext_dep; // external semaphore, not owned by the ra_tex
};

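// Register an external semaphore that the next command touching this texture
// must wait on (consumed by tex_barrier).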
void ra_tex_vk_external_dep(struct ra *ra, struct ra_tex *tex, VkSemaphore dep)
{
    struct ra_tex_vk *tex_vk = tex->priv;
    assert(!tex_vk->ext_dep);
    tex_vk->ext_dep = dep;
}

// Small helper to ease image barrier creation. if `discard` is set, the contents
// of the image will be undefined after the barrier
static void tex_barrier(struct ra *ra, struct vk_cmd *cmd, struct ra_tex *tex,
                        VkPipelineStageFlags stage, VkAccessFlags newAccess,
                        VkImageLayout newLayout, bool discard)
{
    struct mpvk_ctx *vk = ra_vk_get(ra);
    struct ra_tex_vk *tex_vk = tex->priv;

    if (tex_vk->ext_dep) {
        vk_cmd_dep(cmd, tex_vk->ext_dep, stage);
        tex_vk->ext_dep = NULL;
    }

    VkImageMemoryBarrier imgBarrier = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .oldLayout = tex_vk->current_layout,
        .newLayout = newLayout,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .srcAccessMask = tex_vk->current_access,
        .dstAccessMask = newAccess,
        .image = tex_vk->img,
        .subresourceRange = vk_range,
    };

    if (discard) {
        imgBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
        imgBarrier.srcAccessMask = 0;
    }

    VkEvent event = NULL;
    vk_cmd_wait(vk, cmd, &tex_vk->sig, stage, &event);

    bool need_trans = tex_vk->current_layout != newLayout ||
                      tex_vk->current_access != newAccess;

    // Transitioning to VK_IMAGE_LAYOUT_UNDEFINED is a pseudo-operation
    // that for us means we don't need to perform the actual transition
    if (need_trans && newLayout != VK_IMAGE_LAYOUT_UNDEFINED) {
        if (event) {
            vkCmdWaitEvents(cmd->buf, 1, &event, tex_vk->sig_stage,
                            stage, 0, NULL, 0, NULL, 1, &imgBarrier);
        } else {
            // If we're not using an event, then the source stage is irrelevant
            // because we're coming from a different queue anyway, so we can
            // safely set it to TOP_OF_PIPE.
            imgBarrier.srcAccessMask = 0;
            vkCmdPipelineBarrier(cmd->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                                 stage, 0, 0, NULL, 0, NULL, 1, &imgBarrier);
        }
    }

    tex_vk->current_layout = newLayout;
    tex_vk->current_access = newAccess;
}

static void tex_signal(struct ra *ra, struct vk_cmd *cmd, struct ra_tex *tex,
                       VkPipelineStageFlags stage)
{
    struct ra_tex_vk *tex_vk = tex->priv;
    struct mpvk_ctx *vk = ra_vk_get(ra);
    assert(!tex_vk->sig);

    tex_vk->sig = vk_cmd_signal(vk, cmd, stage);
    tex_vk->sig_stage = stage;
}

static void vk_tex_destroy(struct ra *ra, struct ra_tex *tex)
{
    if (!tex)
        return;

    struct mpvk_ctx *vk = ra_vk_get(ra);
    struct ra_tex_vk *tex_vk = tex->priv;

    ra_buf_pool_uninit(ra, &tex_vk->pbo);
    vk_signal_destroy(vk, &tex_vk->sig);
    vkDestroyFramebuffer(vk->dev, tex_vk->framebuffer, MPVK_ALLOCATOR);
    vkDestroyRenderPass(vk->dev, tex_vk->dummyPass, MPVK_ALLOCATOR);
    vkDestroySampler(vk->dev, tex_vk->sampler, MPVK_ALLOCATOR);
    vkDestroyImageView(vk->dev, tex_vk->view, MPVK_ALLOCATOR);
    if (!tex_vk->external_img) {
        vkDestroyImage(vk->dev, tex_vk->img, MPVK_ALLOCATOR);
        vk_free_memslice(vk, tex_vk->mem);
    }

    talloc_free(tex);
}

MAKE_LAZY_DESTRUCTOR(vk_tex_destroy, struct ra_tex);

// Initializes non-VkImage values like the image view, samplers, etc.
static bool vk_init_image(struct ra *ra, struct ra_tex *tex)
{
    struct mpvk_ctx *vk = ra_vk_get(ra);

    struct ra_tex_params *params = &tex->params;
    struct ra_tex_vk *tex_vk = tex->priv;
    assert(tex_vk->img);

    tex_vk->current_layout = VK_IMAGE_LAYOUT_UNDEFINED;
    tex_vk->current_access = 0;

    if (params->render_src || params->render_dst) {
        static const VkImageViewType viewType[] = {
            [VK_IMAGE_TYPE_1D] = VK_IMAGE_VIEW_TYPE_1D,
            [VK_IMAGE_TYPE_2D] = VK_IMAGE_VIEW_TYPE_2D,
            [VK_IMAGE_TYPE_3D] = VK_IMAGE_VIEW_TYPE_3D,
        };

        const struct vk_format *fmt = params->format->priv;
        VkImageViewCreateInfo vinfo = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
            .image = tex_vk->img,
            .viewType = viewType[tex_vk->type],
            .format = fmt->iformat,
            .subresourceRange = vk_range,
        };

        VK(vkCreateImageView(vk->dev, &vinfo, MPVK_ALLOCATOR, &tex_vk->view));
    }

    if (params->render_src) {
        assert(params->format->linear_filter || !params->src_linear);
        VkFilter filter = params->src_linear
                            ? VK_FILTER_LINEAR
                            : VK_FILTER_NEAREST;
        VkSamplerAddressMode wrap = params->src_repeat
                            ? VK_SAMPLER_ADDRESS_MODE_REPEAT
                            : VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
        VkSamplerCreateInfo sinfo = {
            .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
            .magFilter = filter,
            .minFilter = filter,
            .addressModeU = wrap,
            .addressModeV = wrap,
            .addressModeW = wrap,
            .maxAnisotropy = 1.0,
        };

        VK(vkCreateSampler(vk->dev, &sinfo, MPVK_ALLOCATOR, &tex_vk->sampler));
    }

    if (params->render_dst) {
        // Framebuffers need to be created against a specific render pass
        // layout, so we need to temporarily create a skeleton/dummy render
        // pass for vulkan to figure out the compatibility
        VK(vk_create_render_pass(vk->dev, params->format,
                                 VK_ATTACHMENT_LOAD_OP_DONT_CARE,
                                 VK_IMAGE_LAYOUT_UNDEFINED,
                                 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                 &tex_vk->dummyPass));

        VkFramebufferCreateInfo finfo = {
            .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
            .renderPass = tex_vk->dummyPass,
            .attachmentCount = 1,
            .pAttachments = &tex_vk->view,
            .width = tex->params.w,
            .height = tex->params.h,
            .layers = 1,
        };

        VK(vkCreateFramebuffer(vk->dev, &finfo, MPVK_ALLOCATOR,
                               &tex_vk->framebuffer));

        // NOTE: Normally we would free the dummyPass again here, but a bug
        // in the nvidia vulkan driver causes a segfault if you do.
    }

    return true;

error:
    return false;
}

static struct ra_tex *vk_tex_create(struct ra *ra,
                                    const struct ra_tex_params *params)
{
    struct mpvk_ctx *vk = ra_vk_get(ra);
    assert(!params->format->dummy_format);

    struct ra_tex *tex = talloc_zero(NULL, struct ra_tex);
    tex->params = *params;
    tex->params.initial_data = NULL;

    struct ra_tex_vk *tex_vk = tex->priv = talloc_zero(tex, struct ra_tex_vk);
    tex_vk->upload_queue = GRAPHICS;

    const struct vk_format *fmt = params->format->priv;
    switch (params->dimensions) {
    case 1: tex_vk->type = VK_IMAGE_TYPE_1D; break;
    case 2: tex_vk->type = VK_IMAGE_TYPE_2D; break;
    case 3: tex_vk->type = VK_IMAGE_TYPE_3D; break;
    default: abort();
    }

    VkImageUsageFlags usage = 0;
    if (params->render_src)
        usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
    if (params->render_dst)
        usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
    if (params->storage_dst)
        usage |= VK_IMAGE_USAGE_STORAGE_BIT;
    if (params->blit_src)
        usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
    if (params->host_mutable || params->blit_dst || params->initial_data)
        usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;

    // Always use the transfer pool if available, for efficiency
    if (params->host_mutable && vk->pool_transfer)
        tex_vk->upload_queue = TRANSFER;

    // Double-check image usage support and fail immediately if invalid
    VkImageFormatProperties iprop;
    VkResult res = vkGetPhysicalDeviceImageFormatProperties(vk->physd,
            fmt->iformat, tex_vk->type, VK_IMAGE_TILING_OPTIMAL, usage, 0,
            &iprop);
    if (res == VK_ERROR_FORMAT_NOT_SUPPORTED) {
        return NULL;
    } else {
        VK_ASSERT(res, "Querying image format properties");
    }

    VkFormatProperties prop;
    vkGetPhysicalDeviceFormatProperties(vk->physd, fmt->iformat, &prop);
    VkFormatFeatureFlags flags = prop.optimalTilingFeatures;

    bool has_blit_src = flags & VK_FORMAT_FEATURE_BLIT_SRC_BIT,
         has_src_linear = flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;

    if (params->w > iprop.maxExtent.width ||
        params->h > iprop.maxExtent.height ||
        params->d > iprop.maxExtent.depth ||
        (params->blit_src && !has_blit_src) ||
        (params->src_linear && !has_src_linear))
    {
        return NULL;
    }

    // FIXME: Since we can't keep track of queue family ownership properly,
    // and we don't know in advance what types of queue families this image
    // will belong to, we're forced to share all of our images between all
    // command pools.
    uint32_t qfs[3] = {0};
    for (int i = 0; i < vk->num_pools; i++)
        qfs[i] = vk->pools[i]->qf;

    VkImageCreateInfo iinfo = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
        .imageType = tex_vk->type,
        .format = fmt->iformat,
        .extent = (VkExtent3D) { params->w, params->h, params->d },
        .mipLevels = 1,
        .arrayLayers = 1,
        .samples = VK_SAMPLE_COUNT_1_BIT,
        .tiling = VK_IMAGE_TILING_OPTIMAL,
        .usage = usage,
        .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
        .sharingMode = vk->num_pools > 1 ? VK_SHARING_MODE_CONCURRENT
                                         : VK_SHARING_MODE_EXCLUSIVE,
        .queueFamilyIndexCount = vk->num_pools,
        .pQueueFamilyIndices = qfs,
    };

    VK(vkCreateImage(vk->dev, &iinfo, MPVK_ALLOCATOR, &tex_vk->img));

    VkMemoryPropertyFlags memFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
    VkMemoryRequirements reqs;
    vkGetImageMemoryRequirements(vk->dev, tex_vk->img, &reqs);

    struct vk_memslice *mem = &tex_vk->mem;
    if (!vk_malloc_generic(vk, reqs, memFlags, mem))
        goto error;

    VK(vkBindImageMemory(vk->dev, tex_vk->img, mem->vkmem, mem->offset));

    if (!vk_init_image(ra, tex))
        goto error;

    if (params->initial_data) {
        struct ra_tex_upload_params ul_params = {
            .tex = tex,
            .invalidate = true,
            .src = params->initial_data,
            .stride = params->w * fmt->bytes,
        };
        if (!ra->fns->tex_upload(ra, &ul_params))
            goto error;
    }

    return tex;

error:
    vk_tex_destroy(ra, tex);
    return NULL;
}

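// Wrap an existing swapchain VkImage into a ra_tex; the image itself is not
// owned by the returned texture.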
struct ra_tex *ra_vk_wrap_swapchain_img(struct ra *ra, VkImage vkimg,
                                        VkSwapchainCreateInfoKHR info)
{
    struct mpvk_ctx *vk = ra_vk_get(ra);
    struct ra_tex *tex = NULL;

    const struct ra_format *format = NULL;
    for (int i = 0; i < ra->num_formats; i++) {
        const struct vk_format *fmt = ra->formats[i]->priv;
        if (fmt->iformat == vk->surf_format.format) {
            format = ra->formats[i];
            break;
        }
    }

    if (!format) {
        MP_ERR(ra, "Could not find ra_format suitable for wrapped swchain image "
                   "with surface format 0x%x\n", vk->surf_format.format);
        goto error;
    }

    tex = talloc_zero(NULL, struct ra_tex);
    tex->params = (struct ra_tex_params) {
        .format = format,
        .dimensions = 2,
        .w = info.imageExtent.width,
        .h = info.imageExtent.height,
        .d = 1,
        .blit_src = !!(info.imageUsage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT),
        .blit_dst = !!(info.imageUsage & VK_IMAGE_USAGE_TRANSFER_DST_BIT),
        .render_src = !!(info.imageUsage & VK_IMAGE_USAGE_SAMPLED_BIT),
        .render_dst = !!(info.imageUsage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT),
        .storage_dst = !!(info.imageUsage & VK_IMAGE_USAGE_STORAGE_BIT),
    };

    struct ra_tex_vk *tex_vk = tex->priv = talloc_zero(tex, struct ra_tex_vk);
    tex_vk->type = VK_IMAGE_TYPE_2D;
    tex_vk->external_img = true;
    tex_vk->img = vkimg;

    if (!vk_init_image(ra, tex))
        goto error;

    return tex;

error:
    vk_tex_destroy(ra, tex);
    return NULL;
}

// For ra_buf.priv
struct ra_buf_vk {
    struct vk_bufslice slice;
    int refcount; // 1 = object allocated but not in use, > 1 = in use
    bool needsflush;
    enum queue_type update_queue;
    // "current" metadata, can change during course of execution
    VkPipelineStageFlags current_stage;
    VkAccessFlags current_access;
};

static void vk_buf_deref(struct ra *ra, struct ra_buf *buf)
{
    if (!buf)
        return;

    struct mpvk_ctx *vk = ra_vk_get(ra);
    struct ra_buf_vk *buf_vk = buf->priv;

    if (--buf_vk->refcount == 0) {
        vk_free_memslice(vk, buf_vk->slice.mem);
        talloc_free(buf);
    }
}

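// Prepare a buffer slice for the given stage/access combination, flushing
// pending host writes and emitting a pipeline barrier only when the access
// masks actually differ.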
static void buf_barrier(struct ra *ra, struct vk_cmd *cmd, struct ra_buf *buf,
                        VkPipelineStageFlags newStage,
                        VkAccessFlags newAccess, int offset, size_t size)
{
    struct ra_buf_vk *buf_vk = buf->priv;

    VkBufferMemoryBarrier buffBarrier = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
        .srcAccessMask = buf_vk->current_access,
        .dstAccessMask = newAccess,
        .buffer = buf_vk->slice.buf,
        .offset = offset,
        .size = size,
    };

    if (buf_vk->needsflush || buf->params.host_mapped) {
        buffBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
        buf_vk->current_stage = VK_PIPELINE_STAGE_HOST_BIT;
        buf_vk->needsflush = false;
    }

    if (buffBarrier.srcAccessMask != buffBarrier.dstAccessMask) {
        vkCmdPipelineBarrier(cmd->buf, buf_vk->current_stage, newStage, 0,
                             0, NULL, 1, &buffBarrier, 0, NULL);
    }

    buf_vk->current_stage = newStage;
    buf_vk->current_access = newAccess;
    buf_vk->refcount++;
    vk_cmd_callback(cmd, (vk_cb) vk_buf_deref, ra, buf);
}

#define vk_buf_destroy vk_buf_deref
MAKE_LAZY_DESTRUCTOR(vk_buf_destroy, struct ra_buf);

static void vk_buf_update(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset,
                          const void *data, size_t size)
{
    assert(buf->params.host_mutable || buf->params.initial_data);
    struct ra_buf_vk *buf_vk = buf->priv;

    // For host-mapped buffers, we can just directly memcpy the buffer contents.
    // Otherwise, we can update the buffer from the GPU using a command buffer.
    if (buf_vk->slice.data) {
        assert(offset + size <= buf->params.size);
        uintptr_t addr = (uintptr_t)buf_vk->slice.data + offset;
        memcpy((void *)addr, data, size);
        buf_vk->needsflush = true;
    } else {
        struct vk_cmd *cmd = vk_require_cmd(ra, buf_vk->update_queue);
        if (!cmd) {
            MP_ERR(ra, "Failed updating buffer!\n");
            return;
        }

        buf_barrier(ra, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT,
                    VK_ACCESS_TRANSFER_WRITE_BIT, offset, size);

        VkDeviceSize bufOffset = buf_vk->slice.mem.offset + offset;
        assert(bufOffset == MP_ALIGN_UP(bufOffset, 4));
        vkCmdUpdateBuffer(cmd->buf, buf_vk->slice.buf, bufOffset, size, data);
    }
}

static struct ra_buf *vk_buf_create(struct ra *ra,
                                    const struct ra_buf_params *params)
{
    struct mpvk_ctx *vk = ra_vk_get(ra);

    struct ra_buf *buf = talloc_zero(NULL, struct ra_buf);
    buf->params = *params;

    struct ra_buf_vk *buf_vk = buf->priv = talloc_zero(buf, struct ra_buf_vk);
    buf_vk->current_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
    buf_vk->current_access = 0;
    buf_vk->refcount = 1;

    VkBufferUsageFlags bufFlags = 0;
    VkMemoryPropertyFlags memFlags = 0;
    VkDeviceSize align = 4; // alignment 4 is needed for buf_update

    switch (params->type) {
    case RA_BUF_TYPE_TEX_UPLOAD:
        bufFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
        memFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
        // Use TRANSFER-style updates for large enough buffers for efficiency
        if (params->size > 1024*1024) // 1 MB
            buf_vk->update_queue = TRANSFER;
        break;
    case RA_BUF_TYPE_UNIFORM:
        bufFlags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
        memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
        align = MP_ALIGN_UP(align, vk->limits.minUniformBufferOffsetAlignment);
        break;
    case RA_BUF_TYPE_SHADER_STORAGE:
        bufFlags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
        memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
        align = MP_ALIGN_UP(align, vk->limits.minStorageBufferOffsetAlignment);
        buf_vk->update_queue = COMPUTE;
        break;
    case RA_BUF_TYPE_VERTEX:
        bufFlags |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
        memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
        break;
    default: abort();
    }

    if (params->host_mutable || params->initial_data) {
        bufFlags |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
        align = MP_ALIGN_UP(align, vk->limits.optimalBufferCopyOffsetAlignment);
    }

    if (params->host_mapped) {
        memFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                    VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                    VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
    }

    if (!vk_malloc_buffer(vk, bufFlags, memFlags, params->size, align,
                          &buf_vk->slice))
    {
        goto error;
    }

    if (params->host_mapped)
        buf->data = buf_vk->slice.data;

    if (params->initial_data)
        vk_buf_update(ra, buf, 0, params->initial_data, params->size);

    buf->params.initial_data = NULL; // do this after vk_buf_update
    return buf;

error:
    vk_buf_destroy(ra, buf);
    return NULL;
}

static bool vk_buf_poll(struct ra *ra, struct ra_buf *buf)
{
    struct ra_buf_vk *buf_vk = buf->priv;
    return buf_vk->refcount == 1;
}

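// Texture upload entry point: uploads without a caller-provided buffer go
// through an internal PBO pool, otherwise the given buffer is copied into the
// image directly.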
static bool vk_tex_upload(struct ra *ra,
                          const struct ra_tex_upload_params *params)
{
    struct ra_tex *tex = params->tex;
    struct ra_tex_vk *tex_vk = tex->priv;

    if (!params->buf)
        return ra_tex_upload_pbo(ra, &tex_vk->pbo, params);

    assert(!params->src);
    assert(params->buf);
    struct ra_buf *buf = params->buf;
    struct ra_buf_vk *buf_vk = buf->priv;

    VkBufferImageCopy region = {
        .bufferOffset = buf_vk->slice.mem.offset + params->buf_offset,
        .bufferRowLength = tex->params.w,
        .bufferImageHeight = tex->params.h,
        .imageSubresource = vk_layers,
        .imageExtent = (VkExtent3D){tex->params.w, tex->params.h, tex->params.d},
    };

    if (tex->params.dimensions == 2) {
        int pix_size = tex->params.format->pixel_size;
        region.bufferRowLength = params->stride / pix_size;
        if (region.bufferRowLength * pix_size != params->stride) {
            MP_ERR(ra, "Texture upload strides must be a multiple of the texel "
                       "size!\n");
            goto error;
        }

        if (params->rc) {
            struct mp_rect *rc = params->rc;
            region.imageOffset = (VkOffset3D){rc->x0, rc->y0, 0};
            region.imageExtent = (VkExtent3D){mp_rect_w(*rc), mp_rect_h(*rc), 1};
        }
    }

    uint64_t size = region.bufferRowLength * region.bufferImageHeight *
                    region.imageExtent.depth;

    struct vk_cmd *cmd = vk_require_cmd(ra, tex_vk->upload_queue);
    if (!cmd)
        goto error;

    buf_barrier(ra, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT,
                VK_ACCESS_TRANSFER_READ_BIT, region.bufferOffset, size);

    tex_barrier(ra, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT,
                VK_ACCESS_TRANSFER_WRITE_BIT,
                VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                params->invalidate);

    vkCmdCopyBufferToImage(cmd->buf, buf_vk->slice.buf, tex_vk->img,
                           tex_vk->current_layout, 1, &region);

    tex_signal(ra, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT);

    return true;

error:
    return false;
}

#define MPVK_NUM_DS MPVK_MAX_STREAMING_DEPTH

// For ra_renderpass.priv
struct ra_renderpass_vk {
    // Pipeline / render pass
    VkPipeline pipe;
    VkPipelineLayout pipeLayout;
    VkRenderPass renderPass;
    VkImageLayout initialLayout;
    VkImageLayout finalLayout;
    // Descriptor set (bindings)
    VkDescriptorSetLayout dsLayout;
    VkDescriptorPool dsPool;
    VkDescriptorSet dss[MPVK_NUM_DS];
    int dindex;
    // Vertex buffers (vertices)
    struct ra_buf_pool vbo;

    // For updating
    VkWriteDescriptorSet *dswrite;
    VkDescriptorImageInfo *dsiinfo;
    VkDescriptorBufferInfo *dsbinfo;
};

static void vk_renderpass_destroy(struct ra *ra, struct ra_renderpass *pass)
{
    if (!pass)
        return;

    struct mpvk_ctx *vk = ra_vk_get(ra);
    struct ra_renderpass_vk *pass_vk = pass->priv;

    ra_buf_pool_uninit(ra, &pass_vk->vbo);
    vkDestroyPipeline(vk->dev, pass_vk->pipe, MPVK_ALLOCATOR);
    vkDestroyRenderPass(vk->dev, pass_vk->renderPass, MPVK_ALLOCATOR);
    vkDestroyPipelineLayout(vk->dev, pass_vk->pipeLayout, MPVK_ALLOCATOR);
    vkDestroyDescriptorPool(vk->dev, pass_vk->dsPool, MPVK_ALLOCATOR);
    vkDestroyDescriptorSetLayout(vk->dev, pass_vk->dsLayout, MPVK_ALLOCATOR);

    talloc_free(pass);
}

MAKE_LAZY_DESTRUCTOR(vk_renderpass_destroy, struct ra_renderpass);

static const VkDescriptorType dsType[] = {
    [RA_VARTYPE_TEX]    = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
    [RA_VARTYPE_IMG_W]  = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
    [RA_VARTYPE_BUF_RO] = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
    [RA_VARTYPE_BUF_RW] = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
};

static bool vk_get_input_format(struct ra *ra, struct ra_renderpass_input *inp,
                                VkFormat *out_fmt)
{
    struct mpvk_ctx *vk = ra_vk_get(ra);

    enum ra_ctype ctype;
    switch (inp->type) {
    case RA_VARTYPE_FLOAT:      ctype = RA_CTYPE_FLOAT; break;
    case RA_VARTYPE_BYTE_UNORM: ctype = RA_CTYPE_UNORM; break;
    default: abort();
    }

    assert(inp->dim_m == 1);
    for (const struct vk_format *fmt = vk_formats; fmt->name; fmt++) {
        if (fmt->ctype != ctype)
            continue;
        if (fmt->components != inp->dim_v)
            continue;
        if (fmt->bytes != ra_renderpass_input_layout(inp).size)
            continue;

        // Ensure this format is valid for vertex attributes
        VkFormatProperties prop;
        vkGetPhysicalDeviceFormatProperties(vk->physd, fmt->iformat, &prop);
        if (!(prop.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT))
            continue;

        *out_fmt = fmt->iformat;
        return true;
    }

    return false;
}

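// The cached_program blob is laid out as: a vk_cache_header, then the vertex,
// fragment and compute SPIR-V blobs, then the raw VkPipelineCache data.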
static const char vk_cache_magic[4] = {'R','A','V','K'};
static const int vk_cache_version = 2;

struct vk_cache_header {
    char magic[sizeof(vk_cache_magic)];
    int cache_version;
    char compiler[SPIRV_NAME_MAX_LEN];
    int compiler_version;
    size_t vert_spirv_len;
    size_t frag_spirv_len;
    size_t comp_spirv_len;
    size_t pipecache_len;
};

static bool vk_use_cached_program(const struct ra_renderpass_params *params,
                                  const struct spirv_compiler *spirv,
                                  struct bstr *vert_spirv,
                                  struct bstr *frag_spirv,
                                  struct bstr *comp_spirv,
                                  struct bstr *pipecache)
{
    struct bstr cache = params->cached_program;
    if (cache.len < sizeof(struct vk_cache_header))
        return false;

    struct vk_cache_header *header = (struct vk_cache_header *)cache.start;
    cache = bstr_cut(cache, sizeof(*header));

    if (strncmp(header->magic, vk_cache_magic, sizeof(vk_cache_magic)) != 0)
        return false;
    if (header->cache_version != vk_cache_version)
        return false;
    if (strncmp(header->compiler, spirv->name, sizeof(header->compiler)) != 0)
        return false;
    if (header->compiler_version != spirv->compiler_version)
        return false;

#define GET(ptr) \
    if (cache.len < header->ptr##_len)                      \
        return false;                                       \
    *ptr = bstr_splice(cache, 0, header->ptr##_len);        \
    cache = bstr_cut(cache, ptr->len);

    GET(vert_spirv);
    GET(frag_spirv);
    GET(comp_spirv);
    GET(pipecache);
    return true;
}

static VkResult vk_compile_glsl(struct ra *ra, void *tactx,
                                enum glsl_shader type, const char *glsl,
                                struct bstr *spirv)
{
    struct mpvk_ctx *vk = ra_vk_get(ra);
    VkResult ret = VK_SUCCESS;
    int msgl = MSGL_DEBUG;

    if (!vk->spirv->fns->compile_glsl(vk->spirv, tactx, type, glsl, spirv)) {
        ret = VK_ERROR_INVALID_SHADER_NV;
        msgl = MSGL_ERR;
    }

    static const char *shader_names[] = {
        [GLSL_SHADER_VERTEX]   = "vertex",
        [GLSL_SHADER_FRAGMENT] = "fragment",
        [GLSL_SHADER_COMPUTE]  = "compute",
    };

    if (mp_msg_test(ra->log, msgl)) {
        MP_MSG(ra, msgl, "%s shader source:\n", shader_names[type]);
        mp_log_source(ra->log, msgl, glsl);
    }
    return ret;
}

static const VkShaderStageFlags stageFlags[] = {
    [RA_RENDERPASS_TYPE_RASTER]  = VK_SHADER_STAGE_FRAGMENT_BIT,
    [RA_RENDERPASS_TYPE_COMPUTE] = VK_SHADER_STAGE_COMPUTE_BIT,
};

static struct ra_renderpass *vk_renderpass_create(struct ra *ra,
                                    const struct ra_renderpass_params *params)
{
    struct mpvk_ctx *vk = ra_vk_get(ra);
    bool success = false;
    assert(vk->spirv);

    struct ra_renderpass *pass = talloc_zero(NULL, struct ra_renderpass);
    pass->params = *ra_renderpass_params_copy(pass, params);
    pass->params.cached_program = (bstr){0};
    struct ra_renderpass_vk *pass_vk = pass->priv =
        talloc_zero(pass, struct ra_renderpass_vk);

    // temporary allocations/objects
    void *tmp = talloc_new(NULL);
    VkPipelineCache pipeCache = NULL;
    VkShaderModule vert_shader = NULL;
    VkShaderModule frag_shader = NULL;
    VkShaderModule comp_shader = NULL;

    static int dsCount[RA_VARTYPE_COUNT] = {0};
    VkDescriptorSetLayoutBinding *bindings = NULL;
    int num_bindings = 0;

    for (int i = 0; i < params->num_inputs; i++) {
        struct ra_renderpass_input *inp = &params->inputs[i];
        switch (inp->type) {
        case RA_VARTYPE_TEX:
        case RA_VARTYPE_IMG_W:
        case RA_VARTYPE_BUF_RO:
        case RA_VARTYPE_BUF_RW: {
            VkDescriptorSetLayoutBinding desc = {
                .binding = inp->binding,
                .descriptorType = dsType[inp->type],
                .descriptorCount = 1,
                .stageFlags = stageFlags[params->type],
            };

            MP_TARRAY_APPEND(tmp, bindings, num_bindings, desc);
            dsCount[inp->type]++;
            break;
        }
        default: abort();
        }
    }

    VkDescriptorPoolSize *dsPoolSizes = NULL;
    int poolSizeCount = 0;

    for (enum ra_vartype t = 0; t < RA_VARTYPE_COUNT; t++) {
        if (dsCount[t] > 0) {
            VkDescriptorPoolSize dssize = {
                .type = dsType[t],
                .descriptorCount = dsCount[t] * MPVK_NUM_DS,
            };

            MP_TARRAY_APPEND(tmp, dsPoolSizes, poolSizeCount, dssize);
        }
    }

    VkDescriptorPoolCreateInfo pinfo = {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
        .maxSets = MPVK_NUM_DS,
        .pPoolSizes = dsPoolSizes,
        .poolSizeCount = poolSizeCount,
    };

    VK(vkCreateDescriptorPool(vk->dev, &pinfo, MPVK_ALLOCATOR, &pass_vk->dsPool));

    pass_vk->dswrite = talloc_array(pass, VkWriteDescriptorSet, num_bindings);
    pass_vk->dsiinfo = talloc_array(pass, VkDescriptorImageInfo, num_bindings);
    pass_vk->dsbinfo = talloc_array(pass, VkDescriptorBufferInfo, num_bindings);

    VkDescriptorSetLayoutCreateInfo dinfo = {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
        .pBindings = bindings,
        .bindingCount = num_bindings,
    };

    VK(vkCreateDescriptorSetLayout(vk->dev, &dinfo, MPVK_ALLOCATOR,
                                   &pass_vk->dsLayout));

    VkDescriptorSetLayout layouts[MPVK_NUM_DS];
    for (int i = 0; i < MPVK_NUM_DS; i++)
        layouts[i] = pass_vk->dsLayout;

    VkDescriptorSetAllocateInfo ainfo = {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
        .descriptorPool = pass_vk->dsPool,
        .descriptorSetCount = MPVK_NUM_DS,
        .pSetLayouts = layouts,
    };

    VK(vkAllocateDescriptorSets(vk->dev, &ainfo, pass_vk->dss));

    VkPipelineLayoutCreateInfo linfo = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
        .setLayoutCount = 1,
        .pSetLayouts = &pass_vk->dsLayout,
        .pushConstantRangeCount = params->push_constants_size ? 1 : 0,
        .pPushConstantRanges = &(VkPushConstantRange){
            .stageFlags = stageFlags[params->type],
            .offset = 0,
            .size = params->push_constants_size,
        },
    };

    VK(vkCreatePipelineLayout(vk->dev, &linfo, MPVK_ALLOCATOR,
                              &pass_vk->pipeLayout));

    struct bstr vert = {0}, frag = {0}, comp = {0}, pipecache = {0};
    if (vk_use_cached_program(params, vk->spirv, &vert, &frag, &comp, &pipecache)) {
        MP_VERBOSE(ra, "Using cached SPIR-V and VkPipeline.\n");
    } else {
        pipecache.len = 0;
        switch (params->type) {
        case RA_RENDERPASS_TYPE_RASTER:
            VK(vk_compile_glsl(ra, tmp, GLSL_SHADER_VERTEX,
                               params->vertex_shader, &vert));
            VK(vk_compile_glsl(ra, tmp, GLSL_SHADER_FRAGMENT,
                               params->frag_shader, &frag));
            comp.len = 0;
            break;
        case RA_RENDERPASS_TYPE_COMPUTE:
            VK(vk_compile_glsl(ra, tmp, GLSL_SHADER_COMPUTE,
                               params->compute_shader, &comp));
            frag.len = 0;
            vert.len = 0;
            break;
        }
    }

    VkPipelineCacheCreateInfo pcinfo = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
        .pInitialData = pipecache.start,
        .initialDataSize = pipecache.len,
    };

    VK(vkCreatePipelineCache(vk->dev, &pcinfo, MPVK_ALLOCATOR, &pipeCache));

    VkShaderModuleCreateInfo sinfo = {
        .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
    };

    switch (params->type) {
    case RA_RENDERPASS_TYPE_RASTER: {
        sinfo.pCode = (uint32_t *)vert.start;
        sinfo.codeSize = vert.len;
        VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &vert_shader));

        sinfo.pCode = (uint32_t *)frag.start;
        sinfo.codeSize = frag.len;
        VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &frag_shader));

        VkVertexInputAttributeDescription *attrs = talloc_array(tmp,
                VkVertexInputAttributeDescription, params->num_vertex_attribs);

        for (int i = 0; i < params->num_vertex_attribs; i++) {
            struct ra_renderpass_input *inp = &params->vertex_attribs[i];
            attrs[i] = (VkVertexInputAttributeDescription) {
                .location = i,
                .binding = 0,
                .offset = inp->offset,
            };

            if (!vk_get_input_format(ra, inp, &attrs[i].format)) {
                MP_ERR(ra, "No suitable VkFormat for vertex attrib '%s'!\n",
                       inp->name);
                goto error;
            }
        }

        // This is the most common case, so optimize towards it. In this case,
        // the renderpass will take care of almost all layout transitions
        pass_vk->initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
        pass_vk->finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
        VkAttachmentLoadOp loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;

        // If we're blending, then we need to explicitly load the previous
        // contents of the color attachment
        if (pass->params.enable_blend)
            loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;

        // If we're invalidating the target, we don't need to load or transition
        if (pass->params.invalidate_target) {
            pass_vk->initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
            loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
        }

        VK(vk_create_render_pass(vk->dev, params->target_format, loadOp,
                                 pass_vk->initialLayout, pass_vk->finalLayout,
                                 &pass_vk->renderPass));

        static const VkBlendFactor blendFactors[] = {
            [RA_BLEND_ZERO]                = VK_BLEND_FACTOR_ZERO,
            [RA_BLEND_ONE]                 = VK_BLEND_FACTOR_ONE,
            [RA_BLEND_SRC_ALPHA]           = VK_BLEND_FACTOR_SRC_ALPHA,
            [RA_BLEND_ONE_MINUS_SRC_ALPHA] = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
        };

        VkGraphicsPipelineCreateInfo cinfo = {
            .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
            .stageCount = 2,
            .pStages = (VkPipelineShaderStageCreateInfo[]) {
                {
                    .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
                    .stage = VK_SHADER_STAGE_VERTEX_BIT,
                    .module = vert_shader,
                    .pName = "main",
                }, {
                    .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
                    .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
                    .module = frag_shader,
                    .pName = "main",
                }
            },
            .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) {
                .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
                .vertexBindingDescriptionCount = 1,
                .pVertexBindingDescriptions = &(VkVertexInputBindingDescription) {
                    .binding = 0,
                    .stride = params->vertex_stride,
                    .inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
                },
                .vertexAttributeDescriptionCount = params->num_vertex_attribs,
                .pVertexAttributeDescriptions = attrs,
            },
            .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
                .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
                .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
            },
            .pViewportState = &(VkPipelineViewportStateCreateInfo) {
                .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
                .viewportCount = 1,
                .scissorCount = 1,
            },
            .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
                .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
                .polygonMode = VK_POLYGON_MODE_FILL,
                .cullMode = VK_CULL_MODE_NONE,
                .lineWidth = 1.0f,
            },
            .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
                .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
                .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
            },
            .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
                .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
                .attachmentCount = 1,
                .pAttachments = &(VkPipelineColorBlendAttachmentState) {
                    .blendEnable = params->enable_blend,
                    .colorBlendOp = VK_BLEND_OP_ADD,
                    .srcColorBlendFactor = blendFactors[params->blend_src_rgb],
                    .dstColorBlendFactor = blendFactors[params->blend_dst_rgb],
                    .alphaBlendOp = VK_BLEND_OP_ADD,
                    .srcAlphaBlendFactor = blendFactors[params->blend_src_alpha],
                    .dstAlphaBlendFactor = blendFactors[params->blend_dst_alpha],
                    .colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
                                      VK_COLOR_COMPONENT_G_BIT |
                                      VK_COLOR_COMPONENT_B_BIT |
                                      VK_COLOR_COMPONENT_A_BIT,
                },
            },
            .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
                .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
                .dynamicStateCount = 2,
                .pDynamicStates = (VkDynamicState[]){
                    VK_DYNAMIC_STATE_VIEWPORT,
                    VK_DYNAMIC_STATE_SCISSOR,
                },
            },
            .layout = pass_vk->pipeLayout,
            .renderPass = pass_vk->renderPass,
        };

        VK(vkCreateGraphicsPipelines(vk->dev, pipeCache, 1, &cinfo,
                                     MPVK_ALLOCATOR, &pass_vk->pipe));
        break;
    }
    case RA_RENDERPASS_TYPE_COMPUTE: {
        sinfo.pCode = (uint32_t *)comp.start;
        sinfo.codeSize = comp.len;
        VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &comp_shader));

        VkComputePipelineCreateInfo cinfo = {
            .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
            .stage = {
                .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
                .stage = VK_SHADER_STAGE_COMPUTE_BIT,
                .module = comp_shader,
                .pName = "main",
            },
            .layout = pass_vk->pipeLayout,
        };

        VK(vkCreateComputePipelines(vk->dev, pipeCache, 1, &cinfo,
                                    MPVK_ALLOCATOR, &pass_vk->pipe));
        break;
    }
    }

    // Update params->cached_program
    struct bstr cache = {0};
    VK(vkGetPipelineCacheData(vk->dev, pipeCache, &cache.len, NULL));
    cache.start = talloc_size(tmp, cache.len);
    VK(vkGetPipelineCacheData(vk->dev, pipeCache, &cache.len, cache.start));

    struct vk_cache_header header = {
        .cache_version = vk_cache_version,
        .compiler_version = vk->spirv->compiler_version,
        .vert_spirv_len = vert.len,
        .frag_spirv_len = frag.len,
        .comp_spirv_len = comp.len,
        .pipecache_len = cache.len,
    };

    for (int i = 0; i < MP_ARRAY_SIZE(header.magic); i++)
        header.magic[i] = vk_cache_magic[i];
    for (int i = 0; i < sizeof(vk->spirv->name); i++)
        header.compiler[i] = vk->spirv->name[i];

    struct bstr *prog = &pass->params.cached_program;
    bstr_xappend(pass, prog, (struct bstr){ (char *) &header, sizeof(header) });
    bstr_xappend(pass, prog, vert);
    bstr_xappend(pass, prog, frag);
    bstr_xappend(pass, prog, comp);
    bstr_xappend(pass, prog, cache);

    success = true;

error:
    if (!success) {
        vk_renderpass_destroy(ra, pass);
        pass = NULL;
    }

    vkDestroyShaderModule(vk->dev, vert_shader, MPVK_ALLOCATOR);
    vkDestroyShaderModule(vk->dev, frag_shader, MPVK_ALLOCATOR);
    vkDestroyShaderModule(vk->dev, comp_shader, MPVK_ALLOCATOR);
    vkDestroyPipelineCache(vk->dev, pipeCache, MPVK_ALLOCATOR);
    talloc_free(tmp);
    return pass;
}

static const VkPipelineStageFlags passStages[] = {
    [RA_RENDERPASS_TYPE_RASTER]  = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
    [RA_RENDERPASS_TYPE_COMPUTE] = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
};

static void vk_update_descriptor(struct ra *ra, struct vk_cmd *cmd,
                                 struct ra_renderpass *pass,
                                 struct ra_renderpass_input_val val,
                                 VkDescriptorSet ds, int idx)
{
    struct ra_renderpass_vk *pass_vk = pass->priv;
    struct ra_renderpass_input *inp = &pass->params.inputs[val.index];

    VkWriteDescriptorSet *wds = &pass_vk->dswrite[idx];
    *wds = (VkWriteDescriptorSet) {
        .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
        .dstSet = ds,
        .dstBinding = inp->binding,
        .descriptorCount = 1,
        .descriptorType = dsType[inp->type],
    };

    switch (inp->type) {
    case RA_VARTYPE_TEX: {
        struct ra_tex *tex = *(struct ra_tex **)val.data;
        struct ra_tex_vk *tex_vk = tex->priv;

        assert(tex->params.render_src);
        tex_barrier(ra, cmd, tex, passStages[pass->params.type],
                    VK_ACCESS_SHADER_READ_BIT,
                    VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, false);

        VkDescriptorImageInfo *iinfo = &pass_vk->dsiinfo[idx];
        *iinfo = (VkDescriptorImageInfo) {
            .sampler = tex_vk->sampler,
            .imageView = tex_vk->view,
            .imageLayout = tex_vk->current_layout,
        };

        wds->pImageInfo = iinfo;
        break;
    }
    case RA_VARTYPE_IMG_W: {
        struct ra_tex *tex = *(struct ra_tex **)val.data;
        struct ra_tex_vk *tex_vk = tex->priv;

        assert(tex->params.storage_dst);
        tex_barrier(ra, cmd, tex, passStages[pass->params.type],
                    VK_ACCESS_SHADER_WRITE_BIT,
                    VK_IMAGE_LAYOUT_GENERAL, false);

        VkDescriptorImageInfo *iinfo = &pass_vk->dsiinfo[idx];
        *iinfo = (VkDescriptorImageInfo) {
            .imageView = tex_vk->view,
            .imageLayout = tex_vk->current_layout,
        };

        wds->pImageInfo = iinfo;
        break;
    }
    case RA_VARTYPE_BUF_RO:
    case RA_VARTYPE_BUF_RW: {
        struct ra_buf *buf = *(struct ra_buf **)val.data;
        struct ra_buf_vk *buf_vk = buf->priv;

        VkBufferUsageFlags access = VK_ACCESS_SHADER_READ_BIT;
        if (inp->type == RA_VARTYPE_BUF_RW)
            access |= VK_ACCESS_SHADER_WRITE_BIT;

        buf_barrier(ra, cmd, buf, passStages[pass->params.type],
                    access, buf_vk->slice.mem.offset, buf->params.size);

        VkDescriptorBufferInfo *binfo = &pass_vk->dsbinfo[idx];
        *binfo = (VkDescriptorBufferInfo) {
            .buffer = buf_vk->slice.buf,
            .offset = buf_vk->slice.mem.offset,
            .range = buf->params.size,
        };

        wds->pBufferInfo = binfo;
        break;
    }
    }
}

static void vk_release_descriptor(struct ra *ra, struct vk_cmd *cmd,
                                  struct ra_renderpass *pass,
                                  struct ra_renderpass_input_val val)
{
    struct ra_renderpass_input *inp = &pass->params.inputs[val.index];

    switch (inp->type) {
    case RA_VARTYPE_IMG_W:
    case RA_VARTYPE_TEX: {
        struct ra_tex *tex = *(struct ra_tex **)val.data;
        tex_signal(ra, cmd, tex, passStages[pass->params.type]);
        break;
    }
    }
}

static void vk_renderpass_run(struct ra *ra,
                              const struct ra_renderpass_run_params *params)
{
    struct mpvk_ctx *vk = ra_vk_get(ra);
    struct ra_renderpass *pass = params->pass;
    struct ra_renderpass_vk *pass_vk = pass->priv;

    static const enum queue_type types[] = {
        [RA_RENDERPASS_TYPE_RASTER]  = GRAPHICS,
        [RA_RENDERPASS_TYPE_COMPUTE] = COMPUTE,
    };

    struct vk_cmd *cmd = vk_require_cmd(ra, types[pass->params.type]);
    if (!cmd)
        goto error;

    static const VkPipelineBindPoint bindPoint[] = {
        [RA_RENDERPASS_TYPE_RASTER]  = VK_PIPELINE_BIND_POINT_GRAPHICS,
        [RA_RENDERPASS_TYPE_COMPUTE] = VK_PIPELINE_BIND_POINT_COMPUTE,
    };

    vkCmdBindPipeline(cmd->buf, bindPoint[pass->params.type], pass_vk->pipe);

    VkDescriptorSet ds = pass_vk->dss[pass_vk->dindex++];
    pass_vk->dindex %= MPVK_NUM_DS;

    for (int i = 0; i < params->num_values; i++)
        vk_update_descriptor(ra, cmd, pass, params->values[i], ds, i);

    if (params->num_values > 0) {
        vkUpdateDescriptorSets(vk->dev, params->num_values, pass_vk->dswrite,
                               0, NULL);
    }

    vkCmdBindDescriptorSets(cmd->buf, bindPoint[pass->params.type],
                            pass_vk->pipeLayout, 0, 1, &ds, 0, NULL);

    if (pass->params.push_constants_size) {
        vkCmdPushConstants(cmd->buf, pass_vk->pipeLayout,
                           stageFlags[pass->params.type], 0,
                           pass->params.push_constants_size,
                           params->push_constants);
    }

    switch (pass->params.type) {
    case RA_RENDERPASS_TYPE_COMPUTE:
        vkCmdDispatch(cmd->buf, params->compute_groups[0],
                      params->compute_groups[1],
                      params->compute_groups[2]);
        break;
    case RA_RENDERPASS_TYPE_RASTER: {
        struct ra_tex *tex = params->target;
        struct ra_tex_vk *tex_vk = tex->priv;
        assert(tex->params.render_dst);

        struct ra_buf_params buf_params = {
            .type = RA_BUF_TYPE_VERTEX,
            .size = params->vertex_count * pass->params.vertex_stride,
            .host_mutable = true,
        };

        struct ra_buf *buf = ra_buf_pool_get(ra, &pass_vk->vbo, &buf_params);
        if (!buf) {
            MP_ERR(ra, "Failed allocating vertex buffer!\n");
            goto error;
        }
        struct ra_buf_vk *buf_vk = buf->priv;

        vk_buf_update(ra, buf, 0, params->vertex_data, buf_params.size);

        buf_barrier(ra, cmd, buf, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
                    VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
                    buf_vk->slice.mem.offset, buf_params.size);

        vkCmdBindVertexBuffers(cmd->buf, 0, 1, &buf_vk->slice.buf,
                               &buf_vk->slice.mem.offset);

        tex_barrier(ra, cmd, tex, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
                    VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, pass_vk->initialLayout,
                    pass->params.invalidate_target);

        VkViewport viewport = {
            .x = params->viewport.x0,
            .y = params->viewport.y0,
            .width = mp_rect_w(params->viewport),
            .height = mp_rect_h(params->viewport),
        };

        VkRect2D scissor = {
            .offset = {params->scissors.x0, params->scissors.y0},
            .extent = {mp_rect_w(params->scissors), mp_rect_h(params->scissors)},
        };

        vkCmdSetViewport(cmd->buf, 0, 1, &viewport);
        vkCmdSetScissor(cmd->buf, 0, 1, &scissor);

        VkRenderPassBeginInfo binfo = {
            .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
            .renderPass = pass_vk->renderPass,
            .framebuffer = tex_vk->framebuffer,
            .renderArea = (VkRect2D){{0, 0}, {tex->params.w, tex->params.h}},
        };

        vkCmdBeginRenderPass(cmd->buf, &binfo, VK_SUBPASS_CONTENTS_INLINE);
        vkCmdDraw(cmd->buf, params->vertex_count, 1, 0, 0);
        vkCmdEndRenderPass(cmd->buf);

        // The renderPass implicitly transitions the texture to this layout
        tex_vk->current_layout = pass_vk->finalLayout;
        tex_vk->current_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
        tex_signal(ra, cmd, tex, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);
        break;
    }
    default: abort();
    };

    for (int i = 0; i < params->num_values; i++)
        vk_release_descriptor(ra, cmd, pass, params->values[i]);

    // flush the work so far into its own command buffer, for better cross-frame
    // granularity
    vk_submit(ra);

error:
    return;
}

static void vk_blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
                    struct mp_rect *dst_rc, struct mp_rect *src_rc)
{
    assert(src->params.blit_src);
    assert(dst->params.blit_dst);

    struct ra_tex_vk *src_vk = src->priv;
    struct ra_tex_vk *dst_vk = dst->priv;

    struct vk_cmd *cmd = vk_require_cmd(ra, GRAPHICS);
    if (!cmd)
        return;

    tex_barrier(ra, cmd, src, VK_PIPELINE_STAGE_TRANSFER_BIT,
                VK_ACCESS_TRANSFER_READ_BIT,
                VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                false);

    bool discard = dst_rc->x0 == 0 &&
                   dst_rc->y0 == 0 &&
                   dst_rc->x1 == dst->params.w &&
                   dst_rc->y1 == dst->params.h;

    tex_barrier(ra, cmd, dst, VK_PIPELINE_STAGE_TRANSFER_BIT,
                VK_ACCESS_TRANSFER_WRITE_BIT,
                VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                discard);

    // Under certain conditions we can use vkCmdCopyImage instead of
    // vkCmdBlitImage, namely when the blit operation does not require
    // scaling, and the formats are compatible.
    if (src->params.format->pixel_size == dst->params.format->pixel_size &&
        mp_rect_w(*src_rc) == mp_rect_w(*dst_rc) &&
        mp_rect_h(*src_rc) == mp_rect_h(*dst_rc) &&
        mp_rect_w(*src_rc) >= 0 && mp_rect_h(*src_rc) >= 0)
    {
        VkImageCopy region = {
            .srcSubresource = vk_layers,
            .dstSubresource = vk_layers,
            .srcOffset = {src_rc->x0, src_rc->y0, 0},
            .dstOffset = {dst_rc->x0, dst_rc->y0, 0},
            .extent = {mp_rect_w(*src_rc), mp_rect_h(*src_rc), 1},
        };

        vkCmdCopyImage(cmd->buf, src_vk->img, src_vk->current_layout,
                       dst_vk->img, dst_vk->current_layout, 1, &region);
    } else {
        VkImageBlit region = {
            .srcSubresource = vk_layers,
            .dstSubresource = vk_layers,
            .srcOffsets = {{src_rc->x0, src_rc->y0, 0},
                           {src_rc->x1, src_rc->y1, 1}},
            .dstOffsets = {{dst_rc->x0, dst_rc->y0, 0},
                           {dst_rc->x1, dst_rc->y1, 1}},
        };

        vkCmdBlitImage(cmd->buf, src_vk->img, src_vk->current_layout,
                       dst_vk->img, dst_vk->current_layout, 1, &region,
                       VK_FILTER_NEAREST);
    }

    tex_signal(ra, cmd, src, VK_PIPELINE_STAGE_TRANSFER_BIT);
    tex_signal(ra, cmd, dst, VK_PIPELINE_STAGE_TRANSFER_BIT);
}

static void vk_clear(struct ra *ra, struct ra_tex *tex, float color[4],
                     struct mp_rect *rc)
{
    struct ra_vk *p = ra->priv;
    struct ra_tex_vk *tex_vk = tex->priv;
    assert(tex->params.blit_dst);

    struct vk_cmd *cmd = vk_require_cmd(ra, GRAPHICS);
    if (!cmd)
        return;

    struct mp_rect full = {0, 0, tex->params.w, tex->params.h};
    if (!rc || mp_rect_equals(rc, &full)) {
        // To clear the entire image, we can use the efficient clear command
        tex_barrier(ra, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT,
                    VK_ACCESS_TRANSFER_WRITE_BIT,
                    VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, true);

        VkClearColorValue clearColor = {0};
        for (int c = 0; c < 4; c++)
            clearColor.float32[c] = color[c];

        vkCmdClearColorImage(cmd->buf, tex_vk->img, tex_vk->current_layout,
                             &clearColor, 1, &vk_range);

        tex_signal(ra, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT);
    } else {
        // To simulate per-region clearing, we blit from a 1x1 texture instead
        struct ra_tex_upload_params ul_params = {
            .tex = p->clear_tex,
            .invalidate = true,
            .src = &color[0],
        };
        vk_tex_upload(ra, &ul_params);
        vk_blit(ra, tex, p->clear_tex, rc, &(struct mp_rect){0, 0, 1, 1});
    }
}

static int vk_desc_namespace(enum ra_vartype type)
{
    return 0;
}

#define VK_QUERY_POOL_SIZE (MPVK_MAX_STREAMING_DEPTH * 4)

struct vk_timer {
    VkQueryPool pool;
    int index;
    uint64_t result;
};

static void vk_timer_destroy(struct ra *ra, ra_timer *ratimer)
{
    if (!ratimer)
        return;

    struct mpvk_ctx *vk = ra_vk_get(ra);
    struct vk_timer *timer = ratimer;

    vkDestroyQueryPool(vk->dev, timer->pool, MPVK_ALLOCATOR);

    talloc_free(timer);
}

MAKE_LAZY_DESTRUCTOR(vk_timer_destroy, ra_timer);

static ra_timer *vk_timer_create(struct ra *ra)
{
    struct mpvk_ctx *vk = ra_vk_get(ra);

    struct vk_timer *timer = talloc_zero(NULL, struct vk_timer);

    struct VkQueryPoolCreateInfo qinfo = {
        .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
        .queryType = VK_QUERY_TYPE_TIMESTAMP,
        .queryCount = VK_QUERY_POOL_SIZE,
    };

    VK(vkCreateQueryPool(vk->dev, &qinfo, MPVK_ALLOCATOR, &timer->pool));

    return (ra_timer *)timer;

error:
    vk_timer_destroy(ra, timer);
    return NULL;
}

static void vk_timer_record(struct ra *ra, VkQueryPool pool, int index,
                            VkPipelineStageFlags stage)
{
    struct vk_cmd *cmd = vk_require_cmd(ra, GRAPHICS);
    if (!cmd)
        return;

    vkCmdWriteTimestamp(cmd->buf, stage, pool, index);
}

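// Timers use a ring of timestamp query pairs: starting a timer reads back the
// oldest pair's result (if it is ready) before reusing those two slots.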
static void vk_timer_start(struct ra *ra, ra_timer *ratimer)
{
    struct mpvk_ctx *vk = ra_vk_get(ra);
    struct vk_timer *timer = ratimer;

    timer->index = (timer->index + 2) % VK_QUERY_POOL_SIZE;

    uint64_t out[2];
    VkResult res = vkGetQueryPoolResults(vk->dev, timer->pool, timer->index, 2,
                                         sizeof(out), &out[0], sizeof(uint64_t),
                                         VK_QUERY_RESULT_64_BIT);
    switch (res) {
    case VK_SUCCESS:
        timer->result = (out[1] - out[0]) * vk->limits.timestampPeriod;
        break;
    case VK_NOT_READY:
        timer->result = 0;
        break;
    default:
        MP_WARN(vk, "Failed reading timer query result: %s\n", vk_err(res));
        return;
    };

    vk_timer_record(ra, timer->pool, timer->index,
                    VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
}

static uint64_t vk_timer_stop(struct ra *ra, ra_timer *ratimer)
{
    struct vk_timer *timer = ratimer;
    vk_timer_record(ra, timer->pool, timer->index + 1,
                    VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);

    return timer->result;
}

static struct ra_fns ra_fns_vk = {
    .destroy = vk_destroy_ra,
    .tex_create = vk_tex_create,
    .tex_destroy = vk_tex_destroy_lazy,
    .tex_upload = vk_tex_upload,
    .buf_create = vk_buf_create,
    .buf_destroy = vk_buf_destroy_lazy,
    .buf_update = vk_buf_update,
    .buf_poll = vk_buf_poll,
    .clear = vk_clear,
    .blit = vk_blit,
    .uniform_layout = std140_layout,
    .push_constant_layout = std430_layout,
    .desc_namespace = vk_desc_namespace,
    .renderpass_create = vk_renderpass_create,
    .renderpass_destroy = vk_renderpass_destroy_lazy,
    .renderpass_run = vk_renderpass_run,
    .timer_create = vk_timer_create,
    .timer_destroy = vk_timer_destroy_lazy,
    .timer_start = vk_timer_start,
    .timer_stop = vk_timer_stop,
};

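// Transition the wrapped swapchain image to PRESENT_SRC and hand the pending
// command buffer back to the caller instead of queuing it internally.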
struct vk_cmd *ra_vk_submit(struct ra *ra, struct ra_tex *tex)
{
    struct ra_vk *p = ra->priv;
    struct vk_cmd *cmd = vk_require_cmd(ra, GRAPHICS);
    if (!cmd)
        return NULL;

    struct ra_tex_vk *tex_vk = tex->priv;
    assert(tex_vk->external_img);
    tex_barrier(ra, cmd, tex, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
                VK_ACCESS_MEMORY_READ_BIT, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
                false);

    // Return this directly instead of going through vk_submit
    p->cmd = NULL;
    return cmd;
}