mpv/video/out/vulkan/context.c

595 lines
19 KiB
C

/*
* This file is part of mpv.
*
* mpv is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* mpv is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with mpv. If not, see <http://www.gnu.org/licenses/>.
*/
#include "options/m_config.h"
#include "video/out/gpu/spirv.h"
#include "context.h"
#include "ra_vk.h"
#include "utils.h"
enum {
SWAP_AUTO = 0,
SWAP_FIFO,
SWAP_FIFO_RELAXED,
SWAP_MAILBOX,
SWAP_IMMEDIATE,
SWAP_COUNT,
};
struct vulkan_opts {
struct mpvk_device_opts dev_opts; // logical device options
char *device; // force a specific GPU
int swap_mode;
};
static int vk_validate_dev(struct mp_log *log, const struct m_option *opt,
struct bstr name, struct bstr param)
{
int ret = M_OPT_INVALID;
VkResult res;
// Create a dummy instance to validate/list the devices
VkInstanceCreateInfo info = {
.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
};
VkInstance inst;
VkPhysicalDevice *devices = NULL;
uint32_t num = 0;
res = vkCreateInstance(&info, MPVK_ALLOCATOR, &inst);
if (res != VK_SUCCESS)
goto done;
res = vkEnumeratePhysicalDevices(inst, &num, NULL);
if (res != VK_SUCCESS)
goto done;
devices = talloc_array(NULL, VkPhysicalDevice, num);
vkEnumeratePhysicalDevices(inst, &num, devices);
if (res != VK_SUCCESS)
goto done;
bool help = bstr_equals0(param, "help");
if (help) {
mp_info(log, "Available vulkan devices:\n");
ret = M_OPT_EXIT;
}
for (int i = 0; i < num; i++) {
VkPhysicalDeviceProperties prop;
vkGetPhysicalDeviceProperties(devices[i], &prop);
if (help) {
mp_info(log, " '%s' (GPU %d, ID %x:%x)\n", prop.deviceName, i,
(unsigned)prop.vendorID, (unsigned)prop.deviceID);
} else if (bstr_equals0(param, prop.deviceName)) {
ret = 0;
goto done;
}
}
if (!help)
mp_err(log, "No device with name '%.*s'!\n", BSTR_P(param));
done:
talloc_free(devices);
return ret;
}
#define OPT_BASE_STRUCT struct vulkan_opts
const struct m_sub_options vulkan_conf = {
.opts = (const struct m_option[]) {
OPT_STRING_VALIDATE("vulkan-device", device, 0, vk_validate_dev),
OPT_CHOICE("vulkan-swap-mode", swap_mode, 0,
({"auto", SWAP_AUTO},
{"fifo", SWAP_FIFO},
{"fifo-relaxed", SWAP_FIFO_RELAXED},
{"mailbox", SWAP_MAILBOX},
{"immediate", SWAP_IMMEDIATE})),
OPT_INTRANGE("vulkan-queue-count", dev_opts.queue_count, 0, 1, 8,
OPTDEF_INT(1)),
OPT_FLAG("vulkan-async-transfer", dev_opts.async_transfer, 0),
OPT_FLAG("vulkan-async-compute", dev_opts.async_compute, 0),
{0}
},
.size = sizeof(struct vulkan_opts),
.defaults = &(struct vulkan_opts) {
.dev_opts = {
.async_transfer = 1,
},
},
};
struct priv {
struct mpvk_ctx *vk;
struct vulkan_opts *opts;
// Swapchain metadata:
int w, h; // current size
VkSwapchainCreateInfoKHR protoInfo; // partially filled-in prototype
VkSwapchainKHR swapchain;
VkSwapchainKHR old_swapchain;
int frames_in_flight;
// state of the images:
struct ra_tex **images; // ra_tex wrappers for the vkimages
int num_images; // size of images
VkSemaphore *sems_in; // pool of semaphores used to synchronize images
VkSemaphore *sems_out; // outgoing semaphores (rendering complete)
int num_sems;
int idx_sems; // index of next free semaphore pair
int last_imgidx; // the image index last acquired (for submit)
// This is used to pre-fetch the next frame at the end of swap_buffers
struct ra_fbo queued_fbo;
};
static const struct ra_swapchain_fns vulkan_swapchain;
struct mpvk_ctx *ra_vk_ctx_get(struct ra_ctx *ctx)
{
if (ctx->swapchain->fns != &vulkan_swapchain)
return NULL;
struct priv *p = ctx->swapchain->priv;
return p->vk;
}
static bool update_swapchain_info(struct priv *p,
VkSwapchainCreateInfoKHR *info)
{
struct mpvk_ctx *vk = p->vk;
// Query the supported capabilities and update this struct as needed
VkSurfaceCapabilitiesKHR caps;
VK(vkGetPhysicalDeviceSurfaceCapabilitiesKHR(vk->physd, vk->surf, &caps));
// Sorted by preference
static const VkCompositeAlphaFlagsKHR alphaModes[] = {
VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR,
VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
};
for (int i = 0; i < MP_ARRAY_SIZE(alphaModes); i++) {
if (caps.supportedCompositeAlpha & alphaModes[i]) {
info->compositeAlpha = alphaModes[i];
break;
}
}
if (!info->compositeAlpha) {
MP_ERR(vk, "Failed picking alpha compositing mode (caps: 0x%x)\n",
caps.supportedCompositeAlpha);
goto error;
}
static const VkSurfaceTransformFlagsKHR rotModes[] = {
VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR,
VK_SURFACE_TRANSFORM_INHERIT_BIT_KHR,
};
for (int i = 0; i < MP_ARRAY_SIZE(rotModes); i++) {
if (caps.supportedTransforms & rotModes[i]) {
info->preTransform = rotModes[i];
break;
}
}
if (!info->preTransform) {
MP_ERR(vk, "Failed picking surface transform mode (caps: 0x%x)\n",
caps.supportedTransforms);
goto error;
}
// Image count as required
MP_VERBOSE(vk, "Requested image count: %d (min %d max %d)\n",
(int)info->minImageCount, (int)caps.minImageCount,
(int)caps.maxImageCount);
info->minImageCount = MPMAX(info->minImageCount, caps.minImageCount);
if (caps.maxImageCount)
info->minImageCount = MPMIN(info->minImageCount, caps.maxImageCount);
// Check the extent against the allowed parameters
if (caps.currentExtent.width != info->imageExtent.width &&
caps.currentExtent.width != 0xFFFFFFFF)
{
MP_WARN(vk, "Requested width %d does not match current width %d\n",
(int)info->imageExtent.width, (int)caps.currentExtent.width);
info->imageExtent.width = caps.currentExtent.width;
}
if (caps.currentExtent.height != info->imageExtent.height &&
caps.currentExtent.height != 0xFFFFFFFF)
{
MP_WARN(vk, "Requested height %d does not match current height %d\n",
(int)info->imageExtent.height, (int)caps.currentExtent.height);
info->imageExtent.height = caps.currentExtent.height;
}
if (caps.minImageExtent.width > info->imageExtent.width ||
caps.minImageExtent.height > info->imageExtent.height)
{
MP_ERR(vk, "Requested size %dx%d smaller than device minimum %d%d\n",
(int)info->imageExtent.width, (int)info->imageExtent.height,
(int)caps.minImageExtent.width, (int)caps.minImageExtent.height);
goto error;
}
if (caps.maxImageExtent.width < info->imageExtent.width ||
caps.maxImageExtent.height < info->imageExtent.height)
{
MP_ERR(vk, "Requested size %dx%d larger than device maximum %d%d\n",
(int)info->imageExtent.width, (int)info->imageExtent.height,
(int)caps.maxImageExtent.width, (int)caps.maxImageExtent.height);
goto error;
}
// We just request whatever usage we can, and let the ra_vk decide what
// ra_tex_params that translates to. This makes the images as flexible
// as possible.
info->imageUsage = caps.supportedUsageFlags;
return true;
error:
return false;
}
void ra_vk_ctx_uninit(struct ra_ctx *ctx)
{
if (ctx->ra) {
struct priv *p = ctx->swapchain->priv;
struct mpvk_ctx *vk = p->vk;
mpvk_flush_commands(vk);
mpvk_poll_commands(vk, UINT64_MAX);
for (int i = 0; i < p->num_images; i++)
ra_tex_free(ctx->ra, &p->images[i]);
for (int i = 0; i < p->num_sems; i++) {
vkDestroySemaphore(vk->dev, p->sems_in[i], MPVK_ALLOCATOR);
vkDestroySemaphore(vk->dev, p->sems_out[i], MPVK_ALLOCATOR);
}
vkDestroySwapchainKHR(vk->dev, p->swapchain, MPVK_ALLOCATOR);
ctx->ra->fns->destroy(ctx->ra);
ctx->ra = NULL;
}
talloc_free(ctx->swapchain);
ctx->swapchain = NULL;
}
static const struct ra_swapchain_fns vulkan_swapchain;
bool ra_vk_ctx_init(struct ra_ctx *ctx, struct mpvk_ctx *vk,
VkPresentModeKHR preferred_mode)
{
struct ra_swapchain *sw = ctx->swapchain = talloc_zero(NULL, struct ra_swapchain);
sw->ctx = ctx;
sw->fns = &vulkan_swapchain;
struct priv *p = sw->priv = talloc_zero(sw, struct priv);
p->vk = vk;
p->opts = mp_get_config_group(p, ctx->global, &vulkan_conf);
if (!mpvk_find_phys_device(vk, p->opts->device, ctx->opts.allow_sw))
goto error;
if (!spirv_compiler_init(ctx))
goto error;
vk->spirv = ctx->spirv;
if (!mpvk_pick_surface_format(vk))
goto error;
if (!mpvk_device_init(vk, p->opts->dev_opts))
goto error;
ctx->ra = ra_create_vk(vk, ctx->log);
if (!ctx->ra)
goto error;
static const VkPresentModeKHR present_modes[SWAP_COUNT] = {
[SWAP_FIFO] = VK_PRESENT_MODE_FIFO_KHR,
[SWAP_FIFO_RELAXED] = VK_PRESENT_MODE_FIFO_RELAXED_KHR,
[SWAP_MAILBOX] = VK_PRESENT_MODE_MAILBOX_KHR,
[SWAP_IMMEDIATE] = VK_PRESENT_MODE_IMMEDIATE_KHR,
};
p->protoInfo = (VkSwapchainCreateInfoKHR) {
.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
.surface = vk->surf,
.imageFormat = vk->surf_format.format,
.imageColorSpace = vk->surf_format.colorSpace,
.imageArrayLayers = 1, // non-stereoscopic
.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
.minImageCount = ctx->opts.swapchain_depth + 1, // +1 for FB
.presentMode = p->opts->swap_mode ? present_modes[p->opts->swap_mode]
: preferred_mode,
.clipped = true,
};
// Make sure the swapchain present mode is supported
int num_modes;
VK(vkGetPhysicalDeviceSurfacePresentModesKHR(vk->physd, vk->surf,
&num_modes, NULL));
VkPresentModeKHR *modes = talloc_array(NULL, VkPresentModeKHR, num_modes);
VK(vkGetPhysicalDeviceSurfacePresentModesKHR(vk->physd, vk->surf,
&num_modes, modes));
bool supported = false;
for (int i = 0; i < num_modes; i++)
supported |= (modes[i] == p->protoInfo.presentMode);
talloc_free(modes);
if (!supported) {
MP_ERR(ctx, "Requested swap mode unsupported by this device!\n");
goto error;
}
return true;
error:
ra_vk_ctx_uninit(ctx);
return false;
}
static void destroy_swapchain(struct mpvk_ctx *vk, struct priv *p)
{
assert(p->old_swapchain);
vkDestroySwapchainKHR(vk->dev, p->old_swapchain, MPVK_ALLOCATOR);
p->old_swapchain = NULL;
}
bool ra_vk_ctx_resize(struct ra_swapchain *sw, int w, int h)
{
struct priv *p = sw->priv;
if (w == p->w && h == p->h)
return true;
struct ra *ra = sw->ctx->ra;
struct mpvk_ctx *vk = p->vk;
VkImage *vkimages = NULL;
// It's invalid to trigger another swapchain recreation while there's
// more than one swapchain already active, so we need to flush any pending
// asynchronous swapchain release operations that may be ongoing.
while (p->old_swapchain)
mpvk_poll_commands(vk, 100000); // 100μs
VkSwapchainCreateInfoKHR sinfo = p->protoInfo;
sinfo.imageExtent = (VkExtent2D){ w, h };
sinfo.oldSwapchain = p->swapchain;
if (!update_swapchain_info(p, &sinfo))
goto error;
VK(vkCreateSwapchainKHR(vk->dev, &sinfo, MPVK_ALLOCATOR, &p->swapchain));
p->w = w;
p->h = h;
// Freeing the old swapchain while it's still in use is an error, so do
// it asynchronously once the device is idle.
if (sinfo.oldSwapchain) {
p->old_swapchain = sinfo.oldSwapchain;
vk_dev_callback(vk, (vk_cb) destroy_swapchain, vk, p);
}
// Get the new swapchain images
int num;
VK(vkGetSwapchainImagesKHR(vk->dev, p->swapchain, &num, NULL));
vkimages = talloc_array(NULL, VkImage, num);
VK(vkGetSwapchainImagesKHR(vk->dev, p->swapchain, &num, vkimages));
// If needed, allocate some more semaphores
while (num > p->num_sems) {
VkSemaphore sem_in, sem_out;
static const VkSemaphoreCreateInfo seminfo = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
};
VK(vkCreateSemaphore(vk->dev, &seminfo, MPVK_ALLOCATOR, &sem_in));
VK(vkCreateSemaphore(vk->dev, &seminfo, MPVK_ALLOCATOR, &sem_out));
int idx = p->num_sems++;
MP_TARRAY_GROW(p, p->sems_in, idx);
MP_TARRAY_GROW(p, p->sems_out, idx);
p->sems_in[idx] = sem_in;
p->sems_out[idx] = sem_out;
}
// Invalidate the queued texture
p->queued_fbo = (struct ra_fbo) {0};
// Recreate the ra_tex wrappers
for (int i = 0; i < p->num_images; i++)
ra_tex_free(ra, &p->images[i]);
p->num_images = num;
MP_TARRAY_GROW(p, p->images, p->num_images);
for (int i = 0; i < num; i++) {
p->images[i] = ra_vk_wrap_swapchain_img(ra, vkimages[i], sinfo);
if (!p->images[i])
goto error;
}
talloc_free(vkimages);
return true;
error:
talloc_free(vkimages);
vkDestroySwapchainKHR(vk->dev, p->swapchain, MPVK_ALLOCATOR);
p->swapchain = NULL;
return false;
}
static int color_depth(struct ra_swapchain *sw)
{
struct priv *p = sw->priv;
int bits = 0;
if (!p->num_images)
return bits;
// The channel with the most bits is probably the most authoritative about
// the actual color information (consider e.g. a2bgr10). Slight downside
// in that it results in rounding r/b for e.g. rgb565, but we don't pick
// surfaces with fewer than 8 bits anyway.
const struct ra_format *fmt = p->images[0]->params.format;
for (int i = 0; i < fmt->num_components; i++) {
int depth = fmt->component_depth[i];
bits = MPMAX(bits, depth ? depth : fmt->component_size[i]);
}
return bits;
}
static bool start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo)
{
struct priv *p = sw->priv;
struct mpvk_ctx *vk = p->vk;
if (!p->swapchain)
return false;
if (p->queued_fbo.tex) {
assert(out_fbo != &p->queued_fbo);
*out_fbo = p->queued_fbo;
p->queued_fbo = (struct ra_fbo) {0};
return true;
}
VkSemaphore sem_in = p->sems_in[p->idx_sems];
MP_TRACE(vk, "vkAcquireNextImageKHR signals %p\n", (void *)sem_in);
for (int attempts = 0; attempts < 2; attempts++) {
uint32_t imgidx = 0;
VkResult res = vkAcquireNextImageKHR(vk->dev, p->swapchain, UINT64_MAX,
sem_in, NULL, &imgidx);
switch (res) {
case VK_SUCCESS:
p->last_imgidx = imgidx;
*out_fbo = (struct ra_fbo) {
.tex = p->images[imgidx],
.flip = false,
};
ra_tex_vk_external_dep(sw->ctx->ra, out_fbo->tex, sem_in);
return true;
case VK_ERROR_OUT_OF_DATE_KHR: {
// In these cases try recreating the swapchain
int w = p->w, h = p->h;
p->w = p->h = 0; // invalidate the current state
if (!ra_vk_ctx_resize(sw, w, h))
return false;
continue;
}
default:
MP_ERR(vk, "Failed acquiring swapchain image: %s\n", vk_err(res));
return false;
}
}
// If we've exhausted the number of attempts to recreate the swapchain,
// just give up silently.
return false;
}
static void present_cb(struct priv *p, void *arg)
{
p->frames_in_flight--;
}
static bool submit_frame(struct ra_swapchain *sw, const struct vo_frame *frame)
{
struct priv *p = sw->priv;
struct ra *ra = sw->ctx->ra;
struct mpvk_ctx *vk = p->vk;
if (!p->swapchain)
return false;
struct vk_cmd *cmd = ra_vk_submit(ra, p->images[p->last_imgidx]);
if (!cmd)
return false;
VkSemaphore sem_out = p->sems_out[p->idx_sems++];
p->idx_sems %= p->num_sems;
vk_cmd_sig(cmd, sem_out);
p->frames_in_flight++;
vk_cmd_callback(cmd, (vk_cb) present_cb, p, NULL);
vk_cmd_queue(vk, cmd);
if (!mpvk_flush_commands(vk))
return false;
// Submit to the same queue that we were currently rendering to
struct vk_cmdpool *pool_gfx = vk->pool_graphics;
VkQueue queue = pool_gfx->queues[pool_gfx->idx_queues];
// Rotate the queues to ensure good parallelism across frames
for (int i = 0; i < vk->num_pools; i++) {
struct vk_cmdpool *pool = vk->pools[i];
pool->idx_queues = (pool->idx_queues + 1) % pool->num_queues;
}
VkPresentInfoKHR pinfo = {
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &sem_out,
.swapchainCount = 1,
.pSwapchains = &p->swapchain,
.pImageIndices = &p->last_imgidx,
};
MP_TRACE(vk, "vkQueuePresentKHR waits on %p\n", (void *)sem_out);
VkResult res = vkQueuePresentKHR(queue, &pinfo);
switch (res) {
case VK_SUCCESS:
case VK_SUBOPTIMAL_KHR:
return true;
case VK_ERROR_OUT_OF_DATE_KHR:
// We can silently ignore this error, since the next start_frame will
// recreate the swapchain automatically.
return true;
default:
MP_ERR(vk, "Failed presenting to queue %p: %s\n", (void *)queue,
vk_err(res));
return false;
}
}
static void swap_buffers(struct ra_swapchain *sw)
{
struct priv *p = sw->priv;
while (p->frames_in_flight >= sw->ctx->opts.swapchain_depth)
mpvk_poll_commands(p->vk, 100000); // 100μs
// Also try and block until the next hardware buffer swap early. this
// prevents start_frame from blocking later, thus slightly improving the
// frame timing stats. (since mpv assumes most blocking will happen in
// swap_buffers)
start_frame(sw, &p->queued_fbo);
}
static const struct ra_swapchain_fns vulkan_swapchain = {
.color_depth = color_depth,
.start_frame = start_frame,
.submit_frame = submit_frame,
.swap_buffers = swap_buffers,
};