From 61e685594dce7fd5bf79c3fae65d962a1ea5e924 Mon Sep 17 00:00:00 2001 From: Philip Langdale Date: Sat, 12 Mar 2022 11:21:29 -0800 Subject: [PATCH] hwdec_vulkan: add Vulkan HW Interop Vulkan Video Decoding has finally become a reality, as it's now showing up in shipping drivers, and the ffmpeg support has been merged. With that in mind, this change introduces HW interop support for ffmpeg Vulkan frames. The implementation is functionally complete - it can display frames produced by hardware decoding, and it can work with ffmpeg vulkan filters. There are still various caveats due to gaps and bugs in drivers, so YMMV, as always. Primary testing has been done on Intel, AMD, and nvidia hardware on Linux with basic Windows testing on nvidia. Notable caveats: * Due to driver bugs, video decoding on nvidia does not work right now, unless you use the Vulkan Beta driver. It can be worked around, but requires ffmpeg changes that are not considered acceptable to merge. * Even if those work-arounds are applied, Vulkan filters will not work on video that was decoded by Vulkan, due to additional bugs in the nvidia drivers. The filters do work correctly on content decoded some other way, and then uploaded to Vulkan (eg: Decode with nvdec, upload with --vf=format=vulkan) * Vulkan filters can only be used with drivers that support VK_EXT_descriptor_buffer which doesn't include Intel ANV as yet. There is an MR outstanding for this. * When dealing with 1080p content, there may be some visual distortion in the bottom lines of frames due to chroma scaling incorporating the extra hidden lines at the bottom of the frame (1080p content is actually stored as 1088 lines), depending on the hardware/driver combination and the scaling algorithm. This cannot be easily addressed as the mechanical fix for it violates the Vulkan spec, and probably requires a spec change to resolve properly. 
All of these caveats will be fixed in either drivers or ffmpeg, and so will not require mpv changes (unless something unexpected happens). If you want to run on nvidia with the non-beta drivers, you can use this ffmpeg tree with the work-around patches: * https://github.com/philipl/FFmpeg/tree/vulkan-nvidia-workarounds --- DOCS/man/options.rst | 5 +- DOCS/man/vf.rst | 8 + filters/f_hwtransfer.c | 6 + meson.build | 14 +- meson_options.txt | 1 + video/fmt-conversion.c | 3 + video/img_format.h | 4 + video/out/gpu/hwdec.c | 4 + video/out/hwdec/hwdec_vulkan.c | 332 +++++++++++++++++++++++++++++++++ wscript | 11 ++ wscript_build.py | 1 + 11 files changed, 387 insertions(+), 2 deletions(-) create mode 100644 video/out/hwdec/hwdec_vulkan.c diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst index 1c4981d0b3..cd7bd5dd0b 100644 --- a/DOCS/man/options.rst +++ b/DOCS/man/options.rst @@ -1255,6 +1255,8 @@ Video :nvdec-copy: copies video back to system RAM (Any platform CUDA is available) :drm: requires ``--vo=gpu`` (Linux only) :drm-copy: copies video back to system RAM (Linux only) + :vulkan: requires ``--vo=gpu-next`` (Any platform with Vulkan Video Decoding) + :vulkan-copy: copies video back to system RAM (Any platform with Vulkan Video Decoding) Other hwdecs (only use if you know you have to): @@ -1310,7 +1312,8 @@ Video .. note:: Most non-copy methods only work with the OpenGL GPU backend. Currently, - only the ``vaapi``, ``nvdec`` and ``cuda`` methods work with Vulkan. + only the ``vaapi``, ``nvdec``, ``cuda`` and ``vulkan`` methods work with + Vulkan. The ``vaapi`` mode, if used with ``--vo=gpu``, requires Mesa 11, and most likely works with Intel and AMD GPUs only. It also requires the opengl EGL diff --git a/DOCS/man/vf.rst b/DOCS/man/vf.rst index 027ac6f55f..f6842490cd 100644 --- a/DOCS/man/vf.rst +++ b/DOCS/man/vf.rst @@ -141,6 +141,11 @@ Available mpv-only filters are: For a list of available formats, use ``--vf=format=fmt=help``. + .. 
note:: + + Conversion between hardware formats is supported in some cases. + eg: ``cuda`` to ``vulkan``, or ``vaapi`` to ``vulkan``. + ```` Force conversion of color parameters (default: no). @@ -164,6 +169,9 @@ Available mpv-only filters are: If input and output video parameters are the same, conversion is always skipped. + When converting between hardware formats, this parameter has no effect, + and the only conversion that is done is the format conversion. + .. admonition:: Examples ``mpv test.mkv --vf=format:colormatrix=ycgco`` diff --git a/filters/f_hwtransfer.c b/filters/f_hwtransfer.c index 44f13b391b..9488c5536c 100644 --- a/filters/f_hwtransfer.c +++ b/filters/f_hwtransfer.c @@ -50,6 +50,12 @@ struct hwmap_pairs { // We cannot discover which pairs of hardware formats need to use hwmap to // convert between the formats, so we need a lookup table. static const struct hwmap_pairs hwmap_pairs[] = { +#if HAVE_VULKAN_INTEROP + { + .first_fmt = IMGFMT_VAAPI, + .second_fmt = IMGFMT_VULKAN, + }, +#endif { .first_fmt = IMGFMT_DRMPRIME, .second_fmt = IMGFMT_VAAPI, diff --git a/meson.build b/meson.build index 959710bb83..d03fe823a4 100644 --- a/meson.build +++ b/meson.build @@ -411,7 +411,7 @@ endif if darwin path_source = files('osdep/path-darwin.c') sources += path_source + files('osdep/timer-darwin.c') - + endif if posix and not darwin @@ -933,6 +933,8 @@ if features['libplacebo-next'] 'video/out/gpu_next/context.c') endif +features += {'libplacebo-decode': features['libplacebo'] and libplacebo.version().version_compare('>=5.275.0')} + sdl2_video = get_option('sdl2-video').require( features['sdl2'], error_message: 'sdl2 was not found!', @@ -1309,6 +1311,16 @@ if features['cuda-interop'] and features['vulkan'] sources += files('video/out/hwdec/hwdec_cuda_vk.c') endif +vulkan_interop = get_option('vulkan-interop').require( + vulkan.found() and features['libplacebo-decode'] and + libavutil.version().version_compare('>=58.11.100'), + error_message: 'Vulkan Interop 
requires vulkan, libplacebo >= 5.275.0, and libavutil >= 58.11.100', +) +features += {'vulkan-interop': vulkan_interop.allowed()} +if vulkan_interop.allowed() + sources += files('video/out/hwdec/hwdec_vulkan.c') +endif + d3d_hwaccel = get_option('d3d-hwaccel').require( win32, error_message: 'the os is not win32!', diff --git a/meson_options.txt b/meson_options.txt index 1324a090c1..65047b110f 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -104,6 +104,7 @@ option('gl-dxinterop-d3d9', type: 'feature', value: 'auto', description: 'OpenGL option('ios-gl', type: 'feature', value: 'auto', description: 'iOS OpenGL ES hardware decoding interop support') option('rpi-mmal', type: 'feature', value: 'auto', description: 'Raspberry Pi MMAL hwaccel') option('videotoolbox-gl', type: 'feature', value: 'auto', description: 'Videotoolbox with OpenGL') +option('vulkan-interop', type: 'feature', value: 'auto', description: 'Vulkan graphics interop') # macOS features option('macos-10-11-features', type: 'feature', value: 'auto', description: 'macOS 10.11 SDK Features') diff --git a/video/fmt-conversion.c b/video/fmt-conversion.c index 0639e399ed..aa7d857341 100644 --- a/video/fmt-conversion.c +++ b/video/fmt-conversion.c @@ -66,6 +66,9 @@ static const struct { {IMGFMT_CUDA, AV_PIX_FMT_CUDA}, {IMGFMT_P010, AV_PIX_FMT_P010}, {IMGFMT_DRMPRIME, AV_PIX_FMT_DRM_PRIME}, +#if HAVE_VULKAN_INTEROP + {IMGFMT_VULKAN, AV_PIX_FMT_VULKAN}, +#endif {0, AV_PIX_FMT_NONE} }; diff --git a/video/img_format.h b/video/img_format.h index 79358031f6..2091e00598 100644 --- a/video/img_format.h +++ b/video/img_format.h @@ -20,6 +20,7 @@ #include +#include "config.h" #include "osdep/endian.h" #include "misc/bstr.h" #include "video/csputils.h" @@ -318,6 +319,9 @@ enum mp_imgfmt { IMGFMT_VDPAU_OUTPUT, // VdpOutputSurface IMGFMT_VAAPI, IMGFMT_VIDEOTOOLBOX, // CVPixelBufferRef +#if HAVE_VULKAN_INTEROP + IMGFMT_VULKAN, // VKImage +#endif IMGFMT_DRMPRIME, // AVDRMFrameDescriptor // Generic pass-through of 
AV_PIX_FMT_*. Used for formats which don't have diff --git a/video/out/gpu/hwdec.c b/video/out/gpu/hwdec.c index 8b1ab8c961..1e3edb0d8d 100644 --- a/video/out/gpu/hwdec.c +++ b/video/out/gpu/hwdec.c @@ -38,6 +38,7 @@ extern const struct ra_hwdec_driver ra_hwdec_rpi_overlay; extern const struct ra_hwdec_driver ra_hwdec_drmprime; extern const struct ra_hwdec_driver ra_hwdec_drmprime_overlay; extern const struct ra_hwdec_driver ra_hwdec_aimagereader; +extern const struct ra_hwdec_driver ra_hwdec_vulkan; const struct ra_hwdec_driver *const ra_hwdec_drivers[] = { #if HAVE_VAAPI_EGL || HAVE_VAAPI_LIBPLACEBO @@ -79,6 +80,9 @@ const struct ra_hwdec_driver *const ra_hwdec_drivers[] = { #if HAVE_ANDROID_MEDIA_NDK &ra_hwdec_aimagereader, #endif +#if HAVE_VULKAN_INTEROP + &ra_hwdec_vulkan, +#endif NULL }; diff --git a/video/out/hwdec/hwdec_vulkan.c b/video/out/hwdec/hwdec_vulkan.c new file mode 100644 index 0000000000..f85b9f4829 --- /dev/null +++ b/video/out/hwdec/hwdec_vulkan.c @@ -0,0 +1,332 @@ +/* + * Copyright (c) 2022 Philip Langdale + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see . 
+ */
+
+#include "config.h"
+#include "video/out/gpu/hwdec.h"
+#include "video/out/vulkan/context.h"
+#include "video/out/placebo/ra_pl.h"
+
+#include <libavutil/hwcontext.h>
+#include <libavutil/hwcontext_vulkan.h>
+
+// Driver-wide state of the Vulkan hwdec interop.
+struct vulkan_hw_priv {
+    struct mp_hwdec_ctx hwctx;  // device entry registered in hw->devs
+    pl_gpu gpu;                 // libplacebo GPU taken from the VO's ra
+};
+
+// Per-mapper state.
+struct vulkan_mapper_priv {
+    struct mp_image layout;     // software layout the frames are mapped to
+    AVVkFrame *vkf;             // frame locked by mapper_map(), else NULL
+    pl_tex tex[4];              // libplacebo wrappers, one per plane
+};
+
+// FFmpeg queue-lock callback: forwards to the pl_vulkan stored in
+// ctx->user_opaque by vulkan_init().
+static void lock_queue(struct AVHWDeviceContext *ctx,
+                       uint32_t queue_family, uint32_t index)
+{
+    pl_vulkan vulkan = ctx->user_opaque;
+    vulkan->lock_queue(vulkan, queue_family, index);
+}
+
+// FFmpeg queue-unlock callback, counterpart of lock_queue().
+static void unlock_queue(struct AVHWDeviceContext *ctx,
+                         uint32_t queue_family, uint32_t index)
+{
+    pl_vulkan vulkan = ctx->user_opaque;
+    vulkan->unlock_queue(vulkan, queue_family, index);
+}
+
+// Number of leading non-null images in the frame. The scan is bounded by the
+// size of AVVkFrame.img[] so a fully populated array cannot be overrun.
+static int count_images(const AVVkFrame *vkf)
+{
+    int num_images = 0;
+    while (num_images < AV_NUM_DATA_POINTERS &&
+           vkf->img[num_images] != VK_NULL_HANDLE)
+        num_images++;
+    return num_images;
+}
+
+/*
+ * Create an FFmpeg Vulkan device context that shares the instance, device and
+ * queues of the VO's libplacebo Vulkan context, and register it in hw->devs.
+ *
+ * Returns 0 on success and -1 on any failure.
+ */
+static int vulkan_init(struct ra_hwdec *hw)
+{
+    AVBufferRef *hw_device_ctx = NULL;
+    int ret = 0;
+    struct vulkan_hw_priv *p = hw->priv;
+
+    struct mpvk_ctx *vk = ra_vk_ctx_get(hw->ra_ctx);
+    if (!vk) {
+        MP_ERR(hw, "This is not a libplacebo vulkan gpu api context.\n");
+        return -1;
+    }
+
+    p->gpu = ra_pl_get(hw->ra_ctx->ra);
+    if (!p->gpu) {
+        MP_ERR(hw, "Failed to obtain pl_gpu.\n");
+        return -1;
+    }
+
+    /*
+     * libplacebo initialises all queues, but we still need to discover which
+     * one is the decode queue.
+     */
+    uint32_t num_qf = 0;
+    VkQueueFamilyProperties *qf = NULL;
+    vkGetPhysicalDeviceQueueFamilyProperties(vk->vulkan->phys_device, &num_qf, NULL);
+    if (!num_qf) {
+        MP_ERR(hw, "No Vulkan queue families reported.\n");
+        goto error;
+    }
+
+    qf = talloc_array(NULL, VkQueueFamilyProperties, num_qf);
+    vkGetPhysicalDeviceQueueFamilyProperties(vk->vulkan->phys_device, &num_qf, qf);
+
+    // -1 / 0 are passed on to FFmpeg unchanged if no decode family exists.
+    int decode_index = -1, decode_count = 0;
+    for (uint32_t i = 0; i < num_qf; i++) {
+        /*
+         * Pick the first discovered decode queue that we find. Maybe a day will
+         * come when this needs to be smarter, but I'm sure a bunch of other
+         * things will have to change too.
+         */
+        if (qf[i].queueFlags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) {
+            decode_index = (int)i;
+            decode_count = qf[i].queueCount;
+            break;
+        }
+    }
+
+    hw_device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_VULKAN);
+    if (!hw_device_ctx)
+        goto error;
+
+    AVHWDeviceContext *device_ctx = (void *)hw_device_ctx->data;
+    AVVulkanDeviceContext *device_hwctx = device_ctx->hwctx;
+
+    // user_opaque is what lock_queue()/unlock_queue() read back.
+    device_ctx->user_opaque = (void *)vk->vulkan;
+    device_hwctx->lock_queue = lock_queue;
+    device_hwctx->unlock_queue = unlock_queue;
+    device_hwctx->get_proc_addr = vk->vkinst->get_proc_addr;
+    device_hwctx->inst = vk->vkinst->instance;
+    device_hwctx->phys_dev = vk->vulkan->phys_device;
+    device_hwctx->act_dev = vk->vulkan->device;
+    device_hwctx->device_features = *vk->vulkan->features;
+    device_hwctx->enabled_inst_extensions = vk->vkinst->extensions;
+    device_hwctx->nb_enabled_inst_extensions = vk->vkinst->num_extensions;
+    device_hwctx->enabled_dev_extensions = vk->vulkan->extensions;
+    device_hwctx->nb_enabled_dev_extensions = vk->vulkan->num_extensions;
+    device_hwctx->queue_family_index = vk->vulkan->queue_graphics.index;
+    device_hwctx->nb_graphics_queues = vk->vulkan->queue_graphics.count;
+    device_hwctx->queue_family_tx_index = vk->vulkan->queue_transfer.index;
+    device_hwctx->nb_tx_queues = vk->vulkan->queue_transfer.count;
+    device_hwctx->queue_family_comp_index = vk->vulkan->queue_compute.index;
+    device_hwctx->nb_comp_queues = vk->vulkan->queue_compute.count;
+    device_hwctx->queue_family_decode_index = decode_index;
+    device_hwctx->nb_decode_queues = decode_count;
+
+    ret = av_hwdevice_ctx_init(hw_device_ctx);
+    if (ret < 0) {
+        MP_ERR(hw, "av_hwdevice_ctx_init failed\n");
+        goto error;
+    }
+
+    p->hwctx = (struct mp_hwdec_ctx) {
+        .driver_name = hw->driver->name,
+        .av_device_ref = hw_device_ctx,
+        .hw_imgfmt = IMGFMT_VULKAN,
+    };
+    hwdec_devices_add(hw->devs, &p->hwctx);
+
+    talloc_free(qf);
+    return 0;
+
+ error:
+    talloc_free(qf);
+    av_buffer_unref(&hw_device_ctx);
+    return -1;
+}
+
+// Unregister the device entry and drop our reference to the FFmpeg device.
+static void vulkan_uninit(struct ra_hwdec *hw)
+{
+    struct vulkan_hw_priv *p = hw->priv;
+
+    hwdec_devices_remove(hw->devs, &p->hwctx);
+    av_buffer_unref(&p->hwctx.av_device_ref);
+}
+
+/*
+ * Derive the output parameters from the source ones: the mapped image uses
+ * the hardware sub-format as its software format. Returns -1 if the ra has no
+ * format description for that software format, 0 otherwise.
+ */
+static int mapper_init(struct ra_hwdec_mapper *mapper)
+{
+    struct vulkan_mapper_priv *p = mapper->priv;
+
+    mapper->dst_params = mapper->src_params;
+    mapper->dst_params.imgfmt = mapper->src_params.hw_subfmt;
+    mapper->dst_params.hw_subfmt = 0;
+
+    mp_image_set_params(&p->layout, &mapper->dst_params);
+
+    // Only used as a support check; the description itself is not kept.
+    struct ra_imgfmt_desc desc = {0};
+    if (!ra_get_imgfmt_desc(mapper->ra, mapper->dst_params.imgfmt, &desc))
+        return -1;
+
+    return 0;
+}
+
+// Nothing to release: all per-frame state is torn down in mapper_unmap().
+static void mapper_uninit(struct ra_hwdec_mapper *mapper)
+{
+
+}
+
+/*
+ * Take the plane textures back from libplacebo, record the resulting image
+ * layout and semaphore value in the AVVkFrame, unlock the frame and free the
+ * ra_tex wrappers.
+ *
+ * Safe to call when nothing is mapped (p->vkf == NULL), including after a
+ * failed mapper_map(); in that case only the ra_tex wrappers are freed.
+ */
+static void mapper_unmap(struct ra_hwdec_mapper *mapper)
+{
+    struct vulkan_hw_priv *p_owner = mapper->owner->priv;
+    struct vulkan_mapper_priv *p = mapper->priv;
+    AVVkFrame *vkf = p->vkf;
+
+    // p->vkf is only set while a frame is locked by mapper_map().
+    if (!mapper->src || !vkf)
+        goto end;
+
+    AVHWFramesContext *hwfc = (AVHWFramesContext *) mapper->src->hwctx->data;
+    const AVVulkanFramesContext *vkfc = hwfc->hwctx;
+    int num_images = count_images(vkf);
+
+    // Planes are wrapped in order, so the first NULL entry ends the list.
+    for (int i = 0; i < p->layout.num_planes && p->tex[i]; i++) {
+        pl_tex *tex = &p->tex[i];
+
+        // If we have multiple planes and one image, then that is a multiplane
+        // frame. Anything else is treated as one-image-per-plane.
+        int index = p->layout.num_planes > 1 && num_images == 1 ? 0 : i;
+
+        // Update AVVkFrame state to reflect current layout
+        bool ok = pl_vulkan_hold_ex(p_owner->gpu, pl_vulkan_hold_params(
+            .tex = *tex,
+            .out_layout = &vkf->layout[index],
+            .qf = VK_QUEUE_FAMILY_IGNORED,
+            .semaphore = (pl_vulkan_sem) {
+                .sem = vkf->sem[index],
+                .value = vkf->sem_value[index] + 1,
+            },
+        ));
+
+        vkf->access[index] = 0;
+        // Only advance the timeline value if the hold was actually queued.
+        vkf->sem_value[index] += !!ok;
+        // The pl_tex itself is referenced by mapper->tex[i], freed below.
+        *tex = NULL;
+    }
+
+    vkfc->unlock_frame(hwfc, vkf);
+
+ end:
+    for (int i = 0; i < p->layout.num_planes; i++)
+        ra_tex_free(mapper->ra, &mapper->tex[i]);
+
+    p->vkf = NULL;
+}
+
+/*
+ * Lock the source AVVkFrame, wrap each of its planes as a libplacebo texture
+ * and expose those as mapper->tex[]. The frame stays locked until
+ * mapper_unmap().
+ *
+ * Returns 0 on success, -1 on failure. On failure everything that was set up
+ * so far is undone through mapper_unmap().
+ */
+static int mapper_map(struct ra_hwdec_mapper *mapper)
+{
+    struct vulkan_hw_priv *p_owner = mapper->owner->priv;
+    struct vulkan_mapper_priv *p = mapper->priv;
+    pl_vulkan vk = pl_vulkan_get(p_owner->gpu);
+    if (!vk)
+        return -1;
+
+    AVHWFramesContext *hwfc = (AVHWFramesContext *) mapper->src->hwctx->data;
+    const AVVulkanFramesContext *vkfc = hwfc->hwctx;
+    AVVkFrame *vkf = (AVVkFrame *) mapper->src->planes[0];
+
+    // Checked before locking so this failure needs no cleanup.
+    const VkFormat *vk_fmt = av_vkfmt_from_pixfmt(hwfc->sw_format);
+    if (!vk_fmt) {
+        MP_ERR(mapper, "No Vulkan format for the frame's software format.\n");
+        return -1;
+    }
+
+    /*
+     * We need to use the dimensions from the HW Frames Context for the
+     * textures, as the underlying images may be larger than the logical frame
+     * size. This most often happens with 1080p content where the actual frame
+     * height is 1088.
+     */
+    struct mp_image raw_layout = {0};
+    mp_image_setfmt(&raw_layout, p->layout.params.imgfmt);
+    mp_image_set_size(&raw_layout, hwfc->width, hwfc->height);
+
+    int num_images = count_images(vkf);
+
+    vkfc->lock_frame(hwfc, vkf);
+    // From here on mapper_unmap() owns unlocking the frame.
+    p->vkf = vkf;
+
+    for (int i = 0; i < p->layout.num_planes; i++) {
+        pl_tex *tex = &p->tex[i];
+        VkImageAspectFlags aspect = VK_IMAGE_ASPECT_COLOR_BIT;
+        int index = i;
+
+        // If we have multiple planes and one image, then that is a multiplane
+        // frame. Anything else is treated as one-image-per-plane.
+        if (p->layout.num_planes > 1 && num_images == 1) {
+            index = 0;
+
+            switch (i) {
+            case 0:
+                aspect = VK_IMAGE_ASPECT_PLANE_0_BIT_KHR;
+                break;
+            case 1:
+                aspect = VK_IMAGE_ASPECT_PLANE_1_BIT_KHR;
+                break;
+            case 2:
+                aspect = VK_IMAGE_ASPECT_PLANE_2_BIT_KHR;
+                break;
+            default:
+                goto error;
+            }
+        }
+
+        *tex = pl_vulkan_wrap(p_owner->gpu, pl_vulkan_wrap_params(
+            .image = vkf->img[index],
+            .width = mp_image_plane_w(&raw_layout, i),
+            .height = mp_image_plane_h(&raw_layout, i),
+            .format = vk_fmt[i],
+            .usage = vkfc->usage,
+            .aspect = aspect,
+        ));
+        if (!*tex)
+            goto error;
+
+        // Build the ra_tex before releasing the image to libplacebo, so that
+        // this failure path destroys a texture that was never released.
+        struct ra_tex *ratex = talloc_ptrtype(NULL, ratex);
+        if (!mppl_wrap_tex(mapper->ra, *tex, ratex)) {
+            pl_tex_destroy(p_owner->gpu, tex);
+            talloc_free(ratex);
+            goto error;
+        }
+        mapper->tex[i] = ratex;
+
+        pl_vulkan_release_ex(p_owner->gpu, pl_vulkan_release_params(
+            .tex = *tex,
+            .layout = vkf->layout[index],
+            .qf = VK_QUEUE_FAMILY_IGNORED,
+            .semaphore = (pl_vulkan_sem) {
+                .sem = vkf->sem[index],
+                .value = vkf->sem_value[index],
+            },
+        ));
+    }
+
+    return 0;
+
+ error:
+    // Holds the planes released so far and unlocks the frame exactly once.
+    mapper_unmap(mapper);
+    return -1;
+}
+
+const struct ra_hwdec_driver ra_hwdec_vulkan = {
+    .name = "vulkan",
+    .imgfmts = {IMGFMT_VULKAN, 0},
+    .priv_size = sizeof(struct vulkan_hw_priv),
+    .init = vulkan_init,
+    .uninit = vulkan_uninit,
+    .mapper = &(const struct ra_hwdec_mapper_driver){
+        .priv_size = sizeof(struct vulkan_mapper_priv),
+        .init = mapper_init,
+        .uninit = mapper_uninit,
+        .map = mapper_map,
+        .unmap = mapper_unmap,
+    },
+};
diff --git a/wscript b/wscript
index 25578e7235..f40fdad9d1 100644
--- a/wscript
+++ b/wscript
@@ -797,6 +797,12 @@ video_output_features = [
         'deps': 'libplacebo',
         'func': check_preprocessor('libplacebo/config.h', 'PL_API_VER >= 264',
                                    use='libplacebo'),
+    }, {
+        'name': 'libplacebo-decode',
+        'desc': 'libplacebo v5.275.0+, needed for Vulkan video decode',
+        'deps': 'libplacebo',
+        'func': 
check_preprocessor('libplacebo/config.h', 'PL_API_VER >= 275', + use='libplacebo'), }, { 'name': '--vulkan', 'desc': 'Vulkan context support', @@ -808,6 +814,11 @@ video_output_features = [ 'deps': 'vulkan', 'func': check_statement('vulkan/vulkan_core.h', 'vkCreateDisplayPlaneSurfaceKHR(0, 0, 0, 0)', use='vulkan') + }, { + 'name': '--vulkan-interop', + 'desc': 'Vulkan graphics interop', + 'deps': 'vulkan && libplacebo-next', + 'func': check_pkg_config('libavutil', '>= 58.11.100'), }, { 'name': 'vaapi-libplacebo', 'desc': 'VAAPI libplacebo', diff --git a/wscript_build.py b/wscript_build.py index 823f875c91..8366ba76dc 100644 --- a/wscript_build.py +++ b/wscript_build.py @@ -486,6 +486,7 @@ def build(ctx): ( "video/out/hwdec/hwdec_drmprime.c", "drm" ), ( "video/out/hwdec/hwdec_drmprime_overlay.c","drm" ), ( "video/out/hwdec/hwdec_vaapi.c", "vaapi-egl || vaapi-libplacebo" ), + ( "video/out/hwdec/hwdec_vulkan.c", "vulkan-interop" ), ( "video/out/hwdec/dmabuf_interop_gl.c", "dmabuf-interop-gl" ), ( "video/out/hwdec/dmabuf_interop_pl.c", "dmabuf-interop-pl" ), ( "video/out/hwdec/dmabuf_interop_wl.c", "dmabuf-wayland" ),