mpv/video/out/gpu/hwdec.c

354 lines
11 KiB
C
Raw Normal View History

/*
* This file is part of mpv.
*
* mpv is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* mpv is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with mpv. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stddef.h>
#include <string.h>
#include "config.h"
#include "common/common.h"
#include "common/msg.h"
#include "options/m_config.h"
#include "hwdec.h"
extern const struct ra_hwdec_driver ra_hwdec_vaapi;
extern const struct ra_hwdec_driver ra_hwdec_videotoolbox;
extern const struct ra_hwdec_driver ra_hwdec_vdpau;
extern const struct ra_hwdec_driver ra_hwdec_dxva2egl;
extern const struct ra_hwdec_driver ra_hwdec_d3d11egl;
extern const struct ra_hwdec_driver ra_hwdec_dxva2gldx;
vo_gpu: d3d11: initial implementation This is a new RA/vo_gpu backend that uses Direct3D 11. The GLSL generated by vo_gpu is cross-compiled to HLSL with SPIRV-Cross. What works: - All of mpv's internal shaders should work, including compute shaders. - Some external shaders have been tested and work, including RAVU and adaptive-sharpen. - Non-dumb mode works, even on very old hardware. Most features work at feature level 9_3 and all features work at feature level 10_0. Some features also work at feature level 9_1 and 9_2, but without high-bit- depth FBOs, it's not very useful. (Hardware this old is probably not fast enough for advanced features anyway.) Note: This is more compatible than ANGLE, which requires 9_3 to work at all (GLES 2.0,) and 10_1 for non-dumb-mode (GLES 3.0.) - Hardware decoding with D3D11VA, including decoding of 10-bit formats without truncation to 8-bit. What doesn't work / can be improved: - PBO upload and direct rendering does not work yet. Direct rendering requires persistent-mapped PBOs because the decoder needs to be able to read data from images that have already been decoded and uploaded. Unfortunately, it seems like persistent-mapped PBOs are fundamentally incompatible with D3D11, which requires all resources to use driver- managed memory and requires memory to be unmapped (and hence pointers to be invalidated) when a resource is used in a draw or copy operation. However it might be possible to use D3D11's limited multithreading capabilities to emulate some features of PBOs, like asynchronous texture uploading. - The blit() and clear() operations don't have equivalents in the D3D11 API that handle all cases, so in most cases, they have to be emulated with a shader. This is currently done inside ra_d3d11, but ideally it would be done in generic code, so it can take advantage of mpv's shader generation utilities. - SPIRV-Cross is used through a NIH C-compatible wrapper library, since it does not expose a C interface itself. 
The library is available here: https://github.com/rossy/crossc - The D3D11 context could be made to support more modern DXGI features in future. For example, it should be possible to add support for high-bit-depth and HDR output with DXGI 1.5/1.6.
2017-09-07 10:18:06 +00:00
extern const struct ra_hwdec_driver ra_hwdec_d3d11va;
extern const struct ra_hwdec_driver ra_hwdec_dxva2dxgi;
extern const struct ra_hwdec_driver ra_hwdec_cuda;
extern const struct ra_hwdec_driver ra_hwdec_rpi_overlay;
hwdec/drmprime: add drmprime hwdec-interop In the confusing landscape of hardware video decoding APIs, we have had a long-standing support gap for the v4l2-based APIs implemented for the various SoCs from Rockchip, Amlogic, Allwinner, etc. While VAAPI is the de facto default for desktop GPUs, the developers who work on these SoCs (who are not the vendors!) have preferred to implement kernel APIs rather than maintain a userspace driver as VAAPI would require. While there are two v4l2 APIs (m2m and requests), and multiple forks of ffmpeg where support for those APIs languishes without reaching upstream, we can at least say that these APIs export frames as DRMPrime dmabufs, and that they use the ffmpeg drm hwcontext. With those two constants, it is possible for us to write a hwdec-interop without worrying about the mess underneath - for the most part. Accordingly, this change implements a hwdec-interop for any decoder that produces frames as DRMPrime dmabufs. The bulk of the heavy lifting is done by the dmabuf interop code we already had from supporting vaapi, and which I refactored for reusability in a previous set of changes. When we combine that with the fact that we can't probe for supported formats, the new code in this change is pretty simple. This change also includes the hwcontext_fns that are required for us to be able to configure the hwcontext used by `hwdec=drm-copy`. This is technically unrelated, but it seemed a good time to fill this gap. From a testing perspective, I have directly tested on a RockPRO64, while others have tested with different flavours of Rockchip and on Amlogic, providing m2m coverage. I have some other SoCs that I need to spin up to test with, but I don't expect big surprises, and when we inevitably need to account for new special cases down the line, we can do so - we won't be able to support every possible configuration blindly.
2022-07-31 20:47:23 +00:00
extern const struct ra_hwdec_driver ra_hwdec_drmprime;
extern const struct ra_hwdec_driver ra_hwdec_drmprime_overlay;
extern const struct ra_hwdec_driver ra_hwdec_aimagereader;
vo_gpu: make it possible to load multiple hwdec interop drivers Make the VO<->decoder interface capable of supporting multiple hwdec APIs at once. The main gain is that this simplifies autoprobing a lot. Before this change, it could happen that the VO loaded the "wrong" hwdec API, and the decoder was stuck with the choice (breaking hw decoding). With the change applied, the VO simply loads all available APIs, so autoprobing trickery is left entirely to the decoder. In the past, we were quite careful about not accidentally loading the wrong interop drivers. This was in part to make sure autoprobing works, but also because libva had this obnoxious bug of dumping garbage to stderr when using the API. libva was fixed, so this is not a problem anymore. The --opengl-hwdec-interop option is changed in various ways (again...), and renamed to --gpu-hwdec-interop. It does not have much use anymore, other than debugging. It's notable that the order in the hwdec interop array ra_hwdec_drivers[] still matters if multiple drivers support the same image formats, so the option can explicitly force one, if that should ever be necessary, or more likely, for debugging. One example are the ra_hwdec_d3d11egl and ra_hwdec_d3d11eglrgb drivers, which both support d3d11 input. vo_gpu now always loads the interop lazily by default, but when it does, it loads them all. vo_opengl_cb now always loads them when the GL context handle is initialized. I don't expect that this causes any problems. It's now possible to do things like changing between vdpau and nvdec decoding at runtime. This is also preparation for cleaning up vd_lavc.c hwdec autoprobing. It's another reason why hwdec_devices_request_all() does not take a hwdec type anymore.
2017-12-01 04:05:00 +00:00
/*
 * NULL-terminated table of all hwdec interop drivers compiled into this
 * build. Each entry is gated on the matching HAVE_* build switch.
 *
 * The order is significant: the loaders in this file walk the table front to
 * back and append successfully loaded drivers in that order, and
 * ra_hwdec_get() returns the first loaded driver that supports a format.
 */
const struct ra_hwdec_driver *const ra_hwdec_drivers[] = {
#if HAVE_VAAPI_EGL || HAVE_VAAPI_LIBPLACEBO
    &ra_hwdec_vaapi,
#endif
#if HAVE_VIDEOTOOLBOX_GL || HAVE_IOS_GL
    &ra_hwdec_videotoolbox,
#endif
#if HAVE_D3D_HWACCEL
#if HAVE_EGL_ANGLE
    &ra_hwdec_d3d11egl,
#if HAVE_D3D9_HWACCEL
    &ra_hwdec_dxva2egl,
#endif
#endif
#if HAVE_D3D11
    &ra_hwdec_d3d11va,
#if HAVE_D3D9_HWACCEL
    &ra_hwdec_dxva2dxgi,
#endif
#endif
#endif
#if HAVE_GL_DXINTEROP_D3D9
    &ra_hwdec_dxva2gldx,
#endif
#if HAVE_CUDA_INTEROP
    &ra_hwdec_cuda,
#endif
#if HAVE_VDPAU_GL_X11
    &ra_hwdec_vdpau,
#endif
#if HAVE_RPI_MMAL
    &ra_hwdec_rpi_overlay,
#endif
#if HAVE_DRM
    &ra_hwdec_drmprime_overlay,
    &ra_hwdec_drmprime,
#endif
#if HAVE_ANDROID_MEDIA_NDK
    &ra_hwdec_aimagereader,
#endif

    NULL
};
vo_gpu: make it possible to load multiple hwdec interop drivers Make the VO<->decoder interface capable of supporting multiple hwdec APIs at once. The main gain is that this simplifies autoprobing a lot. Before this change, it could happen that the VO loaded the "wrong" hwdec API, and the decoder was stuck with the choice (breaking hw decoding). With the change applied, the VO simply loads all available APIs, so autoprobing trickery is left entirely to the decoder. In the past, we were quite careful about not accidentally loading the wrong interop drivers. This was in part to make sure autoprobing works, but also because libva had this obnoxious bug of dumping garbage to stderr when using the API. libva was fixed, so this is not a problem anymore. The --opengl-hwdec-interop option is changed in various ways (again...), and renamed to --gpu-hwdec-interop. It does not have much use anymore, other than debugging. It's notable that the order in the hwdec interop array ra_hwdec_drivers[] still matters if multiple drivers support the same image formats, so the option can explicitly force one, if that should ever be necessary, or more likely, for debugging. One example are the ra_hwdec_d3d11egl and ra_hwdec_d3d11eglrgb drivers, which both support d3d11 input. vo_gpu now always loads the interop lazily by default, but when it does, it loads them all. vo_opengl_cb now always loads them when the GL context handle is initialized. I don't expect that this causes any problems. It's now possible to do things like changing between vdpau and nvdec decoding at runtime. This is also preparation for cleaning up vd_lavc.c hwdec autoprobing. It's another reason why hwdec_devices_request_all() does not take a hwdec type anymore.
2017-12-01 04:05:00 +00:00
/*
 * Create an interop instance for the driver `drv` and run its init callback.
 *
 * The instance stores the passed ra/global/devs pointers, gets a log created
 * from `log` and named after the driver, and a zeroed private area of
 * drv->priv_size bytes. `is_auto` ends up in the instance's `probing` field.
 *
 * Returns the initialized instance, or NULL if the driver's init callback
 * reported failure (the instance is uninitialized and freed in that case).
 */
struct ra_hwdec *ra_hwdec_load_driver(struct ra *ra, struct mp_log *log,
                                      struct mpv_global *global,
                                      struct mp_hwdec_devices *devs,
                                      const struct ra_hwdec_driver *drv,
                                      bool is_auto)
{
    struct ra_hwdec *inst = talloc(NULL, struct ra_hwdec);

    // The compound literal also zeroes every field that is not listed.
    *inst = (struct ra_hwdec) {
        .driver = drv,
        .log = mp_log_new(inst, log, drv->name),
        .global = global,
        .ra = ra,
        .devs = devs,
        .probing = is_auto,
        .priv = talloc_zero_size(inst, drv->priv_size),
    };

    mp_verbose(log, "Loading hwdec driver '%s'\n", drv->name);

    if (drv->init(inst) >= 0)
        return inst;

    // Failed init still goes through the regular teardown path.
    ra_hwdec_uninit(inst);
    mp_verbose(log, "Loading failed.\n");
    return NULL;
}
/*
 * Tear down an interop instance: invoke the driver's uninit callback (when
 * an instance was actually passed) and release the instance with talloc_free.
 * A NULL `hwdec` skips the driver callback.
 */
void ra_hwdec_uninit(struct ra_hwdec *hwdec)
{
    if (hwdec != NULL) {
        hwdec->driver->uninit(hwdec);
    }

    talloc_free(hwdec);
}
/*
 * Report whether `imgfmt` appears in the driver's imgfmts list of `hwdec`.
 * The list is scanned up to (not including) its first zero entry.
 */
bool ra_hwdec_test_format(struct ra_hwdec *hwdec, int imgfmt)
{
    int idx = 0;

    while (hwdec->driver->imgfmts[idx]) {
        if (imgfmt == hwdec->driver->imgfmts[idx])
            return true;
        idx++;
    }

    return false;
}
/*
 * Create a mapper for `hwdec` using the driver's mapper callbacks.
 *
 * `params` is copied into both the source and the destination parameters of
 * the new mapper. The caller must pass a format the hwdec supports; this is
 * asserted.
 *
 * Returns the mapper, or NULL if the mapper driver's init callback failed.
 */
struct ra_hwdec_mapper *ra_hwdec_mapper_create(struct ra_hwdec *hwdec,
                                               const struct mp_image_params *params)
{
    assert(ra_hwdec_test_format(hwdec, params->imgfmt));

    struct ra_hwdec_mapper *m = talloc_ptrtype(NULL, m);
    *m = (struct ra_hwdec_mapper){
        .owner = hwdec,
        .driver = hwdec->driver->mapper,
        .log = hwdec->log,
        .ra = hwdec->ra,
        .priv = talloc_zero_size(m, hwdec->driver->mapper->priv_size),
        .src_params = *params,
        .dst_params = *params,
    };

    if (m->driver->init(m) >= 0)
        return m;

    // ra_hwdec_mapper_free() resets the pointer to NULL for us.
    ra_hwdec_mapper_free(&m);
    return m;
}
/*
 * Destroy the mapper referenced by `*mapper` (if any): unmap it, call the
 * mapper driver's uninit callback, and free it. `*mapper` is always set to
 * NULL afterwards.
 */
void ra_hwdec_mapper_free(struct ra_hwdec_mapper **mapper)
{
    if (*mapper != NULL) {
        struct ra_hwdec_mapper *m = *mapper;

        ra_hwdec_mapper_unmap(m);
        m->driver->uninit(m);
        talloc_free(m);
    }

    *mapper = NULL;
}
/*
 * Undo a previous mapping: call the mapper driver's optional unmap callback
 * and then drop the mapper's reference to the source image.
 */
void ra_hwdec_mapper_unmap(struct ra_hwdec_mapper *mapper)
{
    // The unmap callback is optional.
    if (mapper->driver->unmap != NULL) {
        mapper->driver->unmap(mapper);
    }

    // Release the source image in case the driver did not do so itself.
    mp_image_unrefp(&mapper->src);
}
/*
 * Map `img` through the mapper. Any existing mapping is released first, then
 * `img` is set as the mapper's source image and the driver's map callback runs.
 *
 * Returns 0 on success. On failure the mapper is unmapped again and -1 is
 * returned.
 */
int ra_hwdec_mapper_map(struct ra_hwdec_mapper *mapper, struct mp_image *img)
{
    ra_hwdec_mapper_unmap(mapper);
    mp_image_setrefp(&mapper->src, img);

    if (mapper->driver->map(mapper) >= 0)
        return 0;

    ra_hwdec_mapper_unmap(mapper);
    return -1;
}
/*
 * Shared validator for hwdec interop option values.
 *
 * "help" prints the names of all compiled-in drivers (plus the special modes
 * when `include_modes` is set) and returns M_OPT_EXIT. Otherwise the value is
 * accepted (return 1) if it names a driver, is empty, or - only with
 * `include_modes` - is one of "all", "auto" or "no". Anything else logs a
 * fatal message and returns M_OPT_INVALID.
 *
 * `opt` and `name` are not used here.
 */
static int ra_hwdec_validate_opt_full(struct mp_log *log, bool include_modes,
                                      const m_option_t *opt,
                                      struct bstr name, const char **value)
{
    struct bstr param = bstr0(*value);

    if (bstr_equals0(param, "help")) {
        mp_info(log, "Available hwdecs:\n");
        for (int i = 0; ra_hwdec_drivers[i]; i++)
            mp_info(log, " %s\n", ra_hwdec_drivers[i]->name);
        if (include_modes) {
            mp_info(log, " auto (behavior depends on context)\n"
                         " all (load all hwdecs)\n"
                         " no (do not load any and block loading on demand)\n");
        }
        return M_OPT_EXIT;
    }

    // A value naming any known driver is valid.
    for (int i = 0; ra_hwdec_drivers[i]; i++) {
        if (bstr_equals0(param, ra_hwdec_drivers[i]->name))
            return 1;
    }

    // "" is treated specially
    if (param.len == 0)
        return 1;

    if (include_modes) {
        if (bstr_equals0(param, "all"))
            return 1;
        if (bstr_equals0(param, "auto"))
            return 1;
        if (bstr_equals0(param, "no"))
            return 1;
    }

    mp_fatal(log, "No hwdec backend named '%.*s' found!\n", BSTR_P(param));
    return M_OPT_INVALID;
}
/*
 * Option validator that accepts driver names as well as the special modes
 * ("auto", "all", "no"); forwards to ra_hwdec_validate_opt_full().
 */
int ra_hwdec_validate_opt(struct mp_log *log, const m_option_t *opt,
                          struct bstr name, const char **value)
{
    const bool include_modes = true;

    return ra_hwdec_validate_opt_full(log, include_modes, opt, name, value);
}
/*
 * Option validator that accepts driver names only (no "auto"/"all"/"no"
 * modes); forwards to ra_hwdec_validate_opt_full().
 */
int ra_hwdec_validate_drivers_only_opt(struct mp_log *log,
                                       const m_option_t *opt,
                                       struct bstr name, const char **value)
{
    const bool include_modes = false;

    return ra_hwdec_validate_opt_full(log, include_modes, opt, name, value);
}
/*
 * Load driver `drv` and append the resulting instance to ctx->hwdecs.
 * Nothing happens if an instance of the same driver is already in the list,
 * or if loading the driver fails.
 */
static void load_add_hwdec(struct ra_hwdec_ctx *ctx, struct mp_hwdec_devices *devs,
                           const struct ra_hwdec_driver *drv, bool is_auto)
{
    // Skip drivers that already have a loaded instance.
    int idx = 0;
    while (idx < ctx->num_hwdecs) {
        if (drv == ctx->hwdecs[idx]->driver)
            return;
        idx++;
    }

    struct ra_hwdec *loaded =
        ra_hwdec_load_driver(ctx->ra, ctx->log, ctx->global, devs, drv, is_auto);
    if (!loaded)
        return;

    MP_TARRAY_APPEND(NULL, ctx->hwdecs, ctx->num_hwdecs, loaded);
}
static void load_hwdecs_all(struct ra_hwdec_ctx *ctx, struct mp_hwdec_devices *devs)
{
if (!ctx->loading_done) {
for (int n = 0; ra_hwdec_drivers[n]; n++)
load_add_hwdec(ctx, devs, ra_hwdec_drivers[n], true);
ctx->loading_done = true;
}
}
/*
 * Initialize the set of loaded interops according to the option value `type`.
 *
 * - NULL, "" or "auto": if `load_all_by_default` is false, nothing is loaded
 *   now and loading stays open for on-demand requests; otherwise this is
 *   handled exactly like "all".
 * - "no": nothing is loaded, and loading is marked as done so that later
 *   on-demand requests are ignored.
 * - "all": every available interop is loaded now.
 * - anything else: the driver with that exact name (if one exists) is loaded
 *   now in non-probing mode. Loading is then marked as done as well, so
 *   ra_hwdec_ctx_load_fmt() will not load further interops afterwards.
 *
 * ctx->ra must already be set.
 */
void ra_hwdec_ctx_init(struct ra_hwdec_ctx *ctx, struct mp_hwdec_devices *devs,
                       const char *type, bool load_all_by_default)
{
    assert(ctx->ra);

    bool unset_or_auto = !type || !type[0] || strcmp(type, "auto") == 0;
    if (unset_or_auto) {
        if (!load_all_by_default)
            return;
        type = "all";
    }

    if (strcmp(type, "all") == 0) {
        load_hwdecs_all(ctx, devs);
    } else if (strcmp(type, "no") != 0) {
        // A specific driver was requested: load the first one with that name.
        for (int i = 0; ra_hwdec_drivers[i]; i++) {
            if (strcmp(type, ra_hwdec_drivers[i]->name) != 0)
                continue;
            load_add_hwdec(ctx, devs, ra_hwdec_drivers[i], false);
            break;
        }
    }
    // For "no" there is nothing to load; we only block further loading below.

    ctx->loading_done = true;
}
/*
 * Uninitialize all loaded interops in list order, free the list itself, and
 * reset the whole context structure to zero bytes.
 */
void ra_hwdec_ctx_uninit(struct ra_hwdec_ctx *ctx)
{
    int idx = 0;
    while (idx < ctx->num_hwdecs) {
        ra_hwdec_uninit(ctx->hwdecs[idx]);
        idx++;
    }

    talloc_free(ctx->hwdecs);
    memset(ctx, 0, sizeof(*ctx));
}
/*
 * On-demand loading of interops for the image format in `params`.
 *
 * Does nothing once ctx->loading_done is set. If the requested format is
 * IMGFMT_NONE, all drivers are loaded. Otherwise every driver whose imgfmts
 * list contains the format is loaded, with params->probing passed on as the
 * probing flag.
 */
void ra_hwdec_ctx_load_fmt(struct ra_hwdec_ctx *ctx, struct mp_hwdec_devices *devs,
                           struct hwdec_imgfmt_request *params)
{
    int imgfmt = params->imgfmt;

    // Loading was already finished or blocked earlier; ignore the request
    // regardless of imgfmt.
    if (ctx->loading_done)
        return;

    if (imgfmt == IMGFMT_NONE) {
        MP_VERBOSE(ctx, "Loading hwdec drivers for all formats\n");
        load_hwdecs_all(ctx, devs);
        return;
    }

    MP_VERBOSE(ctx, "Loading hwdec drivers for format: '%s'\n",
               mp_imgfmt_to_name(imgfmt));

    for (int d = 0; ra_hwdec_drivers[d]; d++) {
        const struct ra_hwdec_driver *candidate = ra_hwdec_drivers[d];

        // Load the driver at most once, on the first matching format entry.
        for (int f = 0; candidate->imgfmts[f]; f++) {
            if (candidate->imgfmts[f] == imgfmt) {
                load_add_hwdec(ctx, devs, candidate, params->probing);
                break;
            }
        }
    }
}
/*
 * Return the first loaded interop (in list order) whose driver supports
 * `imgfmt`, or NULL if none of the loaded interops does.
 */
struct ra_hwdec *ra_hwdec_get(struct ra_hwdec_ctx *ctx, int imgfmt)
{
    struct ra_hwdec *found = NULL;

    for (int i = 0; i < ctx->num_hwdecs; i++) {
        if (!ra_hwdec_test_format(ctx->hwdecs[i], imgfmt))
            continue;
        found = ctx->hwdecs[i];
        break;
    }

    return found;
}
int ra_hwdec_driver_get_imgfmt_for_name(const char *name)
{
for (int i = 0; ra_hwdec_drivers[i]; i++) {
if (!strcmp(ra_hwdec_drivers[i]->name, name)) {
return ra_hwdec_drivers[i]->imgfmts[0];
}
}
return IMGFMT_NONE;
}