mpv/video/out/gpu/hwdec.c

323 lines
9.6 KiB
C
Raw Normal View History

/*
* This file is part of mpv.
*
* mpv is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* mpv is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with mpv. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stddef.h>
#include <string.h>
#include "config.h"
#include "common/common.h"
#include "common/msg.h"
#include "options/m_config.h"
#include "hwdec.h"
// Hwdec interop driver objects used by this file. Being declared `extern`,
// each one is defined in some other translation unit.
// NOTE(review): ra_hwdec_vaglx and ra_hwdec_dxva2 do not appear in the
// ra_hwdec_drivers[] table in this file - confirm whether these two
// declarations are still needed.
extern const struct ra_hwdec_driver ra_hwdec_vaegl;
extern const struct ra_hwdec_driver ra_hwdec_vaglx;
extern const struct ra_hwdec_driver ra_hwdec_videotoolbox;
extern const struct ra_hwdec_driver ra_hwdec_vdpau;
extern const struct ra_hwdec_driver ra_hwdec_dxva2egl;
extern const struct ra_hwdec_driver ra_hwdec_d3d11egl;
extern const struct ra_hwdec_driver ra_hwdec_dxva2gldx;
extern const struct ra_hwdec_driver ra_hwdec_dxva2;
vo_gpu: d3d11: initial implementation This is a new RA/vo_gpu backend that uses Direct3D 11. The GLSL generated by vo_gpu is cross-compiled to HLSL with SPIRV-Cross. What works: - All of mpv's internal shaders should work, including compute shaders. - Some external shaders have been tested and work, including RAVU and adaptive-sharpen. - Non-dumb mode works, even on very old hardware. Most features work at feature level 9_3 and all features work at feature level 10_0. Some features also work at feature level 9_1 and 9_2, but without high-bit- depth FBOs, it's not very useful. (Hardware this old is probably not fast enough for advanced features anyway.) Note: This is more compatible than ANGLE, which requires 9_3 to work at all (GLES 2.0,) and 10_1 for non-dumb-mode (GLES 3.0.) - Hardware decoding with D3D11VA, including decoding of 10-bit formats without truncation to 8-bit. What doesn't work / can be improved: - PBO upload and direct rendering does not work yet. Direct rendering requires persistent-mapped PBOs because the decoder needs to be able to read data from images that have already been decoded and uploaded. Unfortunately, it seems like persistent-mapped PBOs are fundamentally incompatible with D3D11, which requires all resources to use driver- managed memory and requires memory to be unmapped (and hence pointers to be invalidated) when a resource is used in a draw or copy operation. However it might be possible to use D3D11's limited multithreading capabilities to emulate some features of PBOs, like asynchronous texture uploading. - The blit() and clear() operations don't have equivalents in the D3D11 API that handle all cases, so in most cases, they have to be emulated with a shader. This is currently done inside ra_d3d11, but ideally it would be done in generic code, so it can take advantage of mpv's shader generation utilities. - SPIRV-Cross is used through a NIH C-compatible wrapper library, since it does not expose a C interface itself. 
The library is available here: https://github.com/rossy/crossc - The D3D11 context could be made to support more modern DXGI features in future. For example, it should be possible to add support for high-bit-depth and HDR output with DXGI 1.5/1.6.
2017-09-07 10:18:06 +00:00
// Further hwdec interop driver objects, defined in other translation units.
// NOTE(review): ra_hwdec_cuda_nvdec does not appear in the ra_hwdec_drivers[]
// table in this file - confirm whether this declaration is still needed.
extern const struct ra_hwdec_driver ra_hwdec_d3d11va;
extern const struct ra_hwdec_driver ra_hwdec_dxva2dxgi;
extern const struct ra_hwdec_driver ra_hwdec_cuda;
extern const struct ra_hwdec_driver ra_hwdec_cuda_nvdec;
extern const struct ra_hwdec_driver ra_hwdec_rpi_overlay;
extern const struct ra_hwdec_driver ra_hwdec_drmprime_drm;
vo_gpu: make it possible to load multiple hwdec interop drivers Make the VO<->decoder interface capable of supporting multiple hwdec APIs at once. The main gain is that this simplifies autoprobing a lot. Before this change, it could happen that the VO loaded the "wrong" hwdec API, and the decoder was stuck with the choice (breaking hw decoding). With the change applied, the VO simply loads all available APIs, so autoprobing trickery is left entirely to the decoder. In the past, we were quite careful about not accidentally loading the wrong interop drivers. This was in part to make sure autoprobing works, but also because libva had this obnoxious bug of dumping garbage to stderr when using the API. libva was fixed, so this is not a problem anymore. The --opengl-hwdec-interop option is changed in various ways (again...), and renamed to --gpu-hwdec-interop. It does not have much use anymore, other than debugging. It's notable that the order in the hwdec interop array ra_hwdec_drivers[] still matters if multiple drivers support the same image formats, so the option can explicitly force one, if that should ever be necessary, or more likely, for debugging. One example are the ra_hwdec_d3d11egl and ra_hwdec_d3d11eglrgb drivers, which both support d3d11 input. vo_gpu now always loads the interop lazily by default, but when it does, it loads them all. vo_opengl_cb now always loads them when the GL context handle is initialized. I don't expect that this causes any problems. It's now possible to do things like changing between vdpau and nvdec decoding at runtime. This is also preparation for cleaning up vd_lavc.c hwdec autoprobing. It's another reason why hwdec_devices_request_all() does not take a hwdec type anymore.
2017-12-01 04:05:00 +00:00
/*
 * NULL-terminated table of the hwdec interop drivers compiled into this
 * build. Each entry is guarded by the build-time feature macro(s) of the
 * backend it belongs to.
 *
 * The order of the entries matters: drivers are loaded in table order, and
 * ra_hwdec_get() returns the first loaded driver that supports a given image
 * format, so earlier entries win when several drivers accept the same format.
 */
const struct ra_hwdec_driver *const ra_hwdec_drivers[] = {
#if HAVE_VAAPI_EGL || HAVE_VAAPI_VULKAN
    &ra_hwdec_vaegl,
#endif
#if HAVE_VIDEOTOOLBOX_GL || HAVE_IOS_GL
    &ra_hwdec_videotoolbox,
#endif
#if HAVE_D3D_HWACCEL
 #if HAVE_EGL_ANGLE
    &ra_hwdec_d3d11egl,
  #if HAVE_D3D9_HWACCEL
    &ra_hwdec_dxva2egl,
  #endif
 #endif
 #if HAVE_D3D11
    &ra_hwdec_d3d11va,
  #if HAVE_D3D9_HWACCEL
    &ra_hwdec_dxva2dxgi,
  #endif
 #endif
#endif
#if HAVE_GL_DXINTEROP_D3D9
    &ra_hwdec_dxva2gldx,
#endif
#if HAVE_CUDA_INTEROP
    &ra_hwdec_cuda,
#endif
#if HAVE_VDPAU_GL_X11
    &ra_hwdec_vdpau,
#endif
#if HAVE_RPI_MMAL
    &ra_hwdec_rpi_overlay,
#endif
#if HAVE_DRM
    &ra_hwdec_drmprime_drm,
#endif

    NULL
};
vo_gpu: make it possible to load multiple hwdec interop drivers Make the VO<->decoder interface capable of supporting multiple hwdec APIs at once. The main gain is that this simplifies autoprobing a lot. Before this change, it could happen that the VO loaded the "wrong" hwdec API, and the decoder was stuck with the choice (breaking hw decoding). With the change applied, the VO simply loads all available APIs, so autoprobing trickery is left entirely to the decoder. In the past, we were quite careful about not accidentally loading the wrong interop drivers. This was in part to make sure autoprobing works, but also because libva had this obnoxious bug of dumping garbage to stderr when using the API. libva was fixed, so this is not a problem anymore. The --opengl-hwdec-interop option is changed in various ways (again...), and renamed to --gpu-hwdec-interop. It does not have much use anymore, other than debugging. It's notable that the order in the hwdec interop array ra_hwdec_drivers[] still matters if multiple drivers support the same image formats, so the option can explicitly force one, if that should ever be necessary, or more likely, for debugging. One example are the ra_hwdec_d3d11egl and ra_hwdec_d3d11eglrgb drivers, which both support d3d11 input. vo_gpu now always loads the interop lazily by default, but when it does, it loads them all. vo_opengl_cb now always loads them when the GL context handle is initialized. I don't expect that this causes any problems. It's now possible to do things like changing between vdpau and nvdec decoding at runtime. This is also preparation for cleaning up vd_lavc.c hwdec autoprobing. It's another reason why hwdec_devices_request_all() does not take a hwdec type anymore.
2017-12-01 04:05:00 +00:00
/*
 * Allocate a ra_hwdec instance for the driver `drv` and run the driver's
 * init() callback on it.
 *
 * The instance records the passed ra/global/devs pointers, gets a log object
 * created from `log` and the driver name, and a zeroed private area of
 * drv->priv_size bytes. `is_auto` is stored in the `probing` field.
 *
 * Returns the new instance, or NULL if init() returned a negative value (the
 * instance is destroyed through ra_hwdec_uninit() in that case).
 */
struct ra_hwdec *ra_hwdec_load_driver(struct ra *ra, struct mp_log *log,
                                      struct mpv_global *global,
                                      struct mp_hwdec_devices *devs,
                                      const struct ra_hwdec_driver *drv,
                                      bool is_auto)
{
    struct ra_hwdec *inst = talloc(NULL, struct ra_hwdec);

    // The log object and the private data are allocated as talloc children
    // of the instance itself.
    *inst = (struct ra_hwdec) {
        .driver = drv,
        .log = mp_log_new(inst, log, drv->name),
        .global = global,
        .ra = ra,
        .devs = devs,
        .probing = is_auto,
        .priv = talloc_zero_size(inst, drv->priv_size),
    };

    mp_verbose(log, "Loading hwdec driver '%s'\n", drv->name);

    if (inst->driver->init(inst) >= 0)
        return inst;

    // init() reported an error: tear the instance down again.
    ra_hwdec_uninit(inst);
    mp_verbose(log, "Loading failed.\n");
    return NULL;
}
/*
 * Destroy a hwdec instance: call the driver's uninit() callback (only when
 * `hwdec` is non-NULL), then hand the pointer to talloc_free().
 */
void ra_hwdec_uninit(struct ra_hwdec *hwdec)
{
    if (hwdec != NULL) {
        hwdec->driver->uninit(hwdec);
    }

    talloc_free(hwdec);
}
/*
 * Check whether `imgfmt` is listed in the driver's imgfmts[] array.
 * The array is scanned up to its first zero entry, which ends the list.
 *
 * Returns true if a matching entry was found, false otherwise.
 */
bool ra_hwdec_test_format(struct ra_hwdec *hwdec, int imgfmt)
{
    int idx = 0;

    while (hwdec->driver->imgfmts[idx]) {
        if (hwdec->driver->imgfmts[idx] == imgfmt) {
            return true;
        }
        idx++;
    }

    return false;
}
/*
 * Create a mapper for `hwdec` using the image parameters in `params`.
 *
 * params->imgfmt must be a format accepted by ra_hwdec_test_format() for this
 * hwdec (asserted). Both src_params and dst_params start out as a copy of
 * *params before the mapper driver's init() callback runs.
 *
 * Returns the new mapper, or NULL if init() returned a negative value.
 */
struct ra_hwdec_mapper *ra_hwdec_mapper_create(struct ra_hwdec *hwdec,
                                               struct mp_image_params *params)
{
    assert(ra_hwdec_test_format(hwdec, params->imgfmt));

    struct ra_hwdec_mapper *m = talloc_ptrtype(NULL, m);
    *m = (struct ra_hwdec_mapper){
        .owner = hwdec,
        .driver = hwdec->driver->mapper,
        .log = hwdec->log,
        .ra = hwdec->ra,
        .priv = talloc_zero_size(m, hwdec->driver->mapper->priv_size),
        .src_params = *params,
        .dst_params = *params,
    };

    if (m->driver->init(m) < 0) {
        // ra_hwdec_mapper_free() resets `m` to NULL, so NULL is returned below.
        ra_hwdec_mapper_free(&m);
    }

    return m;
}
/*
 * Destroy the mapper referenced by *mapper (if any) and set *mapper to NULL.
 * A non-NULL mapper is unmapped first, then the driver's uninit() callback is
 * run, then its memory is released.
 */
void ra_hwdec_mapper_free(struct ra_hwdec_mapper **mapper)
{
    if (*mapper != NULL) {
        struct ra_hwdec_mapper *victim = *mapper;

        ra_hwdec_mapper_unmap(victim);
        victim->driver->uninit(victim);
        talloc_free(victim);
    }

    *mapper = NULL;
}
/*
 * Undo a previous map: call the driver's unmap() callback when the driver
 * provides one, then drop the mapper's reference to the source image.
 */
void ra_hwdec_mapper_unmap(struct ra_hwdec_mapper *mapper)
{
    if (mapper->driver->unmap != NULL) {
        mapper->driver->unmap(mapper);
    }

    // Release the source image in case the driver's unmap() did not.
    mp_image_unrefp(&mapper->src);
}
/*
 * Map `img` through the mapper. Any previous mapping is undone first, then
 * `img` becomes the mapper's source image and the driver's map() callback
 * is invoked.
 *
 * Returns 0 on success. If map() returns a negative value the mapper is
 * unmapped again and -1 is returned.
 */
int ra_hwdec_mapper_map(struct ra_hwdec_mapper *mapper, struct mp_image *img)
{
    ra_hwdec_mapper_unmap(mapper);
    mp_image_setrefp(&mapper->src, img);

    if (mapper->driver->map(mapper) >= 0)
        return 0;

    ra_hwdec_mapper_unmap(mapper);
    return -1;
}
int ra_hwdec_validate_opt(struct mp_log *log, const m_option_t *opt,
struct bstr name, const char **value)
{
struct bstr param = bstr0(*value);
bool help = bstr_equals0(param, "help");
if (help)
mp_info(log, "Available hwdecs:\n");
for (int n = 0; ra_hwdec_drivers[n]; n++) {
const struct ra_hwdec_driver *drv = ra_hwdec_drivers[n];
if (help) {
mp_info(log, " %s\n", drv->name);
} else if (bstr_equals0(param, drv->name)) {
return 1;
}
}
if (help) {
mp_info(log, " auto (behavior depends on context)\n"
" all (load all hwdecs)\n"
" no (do not load any and block loading on demand)\n");
return M_OPT_EXIT;
}
if (!param.len)
return 1; // "" is treated specially
if (bstr_equals0(param, "all") || bstr_equals0(param, "auto") ||
bstr_equals0(param, "no"))
return 1;
mp_fatal(log, "No hwdec backend named '%.*s' found!\n", BSTR_P(param));
return M_OPT_INVALID;
}
/*
 * Load the driver `drv` and append the resulting instance to ctx->hwdecs,
 * unless an instance of the same driver is already in the list. Nothing is
 * appended when loading fails. `is_auto` is forwarded to
 * ra_hwdec_load_driver().
 */
static void load_add_hwdec(struct ra_hwdec_ctx *ctx, struct mp_hwdec_devices *devs,
                           const struct ra_hwdec_driver *drv, bool is_auto)
{
    // Skip drivers that already have a loaded instance.
    int idx = 0;
    while (idx < ctx->num_hwdecs) {
        if (ctx->hwdecs[idx]->driver == drv)
            return;
        idx++;
    }

    struct ra_hwdec *loaded =
        ra_hwdec_load_driver(ctx->ra, ctx->log, ctx->global, devs, drv, is_auto);
    if (!loaded)
        return;

    MP_TARRAY_APPEND(NULL, ctx->hwdecs, ctx->num_hwdecs, loaded);
}
static void load_hwdecs_all(struct ra_hwdec_ctx *ctx, struct mp_hwdec_devices *devs)
{
if (!ctx->loading_done) {
for (int n = 0; ra_hwdec_drivers[n]; n++)
load_add_hwdec(ctx, devs, ra_hwdec_drivers[n], true);
ctx->loading_done = true;
}
}
void ra_hwdec_ctx_init(struct ra_hwdec_ctx *ctx, struct mp_hwdec_devices *devs,
const char *type, bool load_all_by_default)
{
assert(ctx->ra);
/*
* By default, or if the option value is "auto", we will not pre-emptively
* load any interops, and instead allow them to be loaded on-demand.
*
* If the option value is "no", then no interops will be loaded now, and
* no interops will be loaded, even if requested later.
*
* If the option value is "all", then all interops will be loaded now, and
* obviously no interops will need to be loaded later.
*
* Finally, if a specific interop is requested, it will be loaded now, and
* no other interop will be loaded, even if requested later.
*/
if (!type || !type[0] || strcmp(type, "auto") == 0) {
if (!load_all_by_default)
return;
type = "all";
}
if (strcmp(type, "no") == 0) {
// do nothing, just block further loading
} else if (strcmp(type, "all") == 0) {
load_hwdecs_all(ctx, devs);
} else {
for (int n = 0; ra_hwdec_drivers[n]; n++) {
const struct ra_hwdec_driver *drv = ra_hwdec_drivers[n];
if (strcmp(type, drv->name) == 0) {
load_add_hwdec(ctx, devs, drv, false);
break;
}
}
}
ctx->loading_done = true;
}
/*
 * Destroy every loaded hwdec instance in list order, free the list itself
 * and reset the whole context structure to zero bytes.
 */
void ra_hwdec_ctx_uninit(struct ra_hwdec_ctx *ctx)
{
    int idx = 0;
    while (idx < ctx->num_hwdecs) {
        ra_hwdec_uninit(ctx->hwdecs[idx]);
        idx++;
    }

    talloc_free(ctx->hwdecs);
    memset(ctx, 0, sizeof(*ctx));
}
/*
 * Load interops on demand for the image format `imgfmt`.
 *
 * Nothing happens once ctx->loading_done is set. With IMGFMT_NONE all
 * drivers are loaded; otherwise every driver whose imgfmts[] list contains
 * `imgfmt` is loaded (drivers that are already loaded are skipped by
 * load_add_hwdec()).
 */
void ra_hwdec_ctx_load_fmt(struct ra_hwdec_ctx *ctx, struct mp_hwdec_devices *devs,
                           int imgfmt)
{
    /*
     * If interop loading was previously marked as done, do not load any
     * other interops regardless of imgfmt.
     */
    if (ctx->loading_done)
        return;

    if (imgfmt == IMGFMT_NONE) {
        MP_VERBOSE(ctx, "Loading hwdec drivers for all formats\n");
        load_hwdecs_all(ctx, devs);
        return;
    }

    MP_VERBOSE(ctx, "Loading hwdec drivers for format: '%s'\n",
               mp_imgfmt_to_name(imgfmt));

    for (int i = 0; ra_hwdec_drivers[i]; i++) {
        const struct ra_hwdec_driver *drv = ra_hwdec_drivers[i];

        // Advance to the first entry equal to imgfmt, or to the zero entry
        // that ends the list.
        int j = 0;
        while (drv->imgfmts[j] && drv->imgfmts[j] != imgfmt)
            j++;

        // Stopping on a non-zero entry means the format is supported.
        if (drv->imgfmts[j])
            load_add_hwdec(ctx, devs, drv, false);
    }
}
/*
 * Return the first loaded hwdec instance (in list order) whose driver
 * supports `imgfmt`, or NULL when no loaded instance does.
 */
struct ra_hwdec *ra_hwdec_get(struct ra_hwdec_ctx *ctx, int imgfmt)
{
    int idx = 0;

    while (idx < ctx->num_hwdecs) {
        struct ra_hwdec *candidate = ctx->hwdecs[idx];
        if (ra_hwdec_test_format(candidate, imgfmt))
            return candidate;
        idx++;
    }

    return NULL;
}