mpv/video/out/opengl/hwdec.c

119 lines
3.5 KiB
C
Raw Normal View History

/*
* This file is part of mpv.
*
* mpv is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* mpv is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with mpv. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stddef.h>
#include <string.h>
#include "config.h"
#include "common/common.h"
#include "common/msg.h"
#include "hwdec.h"
extern const struct gl_hwdec_driver gl_hwdec_vaegl;
extern const struct gl_hwdec_driver gl_hwdec_vaglx;
extern const struct gl_hwdec_driver gl_hwdec_videotoolbox;
extern const struct gl_hwdec_driver gl_hwdec_vdpau;
extern const struct gl_hwdec_driver gl_hwdec_dxva2egl;
extern const struct gl_hwdec_driver gl_hwdec_d3d11egl;
video: remove d3d11 video processor use from OpenGL interop We now have a video filter that uses the d3d11 video processor, so it makes no sense to have one in the VO interop code. The VO uses it for formats not directly supported by ANGLE (so the video data is converted to a RGB texture, which ANGLE can take in). Change this so that the video filter is automatically inserted if needed. Move the code that maps RGB surfaces to its own inteorp backend. Add a bunch of new image formats, which are used to enforce the new constraints, and to automatically insert the filter only when needed. The added vf mechanism to auto-insert the d3d11vpp filter is very dumb and primitive, and will work only for this specific purpose. The format negotiation mechanism in the filter chain is generally not very pretty, and mostly broken as well. (libavfilter has a different mechanism, and these mechanisms don't match well, so vf_lavfi uses some sort of hack. It only works because hwaccel and non-hwaccel formats are strictly separated.) The RGB interop is now only used with older ANGLE versions. The only reason I'm keeping it is because it's relatively isolated (uses only existing mechanisms and adds no new concepts), and because I want to be able to compare the behavior of the old code with the new one for testing. It will be removed eventually. If ANGLE has NV12 interop, P010 is now handled by converting to NV12 with the video processor, instead of converting it to RGB and using the old mechanism to import that as a texture.
2016-05-29 15:13:22 +00:00
extern const struct gl_hwdec_driver gl_hwdec_d3d11eglrgb;
extern const struct gl_hwdec_driver gl_hwdec_dxva2gldx;
extern const struct gl_hwdec_driver gl_hwdec_dxva2;
hwdec/opengl: Add support for CUDA and cuvid/NvDecode Nvidia's "NvDecode" API (up until recently called "cuvid" is a cross platform, but nvidia proprietary API that exposes their hardware video decoding capabilities. It is analogous to their DXVA or VDPAU support on Windows or Linux but without using platform specific API calls. As a rule, you'd rather use DXVA or VDPAU as these are more mature and well supported APIs, but on Linux, VDPAU is falling behind the hardware capabilities, and there's no sign that nvidia are making the investments to update it. Most concretely, this means that there is no VP8/9 or HEVC Main10 support in VDPAU. On the other hand, NvDecode does export vp8/9 and partial support for HEVC Main10 (more on that below). ffmpeg already has support in the form of the "cuvid" family of decoders. Due to the design of the API, it is best exposed as a full decoder rather than an hwaccel. As such, there are decoders like h264_cuvid, hevc_cuvid, etc. These decoders support two output paths today - in both cases, NV12 frames are returned, either in CUDA device memory or regular system memory. In the case of the system memory path, the decoders can be used as-is in mpv today with a command line like: mpv --vd=lavc:h264_cuvid foobar.mp4 Doing this will take advantage of hardware decoding, but the cost of the memcpy to system memory adds up, especially for high resolution video (4K etc). To avoid that, we need an hwdec that takes advantage of CUDA's OpenGL interop to copy from device memory into OpenGL textures. That is what this change implements. The process is relatively simple as only basic device context aquisition needs to be done by us - the CUDA buffer pool is managed by the decoder - thankfully. The hwdec looks a bit like the vdpau interop one - the hwdec maintains a single set of plane textures and each output frame is repeatedly mapped into these textures to pass on. The frames are always in NV12 format, at least until 10bit output supports emerges. The only slightly interesting part of the copying process is that CUDA works by associating PBOs, so we need to define these for each of the textures. TODO Items: * I need to add a download_image function for screenshots. This would do the same copy to system memory that the decoder's system memory output does. * There are items to investigate on the ffmpeg side. There appears to be a problem with timestamps for some content. Final note: I mentioned HEVC Main10. While there is no 10bit output support, NvDecode can return dithered 8bit NV12 so you can take advantage of the hardware acceleration. This particular mode requires compiling ffmpeg with a modified header (or possibly the CUDA 8 RC) and is not upstream in ffmpeg yet. Usage: You will need to specify vo=opengl and hwdec=cuda. Note that hwdec=auto will probably not work as it will try to use vdpau first. mpv --hwdec=cuda --vo=opengl foobar.mp4 If you want to use filters that require frames in system memory, just use the decoder directly without the hwdec, as documented above.
2016-09-04 22:23:55 +00:00
extern const struct gl_hwdec_driver gl_hwdec_cuda;
extern const struct gl_hwdec_driver gl_hwdec_rpi_overlay;
static const struct gl_hwdec_driver *const mpgl_hwdec_drivers[] = {
#if HAVE_VAAPI_EGL
&gl_hwdec_vaegl,
#endif
#if HAVE_VAAPI_GLX
&gl_hwdec_vaglx,
#endif
#if HAVE_VDPAU_GL_X11
&gl_hwdec_vdpau,
#endif
#if HAVE_VIDEOTOOLBOX_GL
&gl_hwdec_videotoolbox,
#endif
#if HAVE_D3D_HWACCEL
#if HAVE_EGL_ANGLE
&gl_hwdec_d3d11egl,
video: remove d3d11 video processor use from OpenGL interop We now have a video filter that uses the d3d11 video processor, so it makes no sense to have one in the VO interop code. The VO uses it for formats not directly supported by ANGLE (so the video data is converted to a RGB texture, which ANGLE can take in). Change this so that the video filter is automatically inserted if needed. Move the code that maps RGB surfaces to its own inteorp backend. Add a bunch of new image formats, which are used to enforce the new constraints, and to automatically insert the filter only when needed. The added vf mechanism to auto-insert the d3d11vpp filter is very dumb and primitive, and will work only for this specific purpose. The format negotiation mechanism in the filter chain is generally not very pretty, and mostly broken as well. (libavfilter has a different mechanism, and these mechanisms don't match well, so vf_lavfi uses some sort of hack. It only works because hwaccel and non-hwaccel formats are strictly separated.) The RGB interop is now only used with older ANGLE versions. The only reason I'm keeping it is because it's relatively isolated (uses only existing mechanisms and adds no new concepts), and because I want to be able to compare the behavior of the old code with the new one for testing. It will be removed eventually. If ANGLE has NV12 interop, P010 is now handled by converting to NV12 with the video processor, instead of converting it to RGB and using the old mechanism to import that as a texture.
2016-05-29 15:13:22 +00:00
&gl_hwdec_d3d11eglrgb,
&gl_hwdec_dxva2egl,
#endif
#if HAVE_GL_DXINTEROP
&gl_hwdec_dxva2gldx,
#endif
&gl_hwdec_dxva2,
hwdec/opengl: Add support for CUDA and cuvid/NvDecode Nvidia's "NvDecode" API (up until recently called "cuvid" is a cross platform, but nvidia proprietary API that exposes their hardware video decoding capabilities. It is analogous to their DXVA or VDPAU support on Windows or Linux but without using platform specific API calls. As a rule, you'd rather use DXVA or VDPAU as these are more mature and well supported APIs, but on Linux, VDPAU is falling behind the hardware capabilities, and there's no sign that nvidia are making the investments to update it. Most concretely, this means that there is no VP8/9 or HEVC Main10 support in VDPAU. On the other hand, NvDecode does export vp8/9 and partial support for HEVC Main10 (more on that below). ffmpeg already has support in the form of the "cuvid" family of decoders. Due to the design of the API, it is best exposed as a full decoder rather than an hwaccel. As such, there are decoders like h264_cuvid, hevc_cuvid, etc. These decoders support two output paths today - in both cases, NV12 frames are returned, either in CUDA device memory or regular system memory. In the case of the system memory path, the decoders can be used as-is in mpv today with a command line like: mpv --vd=lavc:h264_cuvid foobar.mp4 Doing this will take advantage of hardware decoding, but the cost of the memcpy to system memory adds up, especially for high resolution video (4K etc). To avoid that, we need an hwdec that takes advantage of CUDA's OpenGL interop to copy from device memory into OpenGL textures. That is what this change implements. The process is relatively simple as only basic device context aquisition needs to be done by us - the CUDA buffer pool is managed by the decoder - thankfully. The hwdec looks a bit like the vdpau interop one - the hwdec maintains a single set of plane textures and each output frame is repeatedly mapped into these textures to pass on. The frames are always in NV12 format, at least until 10bit output supports emerges. The only slightly interesting part of the copying process is that CUDA works by associating PBOs, so we need to define these for each of the textures. TODO Items: * I need to add a download_image function for screenshots. This would do the same copy to system memory that the decoder's system memory output does. * There are items to investigate on the ffmpeg side. There appears to be a problem with timestamps for some content. Final note: I mentioned HEVC Main10. While there is no 10bit output support, NvDecode can return dithered 8bit NV12 so you can take advantage of the hardware acceleration. This particular mode requires compiling ffmpeg with a modified header (or possibly the CUDA 8 RC) and is not upstream in ffmpeg yet. Usage: You will need to specify vo=opengl and hwdec=cuda. Note that hwdec=auto will probably not work as it will try to use vdpau first. mpv --hwdec=cuda --vo=opengl foobar.mp4 If you want to use filters that require frames in system memory, just use the decoder directly without the hwdec, as documented above.
2016-09-04 22:23:55 +00:00
#endif
#if HAVE_CUDA_GL
&gl_hwdec_cuda,
#endif
#if HAVE_RPI
&gl_hwdec_rpi_overlay,
#endif
NULL
};
static struct gl_hwdec *load_hwdec_driver(struct mp_log *log, GL *gl,
struct mpv_global *global,
struct mp_hwdec_devices *devs,
const struct gl_hwdec_driver *drv,
bool is_auto)
{
struct gl_hwdec *hwdec = talloc(NULL, struct gl_hwdec);
*hwdec = (struct gl_hwdec) {
.driver = drv,
.log = mp_log_new(hwdec, log, drv->name),
.global = global,
.gl = gl,
.devs = devs,
.probing = is_auto,
};
mp_verbose(log, "Loading hwdec driver '%s'\n", drv->name);
if (hwdec->driver->create(hwdec) < 0) {
talloc_free(hwdec);
mp_verbose(log, "Loading failed.\n");
return NULL;
}
return hwdec;
}
struct gl_hwdec *gl_hwdec_load_api(struct mp_log *log, GL *gl,
struct mpv_global *g,
struct mp_hwdec_devices *devs,
enum hwdec_type api)
{
bool is_auto = HWDEC_IS_AUTO(api);
for (int n = 0; mpgl_hwdec_drivers[n]; n++) {
const struct gl_hwdec_driver *drv = mpgl_hwdec_drivers[n];
if (is_auto || api == drv->api) {
struct gl_hwdec *r = load_hwdec_driver(log, gl, g, devs, drv, is_auto);
if (r)
return r;
}
}
return NULL;
}
void gl_hwdec_uninit(struct gl_hwdec *hwdec)
{
if (hwdec)
hwdec->driver->destroy(hwdec);
talloc_free(hwdec);
}