mpv/video/hwdec.h

107 lines
3.7 KiB
C
Raw Normal View History

#ifndef MP_HWDEC_H_
#define MP_HWDEC_H_
#include "options/m_option.h"
struct mp_image_pool;
// Identifies a hardware decoding API/method.
// Keep in sync with the --hwdec option (see mp_hwdec_names).
//
// Only HWDEC_NONE has an explicit value; all other enumerators take
// consecutive values in declaration order.
// NOTE(review): the *_COPY variants presumably select the same API but with
// decoded frames copied back to system memory - confirm against the decoder
// code.
enum hwdec_type {
    HWDEC_NONE = 0,
    HWDEC_AUTO,
    HWDEC_AUTO_COPY,
    HWDEC_VDPAU,
    HWDEC_VDPAU_COPY,
    HWDEC_VIDEOTOOLBOX,
    HWDEC_VIDEOTOOLBOX_COPY,
    HWDEC_VAAPI,
    HWDEC_VAAPI_COPY,
    HWDEC_DXVA2,
    HWDEC_DXVA2_COPY,
    HWDEC_D3D11VA,
    HWDEC_D3D11VA_COPY,
    HWDEC_RPI,
    HWDEC_RPI_COPY,
    HWDEC_MEDIACODEC,
    // CUDA / cuvid (NvDecode) decoding. A stray commit message and timestamp
    // from a repository "blame" view used to sit here inside the enumerator
    // list and broke compilation; that text belongs in version control
    // history, not in the header.
    HWDEC_CUDA,
    HWDEC_CUDA_COPY,
    HWDEC_CRYSTALHD,
};
// Evaluates to non-zero if x is one of the automatic selection values
// (HWDEC_AUTO or HWDEC_AUTO_COPY), and to 0 otherwise.
// The argument appears twice in the expansion, so it may be evaluated up to
// two times; do not pass an expression with side effects.
#define HWDEC_IS_AUTO(x) ((x) == HWDEC_AUTO || (x) == HWDEC_AUTO_COPY)
// Table of hwdec_type names; the definition lives in options.c.
// This is the table that enum hwdec_type has to be kept in sync with.
extern const struct m_opt_choice_alternatives mp_hwdec_names[];
// Describes one hardware decoding device context. Pointers to this struct are
// what hwdec_devices_add()/hwdec_devices_get() store and return.
struct mp_hwdec_ctx {
enum hwdec_type type; // concrete API (never HWDEC_NONE or HWDEC_IS_AUTO)
const char *driver_name; // NULL if unknown/not loaded
// API-specific handle.
// This is never NULL. Its meaning depends on the .type field:
// HWDEC_VDPAU: struct mp_vdpau_ctx*
//   NOTE(review): this entry used to read "struct mp_vaapi_ctx*", identical
//   to the HWDEC_VAAPI line below, which looks like a copy/paste slip.
//   Confirm the VDPAU context type name against the VDPAU code.
// HWDEC_VIDEOTOOLBOX: struct mp_vt_ctx*
// HWDEC_VAAPI: struct mp_vaapi_ctx*
// HWDEC_D3D11VA: ID3D11Device*
// HWDEC_DXVA2: IDirect3DDevice9*
// HWDEC_DXVA2_COPY: IDirect3DDevice9*
// HWDEC_CUDA: CUcontext*
void *ctx;
// Optional (the pointer may be NULL, so check before calling).
// Allocates a software image from the pool, downloads the hw image from
// mpi, and returns it.
// swpool can be NULL (then just use straight allocation).
// Returns NULL on error or if mpi has the wrong format.
struct mp_image *(*download_image)(struct mp_hwdec_ctx *ctx,
struct mp_image *mpi,
struct mp_image_pool *swpool);
};
// Context type stored in mp_hwdec_ctx.ctx when .type is HWDEC_VIDEOTOOLBOX.
struct mp_vt_ctx {
// Opaque pointer; its meaning is not visible in this header.
// NOTE(review): presumably private data of whoever provides the context.
void *priv;
// Callback that takes this context and returns a 32 bit value.
// NOTE(review): by its name this is presumably the VideoToolbox pixel format
// to use - confirm against the implementation.
uint32_t (*get_vt_fmt)(struct mp_vt_ctx *ctx);
};
// Used to communicate hardware decoder device handles from VO to video decoder.
// Only declared here (opaque type); it is accessed through the
// hwdec_devices_*() functions below.
struct mp_hwdec_devices;
// Create a new device list object.
// NOTE(review): presumably the list starts out empty and has to be released
// with hwdec_devices_destroy() - confirm against the implementation.
struct mp_hwdec_devices *hwdec_devices_create(void);
// Destroy a device list object.
// NOTE(review): whether devs may be NULL, and whether entries must be removed
// first, is not visible in this header - confirm before relying on either.
void hwdec_devices_destroy(struct mp_hwdec_devices *devs);
// Return the device context for the given API type, or NULL if none is
// available. Logically, the returned pointer remains valid until VO
// uninitialization is started (all users of it must be uninitialized before
// that point).
// hwdec_devices_request() may be called before this to lazily load devices.
struct mp_hwdec_ctx *hwdec_devices_get(struct mp_hwdec_devices *devs,
enum hwdec_type type);
// For code which still strictly assumes there is 1 (or none) device.
// NOTE(review): presumably returns the first registered context, or NULL if
// the list is empty - confirm against the implementation.
struct mp_hwdec_ctx *hwdec_devices_get_first(struct mp_hwdec_devices *devs);
// Add ctx to the list of internal devices.
// The caller must not add the same pointer twice.
void hwdec_devices_add(struct mp_hwdec_devices *devs, struct mp_hwdec_ctx *ctx);
// Remove ctx from the list of internal devices.
// Idempotent: entries that were never added (or were already removed) are
// ignored.
void hwdec_devices_remove(struct mp_hwdec_devices *devs, struct mp_hwdec_ctx *ctx);
// Install the callback that enables lazy loading of an API through
// hwdec_devices_request().
//   load_api:     callback taking a context pointer and the requested type
//   load_api_ctx: NOTE(review): presumably passed back as load_api's first
//                 argument - confirm against the implementation
// If used at all, this must be set/unset during initialization/uninitialization,
// as concurrent use with hwdec_devices_request() is a race condition.
void hwdec_devices_set_loader(struct mp_hwdec_devices *devs,
void (*load_api)(void *ctx, enum hwdec_type type), void *load_api_ctx);
// Causes the VO to lazily load the requested device. Blocks until loading is
// done, even if the device turns out not to be available.
// See hwdec_devices_set_loader() for how the loader is installed.
void hwdec_devices_request(struct mp_hwdec_devices *devs, enum hwdec_type type);
// Convenience function that combines request and lookup:
// - return NULL if devs==NULL
// - call hwdec_devices_request(devs, type)
// - call hwdec_devices_get(devs, type)
// - then return the mp_hwdec_ctx.ctx field
// NOTE(review): presumably also returns NULL when hwdec_devices_get() finds
// no device for the type - confirm against the implementation.
void *hwdec_devices_load(struct mp_hwdec_devices *devs, enum hwdec_type type);
#endif