2013-11-23 20:26:31 +00:00
|
|
|
#ifndef MP_HWDEC_H_
|
|
|
|
#define MP_HWDEC_H_
|
|
|
|
|
2015-07-07 12:25:37 +00:00
|
|
|
#include "options/m_option.h"
|
|
|
|
|
2015-01-22 14:32:23 +00:00
|
|
|
struct mp_image_pool;
|
|
|
|
|
2015-07-07 12:25:37 +00:00
|
|
|
// Identifies a hardware decoding API / mode.
// keep in sync with --hwdec option (see mp_hwdec_names)
//
// The enumerators rely on implicit numbering starting at HWDEC_NONE = 0, so
// their order must not be changed without updating everything that is kept
// in sync with this list.
enum hwdec_type {
    HWDEC_NONE = 0,
    HWDEC_AUTO,
    HWDEC_AUTO_COPY,
    HWDEC_VDPAU,
    HWDEC_VDPAU_COPY,
    HWDEC_VIDEOTOOLBOX,
    HWDEC_VIDEOTOOLBOX_COPY,
    HWDEC_VAAPI,
    HWDEC_VAAPI_COPY,
    HWDEC_DXVA2,
    HWDEC_DXVA2_COPY,
    HWDEC_D3D11VA,
    HWDEC_D3D11VA_COPY,
    HWDEC_RPI,
    HWDEC_RPI_COPY,
    HWDEC_MEDIACODEC,
    HWDEC_MEDIACODEC_COPY,
    // Nvidia NvDecode (previously called "cuvid"), exposed through CUDA.
    // Intended for use with vo=opengl, where CUDA's OpenGL interop copies
    // decoded frames from device memory into OpenGL textures.
    HWDEC_CUDA,
    HWDEC_CUDA_COPY,
    HWDEC_CRYSTALHD,
};
|
|
|
|
|
2016-05-11 14:18:58 +00:00
|
|
|
// Evaluates to non-zero if x is one of the automatic selection pseudo types
// (HWDEC_AUTO or HWDEC_AUTO_COPY). The argument may be evaluated twice, so do
// not pass expressions with side effects.
#define HWDEC_IS_AUTO(x) ((x) == HWDEC_AUTO || (x) == HWDEC_AUTO_COPY)
|
|
|
|
|
2015-07-07 12:25:37 +00:00
|
|
|
// hwdec_type names (options.c)
|
|
|
|
extern const struct m_opt_choice_alternatives mp_hwdec_names[];
|
|
|
|
|
2015-01-22 14:32:23 +00:00
|
|
|
struct mp_hwdec_ctx {
|
2016-05-11 14:18:58 +00:00
|
|
|
enum hwdec_type type; // (never HWDEC_NONE or HWDEC_IS_AUTO)
|
2016-05-04 14:55:26 +00:00
|
|
|
const char *driver_name; // NULL if unknown/not loaded
|
2015-02-02 21:43:05 +00:00
|
|
|
|
2016-05-09 17:42:03 +00:00
|
|
|
// This is never NULL. Its meaning depends on the .type field:
|
2017-03-23 10:16:02 +00:00
|
|
|
// HWDEC_VDPAU: struct mp_vdpau_ctx*
|
2017-02-17 12:54:17 +00:00
|
|
|
// HWDEC_VIDEOTOOLBOX: non-NULL dummy pointer
|
2016-05-09 17:42:03 +00:00
|
|
|
// HWDEC_VAAPI: struct mp_vaapi_ctx*
|
|
|
|
// HWDEC_D3D11VA: ID3D11Device*
|
|
|
|
// HWDEC_DXVA2: IDirect3DDevice9*
|
2016-10-08 23:51:15 +00:00
|
|
|
// HWDEC_CUDA: CUcontext*
|
2016-05-09 17:42:03 +00:00
|
|
|
void *ctx;
|
2015-01-22 16:47:14 +00:00
|
|
|
|
2017-01-16 14:31:54 +00:00
|
|
|
// libavutil-wrapped context, if available.
|
2017-05-24 12:32:23 +00:00
|
|
|
struct AVBufferRef *av_device_ref; // AVHWDeviceContext*
|
2017-01-16 14:31:54 +00:00
|
|
|
|
vo_opengl, vaapi: properly probe 10 bit rendering support
There are going to be users who have a Mesa installation which do not
support 10 bit, but a GPU which can decode to 10 bit. So it's probably
better not to hardcode whether it is supported.
Introduce a more general way to signal supported formats from renderer
to decoder. Obviously this is imperfect, because it still isn't part of
proper format negotation (for example, what if there's a vavpp filter,
which accepts anything). Still slightly better than before.
I don't know any way to probe for vaapi dmabuf/EGL dmabuf support
properly (in particular testing specific formats, not just general
availability). So we stay with the current approach and try to create
and map dummy surfaces on init to probe for support. Overdo it and check
all formats that AVHWFramesConstraints reports, instead of only NV12 and
P010 surfaces.
Since we can support unknown formats now, add explicitly checks to the
EGL/dmabuf mapper code to reject unsupported formats. I also noticed
that libavutil signals support for RGB0/BGR0, but couldn't get it to
work. Remove the DRM formats that are unused/didn't work the way I tried
to use them.
With this, 10 bit decoding + rendering should work, provided you have
a capable CPU and a patched Mesa. The required Mesa patch adds support
for the R16 and GR32 formats. It was sent by a Kodi developer to the
Mesa developer mailing list and was not accepted yet.
2017-01-13 12:36:02 +00:00
|
|
|
// List of IMGFMT_s, terminated with 0. NULL if N/A.
|
|
|
|
int *supported_formats;
|
|
|
|
|
2017-02-20 07:39:55 +00:00
|
|
|
// Hint to generic code: it's using a wrapper API
|
|
|
|
bool emulated;
|
|
|
|
|
vdpau: crappy hack to allow initializing hw decoding after preemption
If vo_opengl is used, and vo_opengl already created the vdpau interop
(for whatever reasons), and then preemption happens, and then you try to
enable hw decoding, it failed. The reason was that preemption recovery
is not run at any point before libavcodec accesses the vdpau device.
The actual impact was that with libmpv + opengl-cb use, hardware
decoding was permanently broken after display mode switching (something
that caused the display to get preempted at least with older drivers).
With mpv CLI, you can for example enable hw decoding during playback,
then disable it, VT switch to console, switch back to X, and try to
enable hw decoding again.
This is mostly because libav* does not deal with preemption, and NVIDIA
driver preemption behavior being horrible garbage. In addition to being
misdesigned API, the preemption callback is not called before you try to
access vdpau API, and then only with _some_ accesses.
In summary, the preemption callback was never called, neither before nor
after libavcodec tried to init the decoder. So we have to get
mp_vdpau_handle_preemption() called before libavcodec accesses it. This
in turn will do a dummy API access which usually triggers the preemption
callback immediately (with NVIDIA's drivers).
In addition, we have to update the AVHWDeviceContext's device. In theory
it could change (in practice it usually seems to use handle "0").
Creating a new device would cause chaos, as we don't have a concept of
switching the device context on the fly. So we simply update it
directly. I'm fairly sure this violates the libav* API, but it's the
best we can do.
2017-05-19 13:24:38 +00:00
|
|
|
// Optional. Crap for vdpau. Makes sure preemption recovery is run if needed.
|
|
|
|
void (*restore_device)(struct mp_hwdec_ctx *ctx);
|
|
|
|
|
2017-02-20 07:39:55 +00:00
|
|
|
// Optional. Do not set for VO-bound devices.
|
|
|
|
void (*destroy)(struct mp_hwdec_ctx *ctx);
|
2015-01-22 14:32:23 +00:00
|
|
|
};
|
|
|
|
|
2016-05-09 17:42:03 +00:00
|
|
|
// Used to communicate hardware decoder device handles from VO to video decoder.
// Opaque in this header; only handled through the hwdec_devices_*() functions.
struct mp_hwdec_devices;
|
|
|
|
|
|
|
|
// Create and destroy a device list.
// NOTE(review): allocation failure behavior and whether destroy accepts NULL
// are not visible from this header - check the implementation before relying
// on either.
struct mp_hwdec_devices *hwdec_devices_create(void);
void hwdec_devices_destroy(struct mp_hwdec_devices *devs);
|
|
|
|
|
|
|
|
// Return the device context for the given API type. Returns NULL if none
// available. Logically, the returned pointer remains valid until VO
// uninitialization is started (all users of it must be uninitialized before).
// hwdec_devices_request() may be used before this to lazily load devices.
struct mp_hwdec_ctx *hwdec_devices_get(struct mp_hwdec_devices *devs,
                                       enum hwdec_type type);
|
|
|
|
|
|
|
|
// For code which still strictly assumes there is 1 (or none) device.
// NOTE(review): presumably returns NULL when no device has been added -
// confirm against the implementation.
struct mp_hwdec_ctx *hwdec_devices_get_first(struct mp_hwdec_devices *devs);
|
|
|
|
|
|
|
|
// Add this to the list of internal devices. Adding the same pointer twice must
// be avoided.
void hwdec_devices_add(struct mp_hwdec_devices *devs, struct mp_hwdec_ctx *ctx);
|
|
|
|
|
|
|
|
// Remove this from the list of internal devices. Idempotent/ignores entries
// not added yet.
void hwdec_devices_remove(struct mp_hwdec_devices *devs, struct mp_hwdec_ctx *ctx);
|
|
|
|
|
|
|
|
// Can be used to enable lazy loading of an API with hwdec_devices_request().
// If used at all, this must be set/unset during initialization/uninitialization,
// as concurrent use with hwdec_devices_request() is a race condition.
//  load_api:     callback that receives load_api_ctx and the requested type
//  load_api_ctx: opaque pointer passed as the callback's first argument
void hwdec_devices_set_loader(struct mp_hwdec_devices *devs,
    void (*load_api)(void *ctx, enum hwdec_type type), void *load_api_ctx);
|
|
|
|
|
|
|
|
// Cause VO to lazily load the requested device, and will block until this is
// done (even if not available).
void hwdec_devices_request(struct mp_hwdec_devices *devs, enum hwdec_type type);
|
|
|
|
|
|
|
|
// Convenience function:
// - return NULL if devs==NULL
// - call hwdec_devices_request(devs, type)
// - call hwdec_devices_get(devs, type)
// - then return the mp_hwdec_ctx.ctx field
void *hwdec_devices_load(struct mp_hwdec_devices *devs, enum hwdec_type type);
|
2013-11-23 20:26:31 +00:00
|
|
|
|
2017-10-16 15:06:01 +00:00
|
|
|
struct mp_image;
|
video: add mp_image_params.hw_flags and add an example
It seems this will be useful for Rockchip DRM hwcontext integration.
DRM hwcontexts have additional internal structure which can be different
depending on the decoder, and which is not part of the generic hwcontext
API. Rockchip has 1 layer, which EGL interop happens to translate to a
RGB texture, while VAAPI (mapped as DRM hwcontext) will use multiple
layers. Both will use sw_format=nv12, and thus are indistinguishable on
the mp_image_params level. But this is needed to initialize the EGL
mapping and the vo_gpu video renderer correctly.
We hope that the layer count is enough to tell whether EGL will
translate the data to a RGB texture (vs. 2 textures resembling raw nv12
data). For that we introduce MP_IMAGE_HW_FLAG_OPAQUE.
This commit adds the flag, infrastructure to set it, and an "example"
for D3D11.
The D3D11 addition is quite useless at this point. But later we want to
get rid of d3d11_update_image_attribs() anyway, while we still need a
way to force d3d11vpp filter insertion, so maybe it has some
justification (who knows). In any case it makes testing this easier.
Obviously it also adds some basic support for triggering the opaque
format for decoding, which will use a driver-specific format, but which
is not supported in shaders. The opaque flag is not used to determine
whether d3d11vpp needs to be inserted, though.
2017-10-16 12:44:59 +00:00
|
|
|
|
|
|
|
// Per AV_HWDEVICE_TYPE_* functions, queryable via hwdec_get_hwcontext_fns().
// For now, all entries are strictly optional.
struct hwcontext_fns {
    // Presumably the AV_HWDEVICE_TYPE_* value this entry applies to, stored
    // as a plain int - confirm against the lookup code.
    int av_hwdevice_type;

    // Set any mp_image fields that require hwcontext specific code, such as
    // fields or flags not present in AVFrame or AVHWFramesContext in a
    // portable way. This is called directly after img is converted from an
    // AVFrame, with all other fields already set. img.hwctx will be set, and
    // use the correct AV_HWDEVICE_TYPE_.
    void (*complete_image_params)(struct mp_image *img);
};
|
|
|
|
|
|
|
|
// The parameter is of type enum AVHWDeviceType (as in int to avoid extensive
// recursive includes). May return NULL for unknown device types.
const struct hwcontext_fns *hwdec_get_hwcontext_fns(int av_hwdevice_type);
|
|
|
|
|
2013-11-23 20:26:31 +00:00
|
|
|
#endif
|