mirror of
https://github.com/mpv-player/mpv
synced 2025-02-17 21:27:08 +00:00
vo_opengl: hwdec_cuda: Support separate decode and display devices
In a multi-GPU scenario, it may be desirable to use different GPUs for decode and display responsibilities. For example, if a secondary GPU has better video decoding capabilities. In such a scenario, we need to initialise a separate context for each GPU, and use the display context in hwdec_cuda, while passing the decode context to avcodec. Once that's done, the actual hand-off between the two GPUs is transparent to us (it happens during the cuMemcpy2D operation which copies the decoded frame from a cuda buffer to the OpenGL texture). In the end, the bulk of the work is around introducing a new configuration option to specify the decode device.
This commit is contained in:
parent
7e889e5e63
commit
7424651b96
@ -4802,6 +4802,16 @@ The following video options are currently all specific to ``--vo=opengl`` and
|
||||
This option might be silently removed in the future, if ANGLE fixes shader
|
||||
compilation speed.
|
||||
|
||||
``--cuda-decode-device=<auto|0..>``
|
||||
Choose the GPU device used for decoding when using the ``cuda`` hwdec.
|
||||
|
||||
By default, the device that is being used to provide OpenGL output will
|
||||
also be used for decoding (and in the vast majority of cases, only one
|
||||
GPU will be present).
|
||||
|
||||
Note that when using the ``cuda-copy`` hwdec, a different option must be
|
||||
passed: ``--vd-lavc-o=gpu=<0..>``.
|
||||
|
||||
Miscellaneous
|
||||
-------------
|
||||
|
||||
|
@ -728,6 +728,11 @@ const m_option_t mp_opts[] = {
|
||||
({"no", -1}, {"auto", 0}, {"windowed", 1}, {"yes", 2})),
|
||||
#endif
|
||||
|
||||
#if HAVE_CUDA_HWACCEL
|
||||
OPT_CHOICE_OR_INT("cuda-decode-device", cuda_device, 0,
|
||||
0, INT_MAX, ({"auto", -1})),
|
||||
#endif
|
||||
|
||||
#if HAVE_ENCODING
|
||||
OPT_SUBSTRUCT("", encode_opts, encode_config, 0),
|
||||
#endif
|
||||
@ -973,6 +978,8 @@ const struct MPOpts mp_default_opts = {
|
||||
"Performer", "Title", "Track", "icy-title", "service_name",
|
||||
NULL
|
||||
},
|
||||
|
||||
.cuda_device = -1,
|
||||
};
|
||||
|
||||
#endif /* MPLAYER_CFG_MPLAYER_H */
|
||||
|
@ -336,6 +336,8 @@ typedef struct MPOpts {
|
||||
struct angle_opts *angle_opts;
|
||||
struct cocoa_opts *cocoa_opts;
|
||||
struct dvd_opts *dvd_opts;
|
||||
|
||||
int cuda_device;
|
||||
} MPOpts;
|
||||
|
||||
struct dvd_opts {
|
||||
|
@ -94,6 +94,7 @@ typedef CUresult CUDAAPI tcuCtxCreate_v2(CUcontext *pctx, unsigned int flags, CU
|
||||
typedef CUresult CUDAAPI tcuCtxPushCurrent_v2(CUcontext *pctx);
|
||||
typedef CUresult CUDAAPI tcuCtxPopCurrent_v2(CUcontext *pctx);
|
||||
typedef CUresult CUDAAPI tcuCtxDestroy_v2(CUcontext ctx);
|
||||
typedef CUresult CUDAAPI tcuDeviceGet(CUdevice *pdevice, int ordinal);
|
||||
typedef CUresult CUDAAPI tcuMemcpy2D_v2(const CUDA_MEMCPY2D *pcopy);
|
||||
typedef CUresult CUDAAPI tcuGetErrorName(CUresult error, const char** pstr);
|
||||
typedef CUresult CUDAAPI tcuGetErrorString(CUresult error, const char** pstr);
|
||||
@ -110,6 +111,7 @@ typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray* pArray, C
|
||||
FN(cuCtxPushCurrent_v2, tcuCtxPushCurrent_v2) \
|
||||
FN(cuCtxPopCurrent_v2, tcuCtxPopCurrent_v2) \
|
||||
FN(cuCtxDestroy_v2, tcuCtxDestroy_v2) \
|
||||
FN(cuDeviceGet, tcuDeviceGet) \
|
||||
FN(cuMemcpy2D_v2, tcuMemcpy2D_v2) \
|
||||
FN(cuGetErrorName, tcuGetErrorName) \
|
||||
FN(cuGetErrorString, tcuGetErrorString) \
|
||||
@ -130,6 +132,7 @@ CUDA_FNS(CUDA_EXT_DECL)
|
||||
#define cuCtxPushCurrent mpv_cuCtxPushCurrent_v2
|
||||
#define cuCtxPopCurrent mpv_cuCtxPopCurrent_v2
|
||||
#define cuCtxDestroy mpv_cuCtxDestroy_v2
|
||||
#define cuDeviceGet mpv_cuDeviceGet
|
||||
#define cuMemcpy2D mpv_cuMemcpy2D_v2
|
||||
#define cuGetErrorName mpv_cuGetErrorName
|
||||
#define cuGetErrorString mpv_cuGetErrorString
|
||||
|
@ -34,6 +34,7 @@
|
||||
|
||||
#include "formats.h"
|
||||
#include "hwdec.h"
|
||||
#include "options/m_config.h"
|
||||
#include "video.h"
|
||||
|
||||
struct priv {
|
||||
@ -44,7 +45,8 @@ struct priv {
|
||||
CUarray cu_array[4];
|
||||
int plane_bytes[4];
|
||||
|
||||
CUcontext cuda_ctx;
|
||||
CUcontext display_ctx;
|
||||
CUcontext decode_ctx;
|
||||
};
|
||||
|
||||
static int check_cu(struct gl_hwdec *hw, CUresult err, const char *func)
|
||||
@ -72,8 +74,7 @@ static int check_cu(struct gl_hwdec *hw, CUresult err, const char *func)
|
||||
|
||||
static int cuda_create(struct gl_hwdec *hw)
|
||||
{
|
||||
CUdevice device;
|
||||
CUcontext cuda_ctx = NULL;
|
||||
CUdevice display_dev;
|
||||
AVBufferRef *hw_device_ctx = NULL;
|
||||
CUcontext dummy;
|
||||
unsigned int device_count;
|
||||
@ -97,16 +98,43 @@ static int cuda_create(struct gl_hwdec *hw)
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
|
||||
ret = CHECK_CU(cuGLGetDevices(&device_count, &device, 1,
|
||||
// Allocate display context
|
||||
ret = CHECK_CU(cuGLGetDevices(&device_count, &display_dev, 1,
|
||||
CU_GL_DEVICE_LIST_ALL));
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
|
||||
ret = CHECK_CU(cuCtxCreate(&cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, device));
|
||||
ret = CHECK_CU(cuCtxCreate(&p->display_ctx, CU_CTX_SCHED_BLOCKING_SYNC,
|
||||
display_dev));
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
|
||||
p->cuda_ctx = cuda_ctx;
|
||||
p->decode_ctx = p->display_ctx;
|
||||
|
||||
int decode_dev_idx = -1;
|
||||
mp_read_option_raw(hw->global, "cuda-decode-device", &m_option_type_choice,
|
||||
&decode_dev_idx);
|
||||
|
||||
if (decode_dev_idx > -1) {
|
||||
CUdevice decode_dev;
|
||||
ret = CHECK_CU(cuDeviceGet(&decode_dev, decode_dev_idx));
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
|
||||
if (decode_dev != display_dev) {
|
||||
MP_INFO(hw, "Using separate decoder and display devices\n");
|
||||
|
||||
// Pop the display context. We won't use it again during init()
|
||||
ret = CHECK_CU(cuCtxPopCurrent(&dummy));
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
|
||||
ret = CHECK_CU(cuCtxCreate(&p->decode_ctx, CU_CTX_SCHED_BLOCKING_SYNC,
|
||||
decode_dev));
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
hw_device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA);
|
||||
if (!hw_device_ctx)
|
||||
@ -115,7 +143,7 @@ static int cuda_create(struct gl_hwdec *hw)
|
||||
AVHWDeviceContext *device_ctx = (void *)hw_device_ctx->data;
|
||||
|
||||
AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
|
||||
device_hwctx->cuda_ctx = cuda_ctx;
|
||||
device_hwctx->cuda_ctx = p->decode_ctx;
|
||||
|
||||
ret = av_hwdevice_ctx_init(hw_device_ctx);
|
||||
if (ret < 0) {
|
||||
@ -129,7 +157,7 @@ static int cuda_create(struct gl_hwdec *hw)
|
||||
|
||||
p->hwctx = (struct mp_hwdec_ctx) {
|
||||
.type = HWDEC_CUDA,
|
||||
.ctx = cuda_ctx,
|
||||
.ctx = p->decode_ctx,
|
||||
.av_device_ref = hw_device_ctx,
|
||||
};
|
||||
p->hwctx.driver_name = hw->driver->name;
|
||||
@ -162,7 +190,7 @@ static int reinit(struct gl_hwdec *hw, struct mp_image_params *params)
|
||||
return -1;
|
||||
}
|
||||
|
||||
ret = CHECK_CU(cuCtxPushCurrent(p->cuda_ctx));
|
||||
ret = CHECK_CU(cuCtxPushCurrent(p->display_ctx));
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
@ -219,7 +247,7 @@ static void destroy(struct gl_hwdec *hw)
|
||||
CUcontext dummy;
|
||||
|
||||
// Don't bail if any CUDA calls fail. This is all best effort.
|
||||
CHECK_CU(cuCtxPushCurrent(p->cuda_ctx));
|
||||
CHECK_CU(cuCtxPushCurrent(p->display_ctx));
|
||||
for (int n = 0; n < 4; n++) {
|
||||
if (p->cu_res[n] > 0)
|
||||
CHECK_CU(cuGraphicsUnregisterResource(p->cu_res[n]));
|
||||
@ -227,7 +255,11 @@ static void destroy(struct gl_hwdec *hw)
|
||||
}
|
||||
CHECK_CU(cuCtxPopCurrent(&dummy));
|
||||
|
||||
CHECK_CU(cuCtxDestroy(p->cuda_ctx));
|
||||
if (p->decode_ctx != p->display_ctx) {
|
||||
CHECK_CU(cuCtxDestroy(p->decode_ctx));
|
||||
}
|
||||
|
||||
CHECK_CU(cuCtxDestroy(p->display_ctx));
|
||||
|
||||
gl->DeleteTextures(4, p->gl_textures);
|
||||
|
||||
@ -242,7 +274,7 @@ static int map_frame(struct gl_hwdec *hw, struct mp_image *hw_image,
|
||||
CUcontext dummy;
|
||||
int ret = 0, eret = 0;
|
||||
|
||||
ret = CHECK_CU(cuCtxPushCurrent(p->cuda_ctx));
|
||||
ret = CHECK_CU(cuCtxPushCurrent(p->display_ctx));
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user