vo_gpu: hwdec: Use ffnvcodec to load CUDA symbols

The CUDA dynamic loader was broken out of ffmpeg into its own repo
and package. This gives us an opportunity to re-use it in mpv and
remove our custom loader logic.
This commit is contained in:
Philip Langdale 2018-04-15 09:06:34 -07:00 committed by Jan Ekström
parent 46d2f1f08d
commit 07915b1227
5 changed files with 50 additions and 246 deletions

View File

@ -1,63 +0,0 @@
/*
* This file is part of mpv.
*
* mpv is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* mpv is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with mpv. If not, see <http://www.gnu.org/licenses/>.
*/
#include "cuda_dynamic.h"
#include <pthread.h>
// Windows has no <dlfcn.h>: map the three dl* calls used in this file onto
// their Win32 counterparts. The `flags` argument of dlopen() is discarded.
#if defined(_WIN32)
# include <windows.h>
# define dlopen(filename, flags) LoadLibrary(TEXT(filename))
# define dlsym(handle, symbol) (void *)GetProcAddress(handle, symbol)
# define dlclose(handle) FreeLibrary(handle)
#else
# include <dlfcn.h>
#endif
// File name of the CUDA driver library that is handed to dlopen().
#if defined(_WIN32) || defined(__CYGWIN__)
# define CUDA_LIBNAME "nvcuda.dll"
#else
# define CUDA_LIBNAME "libcuda.so.1"
#endif
// Define one global function pointer named mpv_<NAME> for every entry that
// CUDA_FNS lists.
#define CUDA_DECL(NAME, TYPE) \
TYPE *mpv_ ## NAME;
CUDA_FNS(CUDA_DECL)
// Set to true by cuda_do_load() only after every symbol has been resolved.
static bool cuda_loaded = false;
// Passed to pthread_once() so that cuda_do_load() runs at most once.
static pthread_once_t cuda_load_once = PTHREAD_ONCE_INIT;
static void cuda_do_load(void)
{
void *lib = dlopen(CUDA_LIBNAME, RTLD_LAZY);
if (!lib) {
return;
}
#define CUDA_LOAD_SYMBOL(NAME, TYPE) \
mpv_ ## NAME = dlsym(lib, #NAME); if (!mpv_ ## NAME) return;
CUDA_FNS(CUDA_LOAD_SYMBOL)
cuda_loaded = true;
}
/*
 * Ensure the CUDA driver symbols have been loaded.
 *
 * The first call runs cuda_do_load(); every call reports the outcome of that
 * single attempt.
 *
 * Returns true if all mpv_cu* function pointers are usable, false otherwise.
 */
bool cuda_load(void)
{
    // pthread_once() guarantees the loader body executes at most once.
    pthread_once(&cuda_load_once, cuda_do_load);

    const bool symbols_ready = cuda_loaded;
    return symbols_ready;
}

View File

@ -1,148 +0,0 @@
/*
* This file is part of mpv.
*
* It is based on an equivalent file in ffmpeg that was
* constructed from documentation, rather than from any
* original cuda headers.
*
* mpv is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* mpv is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with mpv. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef MPV_CUDA_DYNAMIC_H
#define MPV_CUDA_DYNAMIC_H
#include <stdbool.h>
#include <stddef.h>
#include "gl_headers.h"
// NOTE(review): presumably the CUDA API level (7.5) that these hand-written
// declarations mirror -- confirm against the CUDA headers.
#define CUDA_VERSION 7050
// Calling convention of the driver entry points: __stdcall on Windows and
// Cygwin, the compiler default everywhere else.
#if defined(_WIN32) || defined(__CYGWIN__)
#define CUDAAPI __stdcall
#else
#define CUDAAPI
#endif
// Flag value passed as the `flags` argument of cuCtxCreate() by hwdec_cuda.c.
#define CU_CTX_SCHED_BLOCKING_SYNC 4
// Minimal stand-ins for the CUDA driver handle types. The struct-pointer
// handles are opaque here: the structs are never defined in this header.
typedef int CUdevice;
typedef struct CUarray_st *CUarray;
typedef struct CUgraphicsResource_st *CUgraphicsResource;
typedef struct CUstream_st *CUstream;
// Declared as a plain void pointer rather than a struct pointer.
typedef void* CUcontext;
// Device address type. It has to be 64 bits wide on every 64-bit target, not
// only on x86-64: the previous check fell through to the 32-bit typedef on
// e.g. aarch64. _WIN64 and __LP64__ are the conditions NVIDIA's own cuda.h
// uses; the x86-64 specific macros are kept so existing targets are unchanged.
#if defined(_WIN64) || defined(__LP64__) || \
    defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
typedef unsigned long long CUdeviceptr;
#else
typedef unsigned int CUdeviceptr;
#endif
// Result code returned by every driver entry point declared in this header.
// Only the success value is named here.
typedef enum cudaError_enum {
CUDA_SUCCESS = 0
} CUresult;
// Selector stored in CUDA_MEMCPY2D.srcMemoryType / dstMemoryType.
typedef enum CUmemorytype_enum {
CU_MEMORYTYPE_HOST = 1,
CU_MEMORYTYPE_DEVICE = 2,
CU_MEMORYTYPE_ARRAY = 3
} CUmemorytype;
// Parameter block passed by pointer to cuMemcpy2D (see tcuMemcpy2D_v2).
// The src* fields describe the source and the dst* fields the destination.
// NOTE(review): presumably only the Host/Device/Array member matching the
// respective *MemoryType is consulted by the driver -- confirm against the
// CUDA driver API documentation. The field order is part of the driver ABI
// and must not be changed.
typedef struct CUDA_MEMCPY2D_st {
size_t srcXInBytes;
size_t srcY;
CUmemorytype srcMemoryType;
const void *srcHost;
CUdeviceptr srcDevice;
CUarray srcArray;
size_t srcPitch;
size_t dstXInBytes;
size_t dstY;
CUmemorytype dstMemoryType;
void *dstHost;
CUdeviceptr dstDevice;
CUarray dstArray;
size_t dstPitch;
size_t WidthInBytes;
size_t Height;
} CUDA_MEMCPY2D;
// Value for the `deviceList` argument of cuGLGetDevices (tcuGLGetDevices_v2).
// hwdec_cuda.c passes CU_GL_DEVICE_LIST_ALL.
typedef enum CUGLDeviceList_enum {
CU_GL_DEVICE_LIST_ALL = 1,
CU_GL_DEVICE_LIST_CURRENT_FRAME = 2,
CU_GL_DEVICE_LIST_NEXT_FRAME = 3,
} CUGLDeviceList;
// Passed as the `Flags` argument of cuGraphicsGLRegisterImage by hwdec_cuda.c.
#define CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD 2
// Function types of the driver entry points that are resolved at run time.
// Each tcu<Name> type is paired with its symbol name in CUDA_FNS; the global
// mpv_<name> pointers are declared as pointers to these types.
typedef CUresult CUDAAPI tcuInit(unsigned int Flags);
typedef CUresult CUDAAPI tcuCtxCreate_v2(CUcontext *pctx, unsigned int flags, CUdevice dev);
// cuCtxPushCurrent takes the context by value (unlike cuCtxPopCurrent, which
// writes the popped context through a pointer). The parameter used to be
// declared as `CUcontext *`, which only compiled at the call sites because
// CUcontext is `void *`.
typedef CUresult CUDAAPI tcuCtxPushCurrent_v2(CUcontext ctx);
typedef CUresult CUDAAPI tcuCtxPopCurrent_v2(CUcontext *pctx);
typedef CUresult CUDAAPI tcuCtxDestroy_v2(CUcontext ctx);
typedef CUresult CUDAAPI tcuDeviceGet(CUdevice *pdevice, int ordinal);
typedef CUresult CUDAAPI tcuMemcpy2D_v2(const CUDA_MEMCPY2D *pcopy);
typedef CUresult CUDAAPI tcuGetErrorName(CUresult error, const char** pstr);
typedef CUresult CUDAAPI tcuGetErrorString(CUresult error, const char** pstr);
typedef CUresult CUDAAPI tcuGLGetDevices_v2(unsigned int* pCudaDeviceCount, CUdevice* pCudaDevices, unsigned int cudaDeviceCount, CUGLDeviceList deviceList);
typedef CUresult CUDAAPI tcuGraphicsGLRegisterImage(CUgraphicsResource* pCudaResource, GLuint image, GLenum target, unsigned int Flags);
typedef CUresult CUDAAPI tcuGraphicsUnregisterResource(CUgraphicsResource resource);
typedef CUresult CUDAAPI tcuGraphicsMapResources(unsigned int count, CUgraphicsResource* resources, CUstream hStream);
typedef CUresult CUDAAPI tcuGraphicsUnmapResources(unsigned int count, CUgraphicsResource* resources, CUstream hStream);
typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray* pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel);
// X-macro listing every driver symbol to load: FN(symbol name, function type).
// Expanded with different FN arguments to declare, define and resolve the
// mpv_<symbol> function pointers.
// The final entry deliberately has no line-continuation backslash, so the
// macro ends here no matter what line follows it.
#define CUDA_FNS(FN) \
    FN(cuInit, tcuInit) \
    FN(cuCtxCreate_v2, tcuCtxCreate_v2) \
    FN(cuCtxPushCurrent_v2, tcuCtxPushCurrent_v2) \
    FN(cuCtxPopCurrent_v2, tcuCtxPopCurrent_v2) \
    FN(cuCtxDestroy_v2, tcuCtxDestroy_v2) \
    FN(cuDeviceGet, tcuDeviceGet) \
    FN(cuMemcpy2D_v2, tcuMemcpy2D_v2) \
    FN(cuGetErrorName, tcuGetErrorName) \
    FN(cuGetErrorString, tcuGetErrorString) \
    FN(cuGLGetDevices_v2, tcuGLGetDevices_v2) \
    FN(cuGraphicsGLRegisterImage, tcuGraphicsGLRegisterImage) \
    FN(cuGraphicsUnregisterResource, tcuGraphicsUnregisterResource) \
    FN(cuGraphicsMapResources, tcuGraphicsMapResources) \
    FN(cuGraphicsUnmapResources, tcuGraphicsUnmapResources) \
    FN(cuGraphicsSubResourceGetMappedArray, tcuGraphicsSubResourceGetMappedArray)
// Declare (extern) one mpv_<NAME> function pointer per CUDA_FNS entry; the
// matching definitions are produced by CUDA_DECL in cuda_dynamic.c.
#define CUDA_EXT_DECL(NAME, TYPE) \
extern TYPE *mpv_ ## NAME;
CUDA_FNS(CUDA_EXT_DECL)
// Let callers use the usual unversioned CUDA names: each one expands to the
// corresponding mpv_* function pointer. Several of them map to the pointer
// that holds the `_v2` symbol.
#define cuInit mpv_cuInit
#define cuCtxCreate mpv_cuCtxCreate_v2
#define cuCtxPushCurrent mpv_cuCtxPushCurrent_v2
#define cuCtxPopCurrent mpv_cuCtxPopCurrent_v2
#define cuCtxDestroy mpv_cuCtxDestroy_v2
#define cuDeviceGet mpv_cuDeviceGet
#define cuMemcpy2D mpv_cuMemcpy2D_v2
#define cuGetErrorName mpv_cuGetErrorName
#define cuGetErrorString mpv_cuGetErrorString
#define cuGLGetDevices mpv_cuGLGetDevices_v2
#define cuGraphicsGLRegisterImage mpv_cuGraphicsGLRegisterImage
#define cuGraphicsUnregisterResource mpv_cuGraphicsUnregisterResource
#define cuGraphicsMapResources mpv_cuGraphicsMapResources
#define cuGraphicsUnmapResources mpv_cuGraphicsUnmapResources
#define cuGraphicsSubResourceGetMappedArray mpv_cuGraphicsSubResourceGetMappedArray
// Load the CUDA driver library and resolve all mpv_cu* pointers; the load is
// attempted once and the result is reported on every call. Returns true only
// if every symbol was resolved. Must succeed before any cu* macro is used.
bool cuda_load(void);
#endif // MPV_CUDA_DYNAMIC_H

View File

@ -27,8 +27,7 @@
* when decoding 10bit streams (there is some hardware dithering going on).
*/
#include "cuda_dynamic.h"
#include <ffnvcodec/dynlink_loader.h>
#include <libavutil/hwcontext.h>
#include <libavutil/hwcontext_cuda.h>
@ -39,6 +38,7 @@
struct priv_owner {
struct mp_hwdec_ctx hwctx;
CudaFunctions *cu;
CUcontext display_ctx;
CUcontext decode_ctx;
};
@ -56,13 +56,15 @@ static int check_cu(struct ra_hwdec *hw, CUresult err, const char *func)
const char *err_name;
const char *err_string;
struct priv_owner *p = hw->priv;
MP_TRACE(hw, "Calling %s\n", func);
if (err == CUDA_SUCCESS)
return 0;
cuGetErrorName(err, &err_name);
cuGetErrorString(err, &err_string);
p->cu->cuGetErrorName(err, &err_name);
p->cu->cuGetErrorString(err, &err_string);
MP_ERR(hw, "%s failed", func);
if (err_name && err_string)
@ -82,6 +84,7 @@ static int cuda_init(struct ra_hwdec *hw)
unsigned int device_count;
int ret = 0;
struct priv_owner *p = hw->priv;
CudaFunctions *cu;
if (!ra_is_gl(hw->ra))
return -1;
@ -92,24 +95,25 @@ static int cuda_init(struct ra_hwdec *hw)
return -1;
}
bool loaded = cuda_load();
if (!loaded) {
ret = cuda_load_functions(&p->cu, NULL);
if (ret != 0) {
MP_VERBOSE(hw, "Failed to load CUDA symbols\n");
return -1;
}
cu = p->cu;
ret = CHECK_CU(cuInit(0));
ret = CHECK_CU(cu->cuInit(0));
if (ret < 0)
goto error;
// Allocate display context
ret = CHECK_CU(cuGLGetDevices(&device_count, &display_dev, 1,
CU_GL_DEVICE_LIST_ALL));
ret = CHECK_CU(cu->cuGLGetDevices(&device_count, &display_dev, 1,
CU_GL_DEVICE_LIST_ALL));
if (ret < 0)
goto error;
ret = CHECK_CU(cuCtxCreate(&p->display_ctx, CU_CTX_SCHED_BLOCKING_SYNC,
display_dev));
ret = CHECK_CU(cu->cuCtxCreate(&p->display_ctx, CU_CTX_SCHED_BLOCKING_SYNC,
display_dev));
if (ret < 0)
goto error;
@ -121,7 +125,7 @@ static int cuda_init(struct ra_hwdec *hw)
if (decode_dev_idx > -1) {
CUdevice decode_dev;
ret = CHECK_CU(cuDeviceGet(&decode_dev, decode_dev_idx));
ret = CHECK_CU(cu->cuDeviceGet(&decode_dev, decode_dev_idx));
if (ret < 0)
goto error;
@ -129,12 +133,12 @@ static int cuda_init(struct ra_hwdec *hw)
MP_INFO(hw, "Using separate decoder and display devices\n");
// Pop the display context. We won't use it again during init()
ret = CHECK_CU(cuCtxPopCurrent(&dummy));
ret = CHECK_CU(cu->cuCtxPopCurrent(&dummy));
if (ret < 0)
goto error;
ret = CHECK_CU(cuCtxCreate(&p->decode_ctx, CU_CTX_SCHED_BLOCKING_SYNC,
decode_dev));
ret = CHECK_CU(cu->cuCtxCreate(&p->decode_ctx, CU_CTX_SCHED_BLOCKING_SYNC,
decode_dev));
if (ret < 0)
goto error;
}
@ -155,7 +159,7 @@ static int cuda_init(struct ra_hwdec *hw)
goto error;
}
ret = CHECK_CU(cuCtxPopCurrent(&dummy));
ret = CHECK_CU(cu->cuCtxPopCurrent(&dummy));
if (ret < 0)
goto error;
@ -168,7 +172,7 @@ static int cuda_init(struct ra_hwdec *hw)
error:
av_buffer_unref(&hw_device_ctx);
CHECK_CU(cuCtxPopCurrent(&dummy));
CHECK_CU(cu->cuCtxPopCurrent(&dummy));
return -1;
}
@ -176,15 +180,18 @@ static int cuda_init(struct ra_hwdec *hw)
static void cuda_uninit(struct ra_hwdec *hw)
{
struct priv_owner *p = hw->priv;
CudaFunctions *cu = p->cu;
hwdec_devices_remove(hw->devs, &p->hwctx);
av_buffer_unref(&p->hwctx.av_device_ref);
if (p->decode_ctx && p->decode_ctx != p->display_ctx)
CHECK_CU(cuCtxDestroy(p->decode_ctx));
CHECK_CU(cu->cuCtxDestroy(p->decode_ctx));
if (p->display_ctx)
CHECK_CU(cuCtxDestroy(p->display_ctx));
CHECK_CU(cu->cuCtxDestroy(p->display_ctx));
cuda_free_functions(&p->cu);
}
#undef CHECK_CU
@ -195,6 +202,7 @@ static int mapper_init(struct ra_hwdec_mapper *mapper)
struct priv_owner *p_owner = mapper->owner->priv;
struct priv *p = mapper->priv;
CUcontext dummy;
CudaFunctions *cu = p_owner->cu;
int ret = 0, eret = 0;
p->display_ctx = p_owner->display_ctx;
@ -212,7 +220,7 @@ static int mapper_init(struct ra_hwdec_mapper *mapper)
return -1;
}
ret = CHECK_CU(cuCtxPushCurrent(p->display_ctx));
ret = CHECK_CU(cu->cuCtxPushCurrent(p->display_ctx));
if (ret < 0)
return ret;
@ -239,27 +247,27 @@ static int mapper_init(struct ra_hwdec_mapper *mapper)
GLenum target;
ra_gl_get_raw_tex(mapper->ra, mapper->tex[n], &texture, &target);
ret = CHECK_CU(cuGraphicsGLRegisterImage(&p->cu_res[n], texture, target,
CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD));
ret = CHECK_CU(cu->cuGraphicsGLRegisterImage(&p->cu_res[n], texture, target,
CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD));
if (ret < 0)
goto error;
ret = CHECK_CU(cuGraphicsMapResources(1, &p->cu_res[n], 0));
ret = CHECK_CU(cu->cuGraphicsMapResources(1, &p->cu_res[n], 0));
if (ret < 0)
goto error;
ret = CHECK_CU(cuGraphicsSubResourceGetMappedArray(&p->cu_array[n], p->cu_res[n],
0, 0));
ret = CHECK_CU(cu->cuGraphicsSubResourceGetMappedArray(&p->cu_array[n], p->cu_res[n],
0, 0));
if (ret < 0)
goto error;
ret = CHECK_CU(cuGraphicsUnmapResources(1, &p->cu_res[n], 0));
ret = CHECK_CU(cu->cuGraphicsUnmapResources(1, &p->cu_res[n], 0));
if (ret < 0)
goto error;
}
error:
eret = CHECK_CU(cuCtxPopCurrent(&dummy));
eret = CHECK_CU(cu->cuCtxPopCurrent(&dummy));
if (eret < 0)
return eret;
@ -269,17 +277,19 @@ static int mapper_init(struct ra_hwdec_mapper *mapper)
static void mapper_uninit(struct ra_hwdec_mapper *mapper)
{
struct priv *p = mapper->priv;
struct priv_owner *p_owner = mapper->owner->priv;
CudaFunctions *cu = p_owner->cu;
CUcontext dummy;
// Don't bail if any CUDA calls fail. This is all best effort.
CHECK_CU(cuCtxPushCurrent(p->display_ctx));
CHECK_CU(cu->cuCtxPushCurrent(p->display_ctx));
for (int n = 0; n < 4; n++) {
if (p->cu_res[n] > 0)
CHECK_CU(cuGraphicsUnregisterResource(p->cu_res[n]));
CHECK_CU(cu->cuGraphicsUnregisterResource(p->cu_res[n]));
p->cu_res[n] = 0;
ra_tex_free(mapper->ra, &mapper->tex[n]);
}
CHECK_CU(cuCtxPopCurrent(&dummy));
CHECK_CU(cu->cuCtxPopCurrent(&dummy));
}
static void mapper_unmap(struct ra_hwdec_mapper *mapper)
@ -289,10 +299,12 @@ static void mapper_unmap(struct ra_hwdec_mapper *mapper)
static int mapper_map(struct ra_hwdec_mapper *mapper)
{
struct priv *p = mapper->priv;
struct priv_owner *p_owner = mapper->owner->priv;
CudaFunctions *cu = p_owner->cu;
CUcontext dummy;
int ret = 0, eret = 0;
ret = CHECK_CU(cuCtxPushCurrent(p->display_ctx));
ret = CHECK_CU(cu->cuCtxPushCurrent(p->display_ctx));
if (ret < 0)
return ret;
@ -308,14 +320,14 @@ static int mapper_map(struct ra_hwdec_mapper *mapper)
mapper->tex[n]->params.format->pixel_size,
.Height = mp_image_plane_h(&p->layout, n),
};
ret = CHECK_CU(cuMemcpy2D(&cpy));
ret = CHECK_CU(cu->cuMemcpy2D(&cpy));
if (ret < 0)
goto error;
}
error:
eret = CHECK_CU(cuCtxPopCurrent(&dummy));
eret = CHECK_CU(cu->cuCtxPopCurrent(&dummy));
if (eret < 0)
return eret;

View File

@ -838,10 +838,14 @@ hwaccel_features = [
'deps': 'gl-dxinterop && d3d9-hwaccel',
'groups': [ 'gl' ],
'func': check_true,
}, {
'name': 'ffnvcodec',
'desc': 'CUDA Headers and dynamic loader',
'func': check_pkg_config('ffnvcodec >= 8.1.24.1'),
}, {
'name': '--cuda-hwaccel',
'desc': 'CUDA hwaccel',
'deps': 'gl',
'deps': 'gl && ffnvcodec',
'func': check_cc(fragment=load_fragment('cuda.c'),
use='libavcodec'),
}

View File

@ -461,7 +461,6 @@ def build(ctx):
( "video/out/opengl/context_wayland.c", "gl-wayland" ),
( "video/out/opengl/context_win.c", "gl-win32" ),
( "video/out/opengl/context_x11egl.c", "egl-x11" ),
( "video/out/opengl/cuda_dynamic.c", "cuda-hwaccel" ),
( "video/out/opengl/egl_helpers.c", "egl-helpers" ),
( "video/out/opengl/formats.c", "gl" ),
( "video/out/opengl/hwdec_cuda.c", "cuda-hwaccel" ),