From e6464a44eda9503ab87bf8d2d9a878dd953be267 Mon Sep 17 00:00:00 2001 From: Timo Rothenpieler Date: Mon, 10 Oct 2016 12:52:40 +0200 Subject: [PATCH] avutil/hwcontext_cuda: use dynamically loaded CUDA --- configure | 2 + libavutil/hwcontext_cuda.c | 99 ++++++++++++++++++++++------- libavutil/hwcontext_cuda.h | 5 ++ libavutil/hwcontext_cuda_internal.h | 37 +++++++++++ libavutil/version.h | 2 +- 5 files changed, 120 insertions(+), 25 deletions(-) create mode 100644 libavutil/hwcontext_cuda_internal.h diff --git a/configure b/configure index b5bfad689c..3ab0c1fdb2 100755 --- a/configure +++ b/configure @@ -2537,6 +2537,7 @@ audiotoolbox_extralibs="-framework CoreFoundation -framework AudioToolbox -frame # hardware accelerators crystalhd_deps="libcrystalhd_libcrystalhd_if_h" +cuda_deps_any="dlopen LoadLibrary" cuvid_deps="cuda" d3d11va_deps="d3d11_h dxva_h ID3D11VideoDecoder ID3D11VideoContext" dxva2_deps="dxva2api_h DXVA2_ConfigPictureDecode" @@ -5414,6 +5415,7 @@ elif check_func dlopen -ldl && check_func dlsym -ldl; then fi avisynth_demuxer_extralibs='$ldl' +cuda_extralibs='$ldl' decklink_outdev_extralibs="$decklink_outdev_extralibs $ldl" decklink_indev_extralibs="$decklink_indev_extralibs $ldl" frei0r_filter_extralibs='$ldl' diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c index e1dcab0f25..30de299c1b 100644 --- a/libavutil/hwcontext_cuda.c +++ b/libavutil/hwcontext_cuda.c @@ -20,7 +20,7 @@ #include "common.h" #include "hwcontext.h" #include "hwcontext_internal.h" -#include "hwcontext_cuda.h" +#include "hwcontext_cuda_internal.h" #include "mem.h" #include "pixdesc.h" #include "pixfmt.h" @@ -41,44 +41,46 @@ static void cuda_buffer_free(void *opaque, uint8_t *data) { AVHWFramesContext *ctx = opaque; AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx; + CudaFunctions *cu = hwctx->internal->cuda_dl; CUcontext dummy; - cuCtxPushCurrent(hwctx->cuda_ctx); + cu->cuCtxPushCurrent(hwctx->cuda_ctx); - cuMemFree((CUdeviceptr)data); + 
cu->cuMemFree((CUdeviceptr)data); - cuCtxPopCurrent(&dummy); + cu->cuCtxPopCurrent(&dummy); } static AVBufferRef *cuda_pool_alloc(void *opaque, int size) { AVHWFramesContext *ctx = opaque; AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx; + CudaFunctions *cu = hwctx->internal->cuda_dl; AVBufferRef *ret = NULL; CUcontext dummy = NULL; CUdeviceptr data; CUresult err; - err = cuCtxPushCurrent(hwctx->cuda_ctx); + err = cu->cuCtxPushCurrent(hwctx->cuda_ctx); if (err != CUDA_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n"); return NULL; } - err = cuMemAlloc(&data, size); + err = cu->cuMemAlloc(&data, size); if (err != CUDA_SUCCESS) goto fail; ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0); if (!ret) { - cuMemFree(data); + cu->cuMemFree(data); goto fail; } fail: - cuCtxPopCurrent(&dummy); + cu->cuCtxPopCurrent(&dummy); return ret; } @@ -187,12 +189,13 @@ static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, { CUDAFramesContext *priv = ctx->internal->priv; AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx; + CudaFunctions *cu = device_hwctx->internal->cuda_dl; CUcontext dummy; CUresult err; int i; - err = cuCtxPushCurrent(device_hwctx->cuda_ctx); + err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx); if (err != CUDA_SUCCESS) return AVERROR_UNKNOWN; @@ -208,14 +211,14 @@ static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, .Height = src->height >> (i ? 
priv->shift_height : 0), }; - err = cuMemcpy2D(&cpy); + err = cu->cuMemcpy2D(&cpy); if (err != CUDA_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n"); return AVERROR_UNKNOWN; } } - cuCtxPopCurrent(&dummy); + cu->cuCtxPopCurrent(&dummy); return 0; } @@ -225,12 +228,13 @@ static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst, { CUDAFramesContext *priv = ctx->internal->priv; AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx; + CudaFunctions *cu = device_hwctx->internal->cuda_dl; CUcontext dummy; CUresult err; int i; - err = cuCtxPushCurrent(device_hwctx->cuda_ctx); + err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx); if (err != CUDA_SUCCESS) return AVERROR_UNKNOWN; @@ -246,28 +250,64 @@ static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst, .Height = src->height >> (i ? priv->shift_height : 0), }; - err = cuMemcpy2D(&cpy); + err = cu->cuMemcpy2D(&cpy); if (err != CUDA_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n"); return AVERROR_UNKNOWN; } } - cuCtxPopCurrent(&dummy); + cu->cuCtxPopCurrent(&dummy); return 0; } -static void cuda_device_free(AVHWDeviceContext *ctx) +static void cuda_device_uninit(AVHWDeviceContext *ctx) { AVCUDADeviceContext *hwctx = ctx->hwctx; - cuCtxDestroy(hwctx->cuda_ctx); + + if (hwctx->internal) { + if (hwctx->internal->is_allocated && hwctx->cuda_ctx) { + hwctx->internal->cuda_dl->cuCtxDestroy(hwctx->cuda_ctx); + hwctx->cuda_ctx = NULL; + } + cuda_free_functions(&hwctx->internal->cuda_dl); + } + + av_freep(&hwctx->internal); +} + +static int cuda_device_init(AVHWDeviceContext *ctx) +{ + AVCUDADeviceContext *hwctx = ctx->hwctx; + int ret; + + if (!hwctx->internal) { + hwctx->internal = av_mallocz(sizeof(*hwctx->internal)); + if (!hwctx->internal) + return AVERROR(ENOMEM); + } + + if (!hwctx->internal->cuda_dl) { + ret = cuda_load_functions(&hwctx->internal->cuda_dl); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, "Could not 
dynamically load CUDA\n"); + goto error; + } + } + + return 0; + +error: + cuda_device_uninit(ctx); + return ret; } static int cuda_device_create(AVHWDeviceContext *ctx, const char *device, AVDictionary *opts, int flags) { AVCUDADeviceContext *hwctx = ctx->hwctx; + CudaFunctions *cu; CUdevice cu_device; CUcontext dummy; CUresult err; @@ -276,29 +316,38 @@ static int cuda_device_create(AVHWDeviceContext *ctx, const char *device, if (device) device_idx = strtol(device, NULL, 0); - err = cuInit(0); + if (cuda_device_init(ctx) < 0) + goto error; + + cu = hwctx->internal->cuda_dl; + + err = cu->cuInit(0); if (err != CUDA_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n"); - return AVERROR_UNKNOWN; + goto error; } - err = cuDeviceGet(&cu_device, device_idx); + err = cu->cuDeviceGet(&cu_device, device_idx); if (err != CUDA_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", device_idx); - return AVERROR_UNKNOWN; + goto error; } - err = cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device); + err = cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device); if (err != CUDA_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n"); - return AVERROR_UNKNOWN; + goto error; } - cuCtxPopCurrent(&dummy); + cu->cuCtxPopCurrent(&dummy); - ctx->free = cuda_device_free; + hwctx->internal->is_allocated = 1; return 0; + +error: + cuda_device_uninit(ctx); + return AVERROR_UNKNOWN; } const HWContextType ff_hwcontext_type_cuda = { @@ -309,6 +358,8 @@ const HWContextType ff_hwcontext_type_cuda = { .frames_priv_size = sizeof(CUDAFramesContext), .device_create = cuda_device_create, + .device_init = cuda_device_init, + .device_uninit = cuda_device_uninit, .frames_init = cuda_frames_init, .frames_get_buffer = cuda_get_buffer, .transfer_get_formats = cuda_transfer_get_formats, diff --git a/libavutil/hwcontext_cuda.h b/libavutil/hwcontext_cuda.h index 23a77cee73..12dae8449e 100644 --- 
a/libavutil/hwcontext_cuda.h +++ b/libavutil/hwcontext_cuda.h @@ -20,7 +20,9 @@ #ifndef AVUTIL_HWCONTEXT_CUDA_H #define AVUTIL_HWCONTEXT_CUDA_H +#ifndef CUDA_VERSION #include <cuda.h> +#endif #include "pixfmt.h" @@ -32,11 +34,14 @@ * AVBufferRefs whose data pointer is a CUdeviceptr. */ +typedef struct AVCUDADeviceContextInternal AVCUDADeviceContextInternal; + /** * This struct is allocated as AVHWDeviceContext.hwctx */ typedef struct AVCUDADeviceContext { CUcontext cuda_ctx; + AVCUDADeviceContextInternal *internal; } AVCUDADeviceContext; /** diff --git a/libavutil/hwcontext_cuda_internal.h b/libavutil/hwcontext_cuda_internal.h new file mode 100644 index 0000000000..e1bc6ff350 --- /dev/null +++ b/libavutil/hwcontext_cuda_internal.h @@ -0,0 +1,37 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +#ifndef AVUTIL_HWCONTEXT_CUDA_INTERNAL_H +#define AVUTIL_HWCONTEXT_CUDA_INTERNAL_H + +#include "compat/cuda/dynlink_loader.h" +#include "hwcontext_cuda.h" + +/** + * @file + * FFmpeg internal API for CUDA. 
+ */ + +struct AVCUDADeviceContextInternal { + CudaFunctions *cuda_dl; + int is_allocated; +}; + +#endif /* AVUTIL_HWCONTEXT_CUDA_INTERNAL_H */ + diff --git a/libavutil/version.h b/libavutil/version.h index 471d968e53..45b3c8b81d 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -80,7 +80,7 @@ #define LIBAVUTIL_VERSION_MAJOR 55 #define LIBAVUTIL_VERSION_MINOR 40 -#define LIBAVUTIL_VERSION_MICRO 100 +#define LIBAVUTIL_VERSION_MICRO 101 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ LIBAVUTIL_VERSION_MINOR, \