avutil/hwcontext_cuda: combine transfer functions

Gets rid of some mostly duplicated code and adds the ability to do
hardware-to-hardware transfers.
This commit is contained in:
Timo Rothenpieler 2020-03-19 17:35:59 +01:00
parent c693e7b053
commit d9a37af070
1 changed files with 25 additions and 57 deletions

View File

@ -201,53 +201,7 @@ static int cuda_transfer_get_formats(AVHWFramesContext *ctx,
return 0;
}
/**
 * Download the planes of a CUDA frame into a frame that is not backed by a
 * hardware frames context.
 *
 * Every populated plane of src is queued as an asynchronous 2D copy on
 * hwctx->stream, and the stream is synchronized before returning so that the
 * destination buffers are complete when the caller sees them.
 *
 * @param ctx  frames context; its device context supplies the CUDA context,
 *             the stream and the dynamically loaded CUDA function table
 * @param dst  destination frame; must not have hw_frames_ctx set
 * @param src  source frame; its data[] entries are passed to CUDA as device
 *             pointers
 * @return 0 on success, AVERROR(ENOSYS) if dst is a hardware frame, or the
 *         negative value produced by CHECK_CU for the first CUDA call that
 *         failed
 */
static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
                                   const AVFrame *src)
{
    CUDAFramesContext       *priv = ctx->internal->priv;
    AVHWDeviceContext *device_ctx = ctx->device_ctx;
    AVCUDADeviceContext    *hwctx = device_ctx->hwctx;
    CudaFunctions             *cu = hwctx->internal->cuda_dl;

    CUcontext dummy;
    int i, ret;

    /* We don't support transfers to HW devices. */
    if (dst->hw_frames_ctx)
        return AVERROR(ENOSYS);

    ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
    if (ret < 0)
        return ret;

    for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
        CUDA_MEMCPY2D cpy = {
            .srcMemoryType = CU_MEMORYTYPE_DEVICE,
            .dstMemoryType = CU_MEMORYTYPE_HOST,
            .srcDevice     = (CUdeviceptr)src->data[i],
            .dstHost       = dst->data[i],
            .srcPitch      = src->linesize[i],
            .dstPitch      = dst->linesize[i],
            /* Never copy more bytes per row than the narrower side holds. */
            .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]),
            /* Planes 0 and 3 keep the full frame height; only planes 1 and 2
             * are reduced by shift_height. This matches the upload path. */
            .Height        = src->height >> ((i == 0 || i == 3) ? 0 : priv->shift_height),
        };

        ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, hwctx->stream));
        if (ret < 0)
            goto exit;
    }

    /* The destination is read by the caller right after we return, so wait
     * for the queued copies to finish. */
    ret = CHECK_CU(cu->cuStreamSynchronize(hwctx->stream));

exit:
    /* Always undo the context push, whether or not the copies succeeded. */
    CHECK_CU(cu->cuCtxPopCurrent(&dummy));

    /* Propagate a copy/synchronize failure instead of masking it as success;
     * the success value stays exactly 0 as before. */
    return ret < 0 ? ret : 0;
}
static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
static int cuda_transfer_data(AVHWFramesContext *ctx, AVFrame *dst,
const AVFrame *src)
{
CUDAFramesContext *priv = ctx->internal->priv;
@ -258,31 +212,45 @@ static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
CUcontext dummy;
int i, ret;
/* We don't support transfers from HW devices. */
if (src->hw_frames_ctx)
return AVERROR(ENOSYS);
ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
if (ret < 0)
return ret;
for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
CUDA_MEMCPY2D cpy = {
.srcMemoryType = CU_MEMORYTYPE_HOST,
.dstMemoryType = CU_MEMORYTYPE_DEVICE,
.srcHost = src->data[i],
.dstDevice = (CUdeviceptr)dst->data[i],
.srcPitch = src->linesize[i],
.dstPitch = dst->linesize[i],
.WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
.Height = src->height >> ((i == 0 || i == 3) ? 0 : priv->shift_height),
};
if (src->hw_frames_ctx) {
cpy.srcMemoryType = CU_MEMORYTYPE_DEVICE;
cpy.srcDevice = (CUdeviceptr)src->data[i];
} else {
cpy.srcMemoryType = CU_MEMORYTYPE_HOST;
cpy.srcHost = src->data[i];
}
if (dst->hw_frames_ctx) {
cpy.dstMemoryType = CU_MEMORYTYPE_DEVICE;
cpy.dstDevice = (CUdeviceptr)dst->data[i];
} else {
cpy.dstMemoryType = CU_MEMORYTYPE_HOST;
cpy.dstHost = dst->data[i];
}
ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, hwctx->stream));
if (ret < 0)
goto exit;
}
if (!dst->hw_frames_ctx) {
ret = CHECK_CU(cu->cuStreamSynchronize(hwctx->stream));
if (ret < 0)
goto exit;
}
exit:
CHECK_CU(cu->cuCtxPopCurrent(&dummy));
@ -522,8 +490,8 @@ const HWContextType ff_hwcontext_type_cuda = {
.frames_init = cuda_frames_init,
.frames_get_buffer = cuda_get_buffer,
.transfer_get_formats = cuda_transfer_get_formats,
.transfer_data_to = cuda_transfer_data_to,
.transfer_data_from = cuda_transfer_data_from,
.transfer_data_to = cuda_transfer_data,
.transfer_data_from = cuda_transfer_data,
.pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE },
};