mirror of https://git.ffmpeg.org/ffmpeg.git
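avcodec/nvdec: avoid needless copy of output frame
Instead of copying each decoded plane out of the mapped cuvid surface, replaces the frame's data pointers with the mapped cuvid ones. Adds buffer_refs to the frame to ensure the needed contexts (the decoder) stay alive and the cuvid surface index (idx) stays allocated for as long as the frame is in use. Adds another buffer_ref, stored in the frame, whose free callback unmaps the cuvid frame once the output frame itself is unreferenced.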
This commit is contained in:
parent
2e700b082c
commit
baabd3c2ad
|
@ -308,7 +308,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
|
||||||
params.CodecType = cuvid_codec_type;
|
params.CodecType = cuvid_codec_type;
|
||||||
params.ChromaFormat = cuvid_chroma_format;
|
params.ChromaFormat = cuvid_chroma_format;
|
||||||
params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size;
|
params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size;
|
||||||
params.ulNumOutputSurfaces = 1;
|
params.ulNumOutputSurfaces = frames_ctx->initial_pool_size;
|
||||||
|
|
||||||
ret = nvdec_decoder_create(&ctx->decoder_ref, frames_ctx->device_ref, &params, avctx);
|
ret = nvdec_decoder_create(&ctx->decoder_ref, frames_ctx->device_ref, &params, avctx);
|
||||||
if (ret < 0) {
|
if (ret < 0) {
|
||||||
|
@ -354,6 +354,32 @@ static void nvdec_fdd_priv_free(void *priv)
|
||||||
av_freep(&priv);
|
av_freep(&priv);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void nvdec_unmap_mapped_frame(void *opaque, uint8_t *data)
|
||||||
|
{
|
||||||
|
NVDECFrame *unmap_data = (NVDECFrame*)data;
|
||||||
|
NVDECDecoder *decoder = (NVDECDecoder*)unmap_data->decoder_ref->data;
|
||||||
|
CUdeviceptr devptr = (CUdeviceptr)opaque;
|
||||||
|
CUresult err;
|
||||||
|
CUcontext dummy;
|
||||||
|
|
||||||
|
err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx);
|
||||||
|
if (err != CUDA_SUCCESS) {
|
||||||
|
av_log(NULL, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
|
||||||
|
goto finish;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr);
|
||||||
|
if (err != CUDA_SUCCESS)
|
||||||
|
av_log(NULL, AV_LOG_ERROR, "cuvidUnmapVideoFrame failed\n");
|
||||||
|
|
||||||
|
decoder->cudl->cuCtxPopCurrent(&dummy);
|
||||||
|
|
||||||
|
finish:
|
||||||
|
av_buffer_unref(&unmap_data->idx_ref);
|
||||||
|
av_buffer_unref(&unmap_data->decoder_ref);
|
||||||
|
av_free(unmap_data);
|
||||||
|
}
|
||||||
|
|
||||||
static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
|
static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
|
||||||
{
|
{
|
||||||
FrameDecodeData *fdd = (FrameDecodeData*)frame->private_ref->data;
|
FrameDecodeData *fdd = (FrameDecodeData*)frame->private_ref->data;
|
||||||
|
@ -361,6 +387,7 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
|
||||||
NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data;
|
NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data;
|
||||||
|
|
||||||
CUVIDPROCPARAMS vpp = { .progressive_frame = 1 };
|
CUVIDPROCPARAMS vpp = { .progressive_frame = 1 };
|
||||||
|
NVDECFrame *unmap_data = NULL;
|
||||||
|
|
||||||
CUresult err;
|
CUresult err;
|
||||||
CUcontext dummy;
|
CUcontext dummy;
|
||||||
|
@ -383,32 +410,39 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
|
||||||
goto finish;
|
goto finish;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; frame->data[i]; i++) {
|
unmap_data = av_mallocz(sizeof(*unmap_data));
|
||||||
CUDA_MEMCPY2D cpy = {
|
if (!unmap_data) {
|
||||||
.srcMemoryType = CU_MEMORYTYPE_DEVICE,
|
ret = AVERROR(ENOMEM);
|
||||||
.dstMemoryType = CU_MEMORYTYPE_DEVICE,
|
goto copy_fail;
|
||||||
.srcDevice = devptr,
|
|
||||||
.dstDevice = (CUdeviceptr)frame->data[i],
|
|
||||||
.srcPitch = pitch,
|
|
||||||
.dstPitch = frame->linesize[i],
|
|
||||||
.srcY = offset,
|
|
||||||
.WidthInBytes = FFMIN(pitch, frame->linesize[i]),
|
|
||||||
.Height = frame->height >> (i ? 1 : 0),
|
|
||||||
};
|
|
||||||
|
|
||||||
err = decoder->cudl->cuMemcpy2D(&cpy);
|
|
||||||
if (err != CUDA_SUCCESS) {
|
|
||||||
av_log(logctx, AV_LOG_ERROR, "Error copying decoded frame: %d\n",
|
|
||||||
err);
|
|
||||||
ret = AVERROR_UNKNOWN;
|
|
||||||
goto copy_fail;
|
|
||||||
}
|
|
||||||
|
|
||||||
offset += cpy.Height;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
frame->buf[1] = av_buffer_create((uint8_t *)unmap_data, sizeof(*unmap_data),
|
||||||
|
nvdec_unmap_mapped_frame, (void*)devptr,
|
||||||
|
AV_BUFFER_FLAG_READONLY);
|
||||||
|
if (!frame->buf[1]) {
|
||||||
|
ret = AVERROR(ENOMEM);
|
||||||
|
goto copy_fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
unmap_data->idx = cf->idx;
|
||||||
|
unmap_data->idx_ref = av_buffer_ref(cf->idx_ref);
|
||||||
|
unmap_data->decoder_ref = av_buffer_ref(cf->decoder_ref);
|
||||||
|
|
||||||
|
for (i = 0; frame->linesize[i]; i++) {
|
||||||
|
frame->data[i] = (uint8_t*)(devptr + offset);
|
||||||
|
frame->linesize[i] = pitch;
|
||||||
|
offset += pitch * (frame->height >> (i ? 1 : 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
goto finish;
|
||||||
|
|
||||||
copy_fail:
|
copy_fail:
|
||||||
decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr);
|
if (!frame->buf[1]) {
|
||||||
|
decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr);
|
||||||
|
av_freep(&unmap_data);
|
||||||
|
} else {
|
||||||
|
av_buffer_unref(&frame->buf[1]);
|
||||||
|
}
|
||||||
|
|
||||||
finish:
|
finish:
|
||||||
decoder->cudl->cuCtxPopCurrent(&dummy);
|
decoder->cudl->cuCtxPopCurrent(&dummy);
|
||||||
|
@ -521,6 +555,16 @@ int ff_nvdec_simple_decode_slice(AVCodecContext *avctx, const uint8_t *buffer,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void nvdec_free_dummy(struct AVHWFramesContext *ctx)
|
||||||
|
{
|
||||||
|
av_buffer_pool_uninit(&ctx->pool);
|
||||||
|
}
|
||||||
|
|
||||||
|
static AVBufferRef *nvdec_alloc_dummy(int size)
|
||||||
|
{
|
||||||
|
return av_buffer_create(NULL, 0, NULL, NULL, 0);
|
||||||
|
}
|
||||||
|
|
||||||
int ff_nvdec_frame_params(AVCodecContext *avctx,
|
int ff_nvdec_frame_params(AVCodecContext *avctx,
|
||||||
AVBufferRef *hw_frames_ctx,
|
AVBufferRef *hw_frames_ctx,
|
||||||
int dpb_size)
|
int dpb_size)
|
||||||
|
@ -550,6 +594,12 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
|
||||||
frames_ctx->height = (avctx->coded_height + 1) & ~1;
|
frames_ctx->height = (avctx->coded_height + 1) & ~1;
|
||||||
frames_ctx->initial_pool_size = dpb_size;
|
frames_ctx->initial_pool_size = dpb_size;
|
||||||
|
|
||||||
|
frames_ctx->free = nvdec_free_dummy;
|
||||||
|
frames_ctx->pool = av_buffer_pool_init(0, nvdec_alloc_dummy);
|
||||||
|
|
||||||
|
if (!frames_ctx->pool)
|
||||||
|
return AVERROR(ENOMEM);
|
||||||
|
|
||||||
switch (sw_desc->comp[0].depth) {
|
switch (sw_desc->comp[0].depth) {
|
||||||
case 8:
|
case 8:
|
||||||
frames_ctx->sw_format = AV_PIX_FMT_NV12;
|
frames_ctx->sw_format = AV_PIX_FMT_NV12;
|
||||||
|
|
Loading…
Reference in New Issue