mirror of https://git.ffmpeg.org/ffmpeg.git
avcodec: implement vp9 nvdec hwaccel
This commit is contained in:
parent
52bf0febb3
commit
f3f73f0893
|
@ -13,7 +13,7 @@ version <next>:
|
|||
- PCE support for extended channel layouts in the AAC encoder
|
||||
- native aptX encoder and decoder
|
||||
- Raw aptX muxer and demuxer
|
||||
- NVIDIA NVDEC-accelerated H.264 and HEVC hwaccel decoding
|
||||
- NVIDIA NVDEC-accelerated H.264, HEVC and VP9 hwaccel decoding
|
||||
- Intel QSV-accelerated overlay filter
|
||||
|
||||
|
||||
|
|
|
@ -2764,6 +2764,8 @@ vp9_d3d11va2_hwaccel_select="vp9_decoder"
|
|||
vp9_dxva2_hwaccel_deps="dxva2 DXVA_PicParams_VP9"
|
||||
vp9_dxva2_hwaccel_select="vp9_decoder"
|
||||
vp9_mediacodec_hwaccel_deps="mediacodec"
|
||||
vp9_nvdec_hwaccel_deps="cuda nvdec"
|
||||
vp9_nvdec_hwaccel_select="vp9_decoder"
|
||||
vp9_vaapi_hwaccel_deps="vaapi VADecPictureParameterBufferVP9_bit_depth"
|
||||
vp9_vaapi_hwaccel_select="vp9_decoder"
|
||||
wmv3_d3d11va_hwaccel_select="vc1_d3d11va_hwaccel"
|
||||
|
|
|
@ -869,6 +869,7 @@ OBJS-$(CONFIG_VC1_VAAPI_HWACCEL) += vaapi_vc1.o
|
|||
OBJS-$(CONFIG_VC1_VDPAU_HWACCEL) += vdpau_vc1.o
|
||||
OBJS-$(CONFIG_VP9_D3D11VA_HWACCEL) += dxva2_vp9.o
|
||||
OBJS-$(CONFIG_VP9_DXVA2_HWACCEL) += dxva2_vp9.o
|
||||
OBJS-$(CONFIG_VP9_NVDEC_HWACCEL) += nvdec_vp9.o
|
||||
OBJS-$(CONFIG_VP9_VAAPI_HWACCEL) += vaapi_vp9.o
|
||||
OBJS-$(CONFIG_VP8_QSV_HWACCEL) += qsvdec_other.o
|
||||
|
||||
|
|
|
@ -123,6 +123,7 @@ static void register_all(void)
|
|||
REGISTER_HWACCEL(VP9_D3D11VA2, vp9_d3d11va2);
|
||||
REGISTER_HWACCEL(VP9_DXVA2, vp9_dxva2);
|
||||
REGISTER_HWACCEL(VP9_MEDIACODEC, vp9_mediacodec);
|
||||
REGISTER_HWACCEL(VP9_NVDEC, vp9_nvdec);
|
||||
REGISTER_HWACCEL(VP9_VAAPI, vp9_vaapi);
|
||||
REGISTER_HWACCEL(WMV3_D3D11VA, wmv3_d3d11va);
|
||||
REGISTER_HWACCEL(WMV3_D3D11VA2, wmv3_d3d11va2);
|
||||
|
|
|
@ -54,6 +54,7 @@ static int map_avcodec_id(enum AVCodecID id)
|
|||
switch (id) {
|
||||
case AV_CODEC_ID_H264: return cudaVideoCodec_H264;
|
||||
case AV_CODEC_ID_HEVC: return cudaVideoCodec_HEVC;
|
||||
case AV_CODEC_ID_VP9: return cudaVideoCodec_VP9;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,227 @@
|
|||
/*
|
||||
* VP9 HW decode acceleration through NVDEC
|
||||
*
|
||||
* Copyright (c) 2016 Timo Rothenpieler
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/pixdesc.h"
|
||||
|
||||
#include "avcodec.h"
|
||||
#include "nvdec.h"
|
||||
#include "decode.h"
|
||||
#include "internal.h"
|
||||
#include "vp9shared.h"
|
||||
|
||||
/**
 * Fetch the idx field of the NVDECFrame attached to a frame.
 *
 * The NVDECFrame is reached through the FrameDecodeData stored in
 * frame->private_ref and its hwaccel_priv pointer.
 *
 * @param frame frame to inspect; may be NULL
 * @return the frame's NVDECFrame idx, or 255 when frame is NULL or has
 *         no private_ref (presumably the "no reference" marker expected
 *         by the driver -- TODO confirm against the NVDEC headers)
 */
static unsigned char get_ref_idx(AVFrame *frame)
{
    if (frame && frame->private_ref) {
        const FrameDecodeData *decode_data = (const FrameDecodeData*)frame->private_ref->data;
        const NVDECFrame      *nv_frame    = (const NVDECFrame*)decode_data->hwaccel_priv;

        return nv_frame->idx;
    }

    return 255;
}
|
||||
|
||||
/**
 * Begin hardware decoding of the current VP9 frame.
 *
 * Calls ff_nvdec_start_frame() for the current frame and then fills
 * ctx->pic_params (including its CodecSpecific.vp9 member) from the
 * header fields held in the VP9SharedContext and from the pixel format
 * descriptor of avctx->sw_pix_fmt.
 *
 * @param avctx  codec context; priv_data is read as a VP9SharedContext
 * @param buffer unused here
 * @param size   unused here
 * @return 0 on success, AVERROR_BUG if sw_pix_fmt has no descriptor,
 *         or the negative error code returned by ff_nvdec_start_frame()
 */
static int nvdec_vp9_start_frame(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
{
    VP9SharedContext *h = avctx->priv_data;
    const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);

    NVDECContext      *ctx = avctx->internal->hwaccel_priv_data;
    CUVIDPICPARAMS     *pp = &ctx->pic_params;
    CUVIDVP9PICPARAMS *ppc = &pp->CodecSpecific.vp9;
    FrameDecodeData *fdd;
    NVDECFrame *cf;
    AVFrame *cur_frame = h->frames[CUR_FRAME].tf.f;

    int ret, i;

    /* av_pix_fmt_desc_get() returns NULL for an unknown format; bail out
     * before touching any decoder state instead of dereferencing it below. */
    if (!pixdesc)
        return AVERROR_BUG;

    ret = ff_nvdec_start_frame(avctx, cur_frame);
    if (ret < 0)
        return ret;

    fdd = (FrameDecodeData*)cur_frame->private_ref->data;
    cf  = (NVDECFrame*)fdd->hwaccel_priv;

    *pp = (CUVIDPICPARAMS) {
        /* frame dimensions rounded up to whole 16-pixel units */
        .PicWidthInMbs     = (cur_frame->width  + 15) / 16,
        .FrameHeightInMbs  = (cur_frame->height + 15) / 16,
        .CurrPicIdx        = cf->idx,

        .CodecSpecific.vp9 = {
            .width                    = cur_frame->width,
            .height                   = cur_frame->height,

            /* get_ref_idx() yields 255 for a missing reference */
            .LastRefIdx               = get_ref_idx(h->refs[h->h.refidx[0]].f),
            .GoldenRefIdx             = get_ref_idx(h->refs[h->h.refidx[1]].f),
            .AltRefIdx                = get_ref_idx(h->refs[h->h.refidx[2]].f),

            .profile                  = h->h.profile,
            .frameContextIdx          = h->h.framectxid,
            /* 0 for a keyframe, 1 otherwise */
            .frameType                = !h->h.keyframe,
            .showFrame                = !h->h.invisible,
            .errorResilient           = h->h.errorres,
            .frameParallelDecoding    = h->h.parallelmode,
            .subSamplingX             = pixdesc->log2_chroma_w,
            .subSamplingY             = pixdesc->log2_chroma_h,
            .intraOnly                = h->h.intraonly,
            /* forced to 0 on keyframes */
            .allow_high_precision_mv  = h->h.keyframe ? 0 : h->h.highprecisionmvs,
            .refreshEntropyProbs      = h->h.refreshctx,

            .bitDepthMinus8Luma       = pixdesc->comp[0].depth - 8,
            .bitDepthMinus8Chroma     = pixdesc->comp[1].depth - 8,

            .loopFilterLevel          = h->h.filter.level,
            .loopFilterSharpness      = h->h.filter.sharpness,
            .modeRefLfEnabled         = h->h.lf_delta.enabled,

            .log2_tile_columns        = h->h.tiling.log2_tile_cols,
            .log2_tile_rows           = h->h.tiling.log2_tile_rows,

            .segmentEnabled           = h->h.segmentation.enabled,
            .segmentMapUpdate         = h->h.segmentation.update_map,
            .segmentMapTemporalUpdate = h->h.segmentation.temporal,
            .segmentFeatureMode       = h->h.segmentation.absolute_vals,

            .qpYAc                    = h->h.yac_qi,
            .qpYDc                    = h->h.ydc_qdelta,
            .qpChDc                   = h->h.uvdc_qdelta,
            .qpChAc                   = h->h.uvac_qdelta,

            .resetFrameContext        = h->h.resetctx,
            /* swaps the values 0 and 1 and leaves larger values untouched;
             * presumably translates between the decoder's and the driver's
             * filter numbering -- TODO confirm */
            .mcomp_filter_type        = h->h.filtermode ^ (h->h.filtermode <= 1),

            .frameTagSize             = h->h.uncompressed_header_size,
            .offsetToDctParts         = h->h.compressed_header_size,

            .refFrameSignBias[0]      = 0,
        }
    };

    for (i = 0; i < 2; i++)
        ppc->mbModeLfDelta[i] = h->h.lf_delta.mode[i];

    for (i = 0; i < 4; i++)
        ppc->mbRefLfDelta[i] = h->h.lf_delta.ref[i];

    for (i = 0; i < 7; i++)
        ppc->mb_segment_tree_probs[i] = h->h.segmentation.prob[i];

    for (i = 0; i < 3; i++) {
        ppc->activeRefIdx[i] = h->h.refidx[i];
        ppc->segment_pred_probs[i] = h->h.segmentation.pred_prob[i];
        /* entry 0 was set to 0 above; the three signbias values go to 1..3 */
        ppc->refFrameSignBias[i + 1] = h->h.signbias[i];
    }

    for (i = 0; i < 8; i++) {
        ppc->segmentFeatureEnable[i][0] = h->h.segmentation.feat[i].q_enabled;
        ppc->segmentFeatureEnable[i][1] = h->h.segmentation.feat[i].lf_enabled;
        ppc->segmentFeatureEnable[i][2] = h->h.segmentation.feat[i].ref_enabled;
        ppc->segmentFeatureEnable[i][3] = h->h.segmentation.feat[i].skip_enabled;

        ppc->segmentFeatureData[i][0] = h->h.segmentation.feat[i].q_val;
        ppc->segmentFeatureData[i][1] = h->h.segmentation.feat[i].lf_val;
        ppc->segmentFeatureData[i][2] = h->h.segmentation.feat[i].ref_val;
        /* the skip feature carries no data value */
        ppc->segmentFeatureData[i][3] = 0;
    }

    /* Map the libavutil colorspace onto the 0..7 codes written to
     * colorSpace; anything not listed falls back to 0. */
    switch (avctx->colorspace) {
    default:
    case AVCOL_SPC_UNSPECIFIED:
        ppc->colorSpace = 0;
        break;
    case AVCOL_SPC_BT470BG:
        ppc->colorSpace = 1;
        break;
    case AVCOL_SPC_BT709:
        ppc->colorSpace = 2;
        break;
    case AVCOL_SPC_SMPTE170M:
        ppc->colorSpace = 3;
        break;
    case AVCOL_SPC_SMPTE240M:
        ppc->colorSpace = 4;
        break;
    case AVCOL_SPC_BT2020_NCL:
        ppc->colorSpace = 5;
        break;
    case AVCOL_SPC_RESERVED:
        ppc->colorSpace = 6;
        break;
    case AVCOL_SPC_RGB:
        ppc->colorSpace = 7;
        break;
    }

    return 0;
}
|
||||
|
||||
/**
 * Finish hardware decoding of the current frame.
 *
 * Delegates to ff_nvdec_end_frame() and afterwards clears the stored
 * bitstream pointer, whatever the outcome of that call.
 *
 * @return the value returned by ff_nvdec_end_frame()
 */
static int nvdec_vp9_end_frame(AVCodecContext *avctx)
{
    NVDECContext *nvdec = avctx->internal->hwaccel_priv_data;
    const int status = ff_nvdec_end_frame(avctx);

    /* a NULL bitstream makes the next decode_slice call record a new base */
    nvdec->bitstream = NULL;

    return status;
}
|
||||
|
||||
/**
 * Record one chunk of frame data for submission to the decoder.
 *
 * Grows the slice offset table by one entry, remembers the first buffer
 * seen as the bitstream base, and stores this buffer's offset from that
 * base together with the accumulated length.
 *
 * @param buffer start of the chunk
 * @param size   number of bytes in the chunk
 * @return 0 on success, AVERROR(ENOMEM) if the offset table cannot grow
 */
static int nvdec_vp9_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
{
    NVDECContext *nvdec = avctx->internal->hwaccel_priv_data;
    void *grown = av_fast_realloc(nvdec->slice_offsets, &nvdec->slice_offsets_allocated,
                                  (nvdec->nb_slices + 1) * sizeof(*nvdec->slice_offsets));

    if (!grown)
        return AVERROR(ENOMEM);
    nvdec->slice_offsets = grown;

    /* the first chunk of a frame defines the base all offsets refer to */
    if (nvdec->bitstream == NULL)
        nvdec->bitstream = (uint8_t*)buffer;

    nvdec->slice_offsets[nvdec->nb_slices++] = buffer - nvdec->bitstream;
    nvdec->bitstream_len += size;

    return 0;
}
|
||||
|
||||
static int nvdec_vp9_frame_params(AVCodecContext *avctx,
|
||||
AVBufferRef *hw_frames_ctx)
|
||||
{
|
||||
// VP9 uses a fixed size pool of 8 possible reference frames
|
||||
return ff_nvdec_frame_params(avctx, hw_frames_ctx, 8);
|
||||
}
|
||||
|
||||
AVHWAccel ff_vp9_nvdec_hwaccel = {
|
||||
.name = "vp9_nvdec",
|
||||
.type = AVMEDIA_TYPE_VIDEO,
|
||||
.id = AV_CODEC_ID_VP9,
|
||||
.pix_fmt = AV_PIX_FMT_CUDA,
|
||||
.start_frame = nvdec_vp9_start_frame,
|
||||
.end_frame = nvdec_vp9_end_frame,
|
||||
.decode_slice = nvdec_vp9_decode_slice,
|
||||
.frame_params = nvdec_vp9_frame_params,
|
||||
.init = ff_nvdec_decode_init,
|
||||
.uninit = ff_nvdec_decode_uninit,
|
||||
.priv_data_size = sizeof(NVDECContext),
|
||||
};
|
|
@ -29,7 +29,7 @@
|
|||
|
||||
#define LIBAVCODEC_VERSION_MAJOR 58
|
||||
#define LIBAVCODEC_VERSION_MINOR 3
|
||||
#define LIBAVCODEC_VERSION_MICRO 100
|
||||
#define LIBAVCODEC_VERSION_MICRO 101
|
||||
|
||||
#define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
|
||||
LIBAVCODEC_VERSION_MINOR, \
|
||||
|
|
|
@ -169,7 +169,10 @@ fail:
|
|||
|
||||
static int update_size(AVCodecContext *avctx, int w, int h)
|
||||
{
|
||||
#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + CONFIG_VP9_D3D11VA_HWACCEL * 2 + CONFIG_VP9_VAAPI_HWACCEL)
|
||||
#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
|
||||
CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
|
||||
CONFIG_VP9_NVDEC_HWACCEL + \
|
||||
CONFIG_VP9_VAAPI_HWACCEL)
|
||||
enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
|
||||
VP9Context *s = avctx->priv_data;
|
||||
uint8_t *p;
|
||||
|
@ -191,12 +194,18 @@ static int update_size(AVCodecContext *avctx, int w, int h)
|
|||
*fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
|
||||
*fmtp++ = AV_PIX_FMT_D3D11;
|
||||
#endif
|
||||
#if CONFIG_VP9_NVDEC_HWACCEL
|
||||
*fmtp++ = AV_PIX_FMT_CUDA;
|
||||
#endif
|
||||
#if CONFIG_VP9_VAAPI_HWACCEL
|
||||
*fmtp++ = AV_PIX_FMT_VAAPI;
|
||||
#endif
|
||||
break;
|
||||
case AV_PIX_FMT_YUV420P10:
|
||||
case AV_PIX_FMT_YUV420P12:
|
||||
#if CONFIG_VP9_NVDEC_HWACCEL
|
||||
*fmtp++ = AV_PIX_FMT_CUDA;
|
||||
#endif
|
||||
#if CONFIG_VP9_VAAPI_HWACCEL
|
||||
*fmtp++ = AV_PIX_FMT_VAAPI;
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue