vd_lavc: delay images before reading them back

Facilitates hardware pipelining, in particular with nvidia/dxva.
This commit is contained in:
wm4 2016-01-25 21:00:53 +01:00
parent 271cabe6a5
commit b53cb8de5e
4 changed files with 52 additions and 9 deletions

View File

@ -39,6 +39,8 @@
#include "video/hwdec.h" #include "video/hwdec.h"
#include "video/d3d.h" #include "video/d3d.h"
// Extra surfaces added on top of the base surface count: one for each frame
// the hwdec delay queue may hold (HWDEC_DELAY_QUEUE_COUNT).
#define ADDTIONAL_SURFACES HWDEC_DELAY_QUEUE_COUNT
// A minor evil. // A minor evil.
#ifndef FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO #ifndef FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO
#define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO 2 #define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO 2
@ -574,7 +576,7 @@ static int dxva2_create_decoder(struct lavc_ctx *s, int w, int h,
surface_alignment = 16; surface_alignment = 16;
/* 4 base work surfaces */ /* 4 base work surfaces */
ctx->num_surfaces = 4; ctx->num_surfaces = 4 + ADDTIONAL_SURFACES;
/* add surfaces based on number of possible refs */ /* add surfaces based on number of possible refs */
if (codec_id == AV_CODEC_ID_H264 || codec_id == AV_CODEC_ID_HEVC) if (codec_id == AV_CODEC_ID_H264 || codec_id == AV_CODEC_ID_HEVC)

View File

@ -9,6 +9,8 @@
#include "video/mp_image.h" #include "video/mp_image.h"
#include "video/hwdec.h" #include "video/hwdec.h"
// Number of decoded frames that may sit in the decoder's delay queue before
// the oldest one is read back and output. Hwdec backends also use this value
// to size their surface pools.
#define HWDEC_DELAY_QUEUE_COUNT 2
typedef struct lavc_ctx { typedef struct lavc_ctx {
struct mp_log *log; struct mp_log *log;
struct MPOpts *opts; struct MPOpts *opts;
@ -23,6 +25,10 @@ typedef struct lavc_ctx {
bool hwdec_failed; bool hwdec_failed;
bool hwdec_notified; bool hwdec_notified;
struct mp_image **delay_queue;
int num_delay_queue;
int max_delay_queue;
// From VO // From VO
struct mp_hwdec_info *hwdec_info; struct mp_hwdec_info *hwdec_info;

View File

@ -44,7 +44,7 @@
* Note that redundant additional surfaces also might allow for some * Note that redundant additional surfaces also might allow for some
* buffering (i.e. not trying to reuse a surface while it's busy). * buffering (i.e. not trying to reuse a surface while it's busy).
*/ */
#define ADDTIONAL_SURFACES 6 #define ADDTIONAL_SURFACES (6 + HWDEC_DELAY_QUEUE_COUNT)
// Some upper bound. // Some upper bound.
#define MAX_SURFACES 25 #define MAX_SURFACES 25

View File

@ -391,6 +391,10 @@ static void init_avctx(struct dec_video *vd, const char *decoder,
avctx->get_buffer2 = get_buffer2_hwdec; avctx->get_buffer2 = get_buffer2_hwdec;
if (ctx->hwdec->init(ctx) < 0) if (ctx->hwdec->init(ctx) < 0)
goto error; goto error;
// Delaying the readback can increase efficiency: reading a frame back
// immediately after decoding it would block on the hardware pipeline.
if (ctx->hwdec->process_image)
ctx->max_delay_queue = HWDEC_DELAY_QUEUE_COUNT;
} else { } else {
mp_set_avcodec_threads(vd->log, avctx, lavc_param->threads); mp_set_avcodec_threads(vd->log, avctx, lavc_param->threads);
} }
@ -454,6 +458,17 @@ static void reset_avctx(struct dec_video *vd)
ctx->flushing = false; ctx->flushing = false;
} }
static void flush_all(struct dec_video *vd)
{
vd_ffmpeg_ctx *ctx = vd->priv;
for (int n = 0; n < ctx->num_delay_queue; n++)
talloc_free(ctx->delay_queue[n]);
ctx->num_delay_queue = 0;
reset_avctx(vd);
}
static void uninit_avctx(struct dec_video *vd) static void uninit_avctx(struct dec_video *vd)
{ {
vd_ffmpeg_ctx *ctx = vd->priv; vd_ffmpeg_ctx *ctx = vd->priv;
@ -474,9 +489,11 @@ static void uninit_avctx(struct dec_video *vd)
av_frame_free(&ctx->pic); av_frame_free(&ctx->pic);
ctx->flushing = false; flush_all(vd);
ctx->hwdec_failed = false; ctx->hwdec_failed = false;
ctx->hwdec_fail_count = 0; ctx->hwdec_fail_count = 0;
ctx->max_delay_queue = 0;
} }
static void update_image_params(struct dec_video *vd, AVFrame *frame, static void update_image_params(struct dec_video *vd, AVFrame *frame,
@ -618,6 +635,22 @@ static int get_buffer2_hwdec(AVCodecContext *avctx, AVFrame *pic, int flags)
return 0; return 0;
} }
// Pop the oldest frame from the delay queue and prepare it for output.
//
// Returns NULL if the queue is empty, or if the hwdec process_image callback
// did not produce an image. Otherwise returns the frame after optional hwdec
// processing and conversion through mp_img_swap_to_native().
static struct mp_image *read_output(struct dec_video *vd)
{
    vd_ffmpeg_ctx *ctx = vd->priv;

    if (!ctx->num_delay_queue)
        return NULL;

    // Take the head of the queue; the remaining entries shift down by one.
    struct mp_image *res = ctx->delay_queue[0];
    MP_TARRAY_REMOVE_AT(ctx->delay_queue, ctx->num_delay_queue, 0);

    if (ctx->hwdec && ctx->hwdec->process_image)
        res = ctx->hwdec->process_image(ctx, res);

    // NOTE(review): process_image's contract is not visible here; presumably
    // it can return NULL on failure (e.g. a failed readback). Do not hand a
    // NULL image to mp_img_swap_to_native() - confirm against its definition.
    if (!res)
        return NULL;

    return mp_img_swap_to_native(res);
}
static void decode(struct dec_video *vd, struct demux_packet *packet, static void decode(struct dec_video *vd, struct demux_packet *packet,
int flags, struct mp_image **out_image) int flags, struct mp_image **out_image)
{ {
@ -671,8 +704,11 @@ static void decode(struct dec_video *vd, struct demux_packet *packet,
} }
// Skipped frame, or delayed output due to multithreaded decoding. // Skipped frame, or delayed output due to multithreaded decoding.
if (!got_picture) if (!got_picture) {
if (!packet)
*out_image = read_output(vd);
return; return;
}
ctx->hwdec_fail_count = 0; ctx->hwdec_fail_count = 0;
@ -701,10 +737,9 @@ static void decode(struct dec_video *vd, struct demux_packet *packet,
av_frame_unref(ctx->pic); av_frame_unref(ctx->pic);
if (ctx->hwdec && ctx->hwdec->process_image) MP_TARRAY_APPEND(ctx, ctx->delay_queue, ctx->num_delay_queue, mpi);
mpi = ctx->hwdec->process_image(ctx, mpi); if (ctx->num_delay_queue > ctx->max_delay_queue)
*out_image = read_output(vd);
*out_image = mp_img_swap_to_native(mpi);
} }
static struct mp_image *decode_with_fallback(struct dec_video *vd, static struct mp_image *decode_with_fallback(struct dec_video *vd,
@ -740,7 +775,7 @@ static int control(struct dec_video *vd, int cmd, void *arg)
vd_ffmpeg_ctx *ctx = vd->priv; vd_ffmpeg_ctx *ctx = vd->priv;
switch (cmd) { switch (cmd) {
case VDCTRL_RESET: case VDCTRL_RESET:
reset_avctx(vd); flush_all(vd);
return CONTROL_TRUE; return CONTROL_TRUE;
case VDCTRL_QUERY_UNSEEN_FRAMES: { case VDCTRL_QUERY_UNSEEN_FRAMES: {
AVCodecContext *avctx = ctx->avctx; AVCodecContext *avctx = ctx->avctx;