vd_lavc: delay images before reading them back

Facilitates hardware pipelining in particular with nvidia/dxva.
2016-01-25 21:00:53 +01:00 · 2016-01-25 21:00:53 +01:00 · b53cb8de5e
parent 271cabe6a5
commit b53cb8de5e
4 changed files with 52 additions and 9 deletions
--- a/video/decode/dxva2.c
+++ b/video/decode/dxva2.c
@ -39,6 +39,8 @@
 #include "video/hwdec.h"
 #include "video/d3d.h"
 #define ADDTIONAL_SURFACES HWDEC_DELAY_QUEUE_COUNT
 // A minor evil.
 #ifndef FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO
 #define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO    2
@ -574,7 +576,7 @@ static int dxva2_create_decoder(struct lavc_ctx *s, int w, int h,
        surface_alignment = 16;
    /* 4 base work surfaces */
-    ctx->num_surfaces = 4;
+    ctx->num_surfaces = 4 + ADDTIONAL_SURFACES;
    /* add surfaces based on number of possible refs */
    if (codec_id == AV_CODEC_ID_H264 || codec_id == AV_CODEC_ID_HEVC)
--- a/video/decode/lavc.h
+++ b/video/decode/lavc.h
@ -9,6 +9,8 @@
 #include "video/mp_image.h"
 #include "video/hwdec.h"
 // Number of decoded images held back in lavc_ctx.delay_queue before the
 // oldest one is read back. vd_lavc.c uses it as max_delay_queue when the
 // hwdec provides a process_image callback; the dxva2 and vaapi decoders add
 // it to their surface counts.
 #define HWDEC_DELAY_QUEUE_COUNT 2
 typedef struct lavc_ctx {
    struct mp_log *log;
    struct MPOpts *opts;
@ -23,6 +25,10 @@ typedef struct lavc_ctx {
    bool hwdec_failed;
    bool hwdec_notified;
    struct mp_image **delay_queue;
    int num_delay_queue;
    int max_delay_queue;
    // From VO
    struct mp_hwdec_info *hwdec_info;
--- a/video/decode/vaapi.c
+++ b/video/decode/vaapi.c
@ -44,7 +44,7 @@
 * Note that redundant additional surfaces also might allow for some
 * buffering (i.e. not trying to reuse a surface while it's busy).
 */
-#define ADDTIONAL_SURFACES 6
+#define ADDTIONAL_SURFACES (6 + HWDEC_DELAY_QUEUE_COUNT)
 // Some upper bound.
 #define MAX_SURFACES 25
--- a/video/decode/vd_lavc.c
+++ b/video/decode/vd_lavc.c
@ -391,6 +391,10 @@ static void init_avctx(struct dec_video *vd, const char *decoder,
            avctx->get_buffer2 = get_buffer2_hwdec;
        if (ctx->hwdec->init(ctx) < 0)
            goto error;
        // This can increase efficiency by not blocking on the hardware
        // pipeline by reading back immediately after decoding.
        if (ctx->hwdec->process_image)
            ctx->max_delay_queue = HWDEC_DELAY_QUEUE_COUNT;
    } else {
        mp_set_avcodec_threads(vd->log, avctx, lavc_param->threads);
    }
@ -454,6 +458,17 @@ static void reset_avctx(struct dec_video *vd)
    ctx->flushing = false;
 }
 // Free every image still waiting in the delay queue, mark the queue as
 // empty, then reset the decoder through reset_avctx().
 static void flush_all(struct dec_video *vd)
 {
    vd_ffmpeg_ctx *priv = vd->priv;
    int idx = 0;
    while (idx < priv->num_delay_queue) {
        talloc_free(priv->delay_queue[idx]);
        idx += 1;
    }
    // The array storage itself is kept; only the element count is cleared.
    priv->num_delay_queue = 0;
    reset_avctx(vd);
 }
 static void uninit_avctx(struct dec_video *vd)
 {
    vd_ffmpeg_ctx *ctx = vd->priv;
@ -474,9 +489,11 @@ static void uninit_avctx(struct dec_video *vd)
    av_frame_free(&ctx->pic);
-    ctx->flushing = false;
+    flush_all(vd);
    ctx->hwdec_failed = false;
    ctx->hwdec_fail_count = 0;
    ctx->max_delay_queue = 0;
 }
 static void update_image_params(struct dec_video *vd, AVFrame *frame,
@ -618,6 +635,22 @@ static int get_buffer2_hwdec(AVCodecContext *avctx, AVFrame *pic, int flags)
    return 0;
 }
 // Remove the oldest image from the delay queue and return it, after running
 // it through the hwdec's process_image callback (if there is one) and
 // mp_img_swap_to_native().
 // Returns NULL if the queue is empty or if process_image produced no image.
 static struct mp_image *read_output(struct dec_video *vd)
 {
    vd_ffmpeg_ctx *ctx = vd->priv;
    if (!ctx->num_delay_queue)
        return NULL;
    // Take the head entry; the remaining entries shift down by one.
    struct mp_image *res = ctx->delay_queue[0];
    MP_TARRAY_REMOVE_AT(ctx->delay_queue, ctx->num_delay_queue, 0);
    if (ctx->hwdec && ctx->hwdec->process_image)
        res = ctx->hwdec->process_image(ctx, res);
    // NOTE(review): process_image presumably can fail and return NULL (e.g. a
    // failed readback) - confirm against the hwdec backends. Callers already
    // have to cope with a NULL result (empty queue), so don't hand NULL to
    // mp_img_swap_to_native().
    return res ? mp_img_swap_to_native(res) : NULL;
 }
 static void decode(struct dec_video *vd, struct demux_packet *packet,
                   int flags, struct mp_image **out_image)
 {
@ -671,8 +704,11 @@ static void decode(struct dec_video *vd, struct demux_packet *packet,
    }
    // Skipped frame, or delayed output due to multithreaded decoding.
-    if (!got_picture)
+    if (!got_picture) {
        if (!packet)
            *out_image = read_output(vd);
        return;
    }
    ctx->hwdec_fail_count = 0;
@ -701,10 +737,9 @@ static void decode(struct dec_video *vd, struct demux_packet *packet,
    av_frame_unref(ctx->pic);
-    if (ctx->hwdec && ctx->hwdec->process_image)
+    MP_TARRAY_APPEND(ctx, ctx->delay_queue, ctx->num_delay_queue, mpi);
-        mpi = ctx->hwdec->process_image(ctx, mpi);
+    if (ctx->num_delay_queue > ctx->max_delay_queue)
-
+        *out_image = read_output(vd);
-    *out_image = mp_img_swap_to_native(mpi);
 }
 static struct mp_image *decode_with_fallback(struct dec_video *vd,
@ -740,7 +775,7 @@ static int control(struct dec_video *vd, int cmd, void *arg)
    vd_ffmpeg_ctx *ctx = vd->priv;
    switch (cmd) {
    case VDCTRL_RESET:
-        reset_avctx(vd);
+        flush_all(vd);
        return CONTROL_TRUE;
    case VDCTRL_QUERY_UNSEEN_FRAMES: {
        AVCodecContext *avctx = ctx->avctx;