fftools/ffmpeg: support input frame params in encoding stats

This commit is contained in:
Anton Khirnov 2023-01-27 12:19:16 +01:00
parent 61afbc2376
commit 806ecace91
4 changed files with 92 additions and 6 deletions

View File

@ -2082,18 +2082,35 @@ Index of the output stream in the file.
Frame number. Pre-encoding: number of frames sent to the encoder so far.
Post-encoding: number of packets received from the encoder so far.
@item ni
Input frame number. Index of the input frame (i.e. output by a decoder) that
corresponds to this output frame or packet. -1 if unavailable.
@item tb
Encoder timebase, as a rational number @var{num/den}. Note that this may be
different from the timebase used by the muxer.
@item tbi
Timebase for @var{ptsi}, as a rational number @var{num/den}. Available when
@var{ptsi} is available, @var{0/1} otherwise.
@item pts
Presentation timestamp of the frame or packet, as an integer. Should be
multiplied by the timebase to compute presentation time.
@item ptsi
Presentation timestamp of the input frame (see @var{ni}), as an integer. Should
be multiplied by @var{tbi} to compute presentation time. Printed as
(2^63 - 1 = 9223372036854775807) when not available.
@item t
Presentation time of the frame or packet, as a decimal number. Equal to
@var{pts} multiplied by @var{tb}.
@item ti
Presentation time of the input frame (see @var{ni}), as a decimal number. Equal
to @var{ptsi} multiplied by @var{tbi}. Printed as inf when not available.
@item dts
Decoding timestamp of the packet, as an integer. Should be multiplied by the
timebase to compute presentation time. Post-encoding only.

View File

@ -113,6 +113,13 @@ const int program_birth_year = 2000;
static FILE *vstats_file;
// optionally attached as opaque_ref to decoded AVFrames
// Per-frame metadata attached to decoded AVFrames (via frame->opaque_ref)
// so that encoding-stats output can report input-frame information
// (format directives ni/tbi/ptsi/ti); populated in decode() when
// ist->want_frame_data is set.
typedef struct FrameData {
// 0-based index of the decoded frame (set from avctx->frame_number - 1)
uint64_t idx;
// pts of the decoded frame, in tb units
int64_t pts;
// timebase for pts (set from avctx->pkt_timebase)
AVRational tb;
} FrameData;
typedef struct BenchmarkTimeStamps {
int64_t real_usec;
int64_t user_usec;
@ -807,6 +814,17 @@ static void enc_stats_write(OutputStream *ost, EncStats *es,
AVRational tb = ost->enc_ctx->time_base;
int64_t pts = frame ? frame->pts : pkt->pts;
AVRational tbi = (AVRational){ 0, 1};
int64_t ptsi = INT64_MAX;
const FrameData *fd;
if ((frame && frame->opaque_ref) || (pkt && pkt->opaque_ref)) {
fd = (const FrameData*)(frame ? frame->opaque_ref->data : pkt->opaque_ref->data);
tbi = fd->tb;
ptsi = fd->pts;
}
for (size_t i = 0; i < es->nb_components; i++) {
const EncStatsComponent *c = &es->components[i];
@ -815,8 +833,13 @@ static void enc_stats_write(OutputStream *ost, EncStats *es,
case ENC_STATS_FILE_IDX: avio_printf(io, "%d", ost->file_index); continue;
case ENC_STATS_STREAM_IDX: avio_printf(io, "%d", ost->index); continue;
case ENC_STATS_TIMEBASE: avio_printf(io, "%d/%d", tb.num, tb.den); continue;
case ENC_STATS_TIMEBASE_IN: avio_printf(io, "%d/%d", tbi.num, tbi.den); continue;
case ENC_STATS_PTS: avio_printf(io, "%"PRId64, pts); continue;
case ENC_STATS_PTS_IN: avio_printf(io, "%"PRId64, ptsi); continue;
case ENC_STATS_PTS_TIME: avio_printf(io, "%g", pts * av_q2d(tb)); continue;
case ENC_STATS_PTS_TIME_IN: avio_printf(io, "%g", ptsi == INT64_MAX ?
INFINITY : ptsi * av_q2d(tbi)); continue;
case ENC_STATS_FRAME_NUM_IN: avio_printf(io, "%"PRIu64, fd ? fd->idx : -1); continue;
}
if (frame) {
@ -2034,7 +2057,8 @@ static int ifilter_send_eof(InputFilter *ifilter, int64_t pts)
// There is the following difference: if you got a frame, you must call
// it again with pkt=NULL. pkt==NULL is treated differently from pkt->size==0
// (pkt==NULL means get more output, pkt->size==0 is a flush/drain packet)
static int decode(AVCodecContext *avctx, AVFrame *frame, int *got_frame, AVPacket *pkt)
static int decode(InputStream *ist, AVCodecContext *avctx,
AVFrame *frame, int *got_frame, AVPacket *pkt)
{
int ret;
@ -2051,8 +2075,24 @@ static int decode(AVCodecContext *avctx, AVFrame *frame, int *got_frame, AVPacke
ret = avcodec_receive_frame(avctx, frame);
if (ret < 0 && ret != AVERROR(EAGAIN))
return ret;
if (ret >= 0)
if (ret >= 0) {
if (ist->want_frame_data) {
FrameData *fd;
av_assert0(!frame->opaque_ref);
frame->opaque_ref = av_buffer_allocz(sizeof(*fd));
if (!frame->opaque_ref) {
av_frame_unref(frame);
return AVERROR(ENOMEM);
}
fd = (FrameData*)frame->opaque_ref->data;
fd->pts = frame->pts;
fd->tb = avctx->pkt_timebase;
fd->idx = avctx->frame_number - 1;
}
*got_frame = 1;
}
return 0;
}
@ -2084,7 +2124,7 @@ static int decode_audio(InputStream *ist, AVPacket *pkt, int *got_output,
AVRational decoded_frame_tb;
update_benchmark(NULL);
ret = decode(avctx, decoded_frame, got_output, pkt);
ret = decode(ist, avctx, decoded_frame, got_output, pkt);
update_benchmark("decode_audio %d.%d", ist->file_index, ist->st->index);
if (ret < 0)
*decode_failed = 1;
@ -2163,7 +2203,7 @@ static int decode_video(InputStream *ist, AVPacket *pkt, int *got_output, int64_
}
update_benchmark(NULL);
ret = decode(ist->dec_ctx, decoded_frame, got_output, pkt);
ret = decode(ist, ist->dec_ctx, decoded_frame, got_output, pkt);
update_benchmark("decode_video %d.%d", ist->file_index, ist->st->index);
if (ret < 0)
*decode_failed = 1;
@ -3017,6 +3057,12 @@ static int init_output_stream(OutputStream *ost, AVFrame *frame,
if (!av_dict_get(ost->encoder_opts, "threads", NULL, 0))
av_dict_set(&ost->encoder_opts, "threads", "auto", 0);
if (codec->capabilities & AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE) {
ret = av_dict_set(&ost->encoder_opts, "flags", "+copy_opaque", AV_DICT_MULTIKEY);
if (ret < 0)
return ret;
}
ret = hw_device_setup_for_encode(ost);
if (ret < 0) {
snprintf(error, error_len, "Device setup failed for "

View File

@ -335,6 +335,8 @@ typedef struct InputStream {
#define DECODING_FOR_OST 1
#define DECODING_FOR_FILTER 2
int processing_needed; /* non zero if the packets must be processed */
// should attach FrameData as opaque_ref after decoding
int want_frame_data;
/**
* Codec parameters - to be used by the decoding/streamcopy code.
@ -493,9 +495,13 @@ enum EncStatsType {
ENC_STATS_FILE_IDX,
ENC_STATS_STREAM_IDX,
ENC_STATS_FRAME_NUM,
ENC_STATS_FRAME_NUM_IN,
ENC_STATS_TIMEBASE,
ENC_STATS_TIMEBASE_IN,
ENC_STATS_PTS,
ENC_STATS_PTS_TIME,
ENC_STATS_PTS_IN,
ENC_STATS_PTS_TIME_IN,
ENC_STATS_DTS,
ENC_STATS_DTS_TIME,
ENC_STATS_SAMPLE_NUM,

View File

@ -269,13 +269,18 @@ static int enc_stats_init(OutputStream *ost, int pre,
const char *str;
int pre_only:1;
int post_only:1;
int need_input_data:1;
} fmt_specs[] = {
{ ENC_STATS_FILE_IDX, "fidx" },
{ ENC_STATS_STREAM_IDX, "sidx" },
{ ENC_STATS_FRAME_NUM, "n" },
{ ENC_STATS_FRAME_NUM_IN, "ni", 0, 0, 1 },
{ ENC_STATS_TIMEBASE, "tb" },
{ ENC_STATS_TIMEBASE_IN, "tbi", 0, 0, 1 },
{ ENC_STATS_PTS, "pts" },
{ ENC_STATS_PTS_TIME, "t" },
{ ENC_STATS_PTS_IN, "ptsi", 0, 0, 1 },
{ ENC_STATS_PTS_TIME_IN, "ti", 0, 0, 1 },
{ ENC_STATS_DTS, "dts", 0, 1 },
{ ENC_STATS_DTS_TIME, "dt", 0, 1 },
{ ENC_STATS_SAMPLE_NUM, "sn", 1 },
@ -345,6 +350,18 @@ static int enc_stats_init(OutputStream *ost, int pre,
}
c->type = fmt_specs[i].type;
if (fmt_specs[i].need_input_data) {
if (ost->ist)
ost->ist->want_frame_data = 1;
else {
av_log(ost, AV_LOG_WARNING,
"Format directive '%s' is unavailable, because "
"this output stream has no associated input stream\n",
val);
}
}
break;
}
}
@ -428,6 +445,7 @@ static OutputStream *new_output_stream(Muxer *mux, const OptionsContext *o,
ms->last_mux_dts = AV_NOPTS_VALUE;
ost->st = st;
ost->ist = ist;
ost->kf.ref_pts = AV_NOPTS_VALUE;
st->codecpar->codec_type = type;
@ -605,8 +623,7 @@ static OutputStream *new_output_stream(Muxer *mux, const OptionsContext *o,
if (ost->enc_ctx && av_get_exact_bits_per_sample(ost->enc_ctx->codec_id) == 24)
av_dict_set(&ost->swr_opts, "output_sample_bits", "24", 0);
if (ist) {
ost->ist = ist;
if (ost->ist) {
ost->ist->discard = 0;
ost->ist->st->discard = ost->ist->user_set_discard;
}