fftools/ffmpeg: support input frame params in encoding stats

This commit is contained in:
Anton Khirnov 2023-01-27 12:19:16 +01:00
parent 61afbc2376
commit 806ecace91
4 changed files with 92 additions and 6 deletions

View File

@ -2082,18 +2082,35 @@ Index of the output stream in the file.
Frame number. Pre-encoding: number of frames sent to the encoder so far.
Post-encoding: number of packets received from the encoder so far.
@item ni
Input frame number. Index of the input frame (i.e. output by a decoder) that
corresponds to this output frame or packet. -1 if unavailable.
@item tb
Encoder timebase, as a rational number @var{num/den}. Note that this may be
different from the timebase used by the muxer.
@item tbi
Timebase for @var{ptsi}, as a rational number @var{num/den}. Available when
@var{ptsi} is available, @var{0/1} otherwise.
@item pts
Presentation timestamp of the frame or packet, as an integer. Should be
multiplied by the timebase to compute presentation time.
@item ptsi
Presentation timestamp of the input frame (see @var{ni}), as an integer. Should
be multiplied by @var{tbi} to compute presentation time. Printed as
(2^63 - 1 = 9223372036854775807) when not available.
@item t
Presentation time of the frame or packet, as a decimal number. Equal to
@var{pts} multiplied by @var{tb}.
@item ti
Presentation time of the input frame (see @var{ni}), as a decimal number. Equal
to @var{ptsi} multiplied by @var{tbi}. Printed as inf when not available.
@item dts
Decoding timestamp of the packet, as an integer. Should be multiplied by the
timebase to compute presentation time. Post-encoding only.

View File

@ -113,6 +113,13 @@ const int program_birth_year = 2000;
static FILE *vstats_file; static FILE *vstats_file;
// optionally attached as opaque_ref to decoded AVFrames
// Records the identity of a decoded (input) frame so that, after the frame
// has travelled through filtering and encoding, the encoding-stats writer
// can report which input frame a given output frame/packet corresponds to.
typedef struct FrameData {
uint64_t idx; // 0-based index of the decoded frame (set from avctx->frame_number - 1)
int64_t pts; // presentation timestamp of the decoded frame, in tb units
AVRational tb; // timebase for pts (taken from the decoder's pkt_timebase)
} FrameData;
typedef struct BenchmarkTimeStamps { typedef struct BenchmarkTimeStamps {
int64_t real_usec; int64_t real_usec;
int64_t user_usec; int64_t user_usec;
@ -807,6 +814,17 @@ static void enc_stats_write(OutputStream *ost, EncStats *es,
AVRational tb = ost->enc_ctx->time_base; AVRational tb = ost->enc_ctx->time_base;
int64_t pts = frame ? frame->pts : pkt->pts; int64_t pts = frame ? frame->pts : pkt->pts;
AVRational tbi = (AVRational){ 0, 1};
int64_t ptsi = INT64_MAX;
const FrameData *fd;
if ((frame && frame->opaque_ref) || (pkt && pkt->opaque_ref)) {
fd = (const FrameData*)(frame ? frame->opaque_ref->data : pkt->opaque_ref->data);
tbi = fd->tb;
ptsi = fd->pts;
}
for (size_t i = 0; i < es->nb_components; i++) { for (size_t i = 0; i < es->nb_components; i++) {
const EncStatsComponent *c = &es->components[i]; const EncStatsComponent *c = &es->components[i];
@ -815,8 +833,13 @@ static void enc_stats_write(OutputStream *ost, EncStats *es,
case ENC_STATS_FILE_IDX: avio_printf(io, "%d", ost->file_index); continue; case ENC_STATS_FILE_IDX: avio_printf(io, "%d", ost->file_index); continue;
case ENC_STATS_STREAM_IDX: avio_printf(io, "%d", ost->index); continue; case ENC_STATS_STREAM_IDX: avio_printf(io, "%d", ost->index); continue;
case ENC_STATS_TIMEBASE: avio_printf(io, "%d/%d", tb.num, tb.den); continue; case ENC_STATS_TIMEBASE: avio_printf(io, "%d/%d", tb.num, tb.den); continue;
case ENC_STATS_TIMEBASE_IN: avio_printf(io, "%d/%d", tbi.num, tbi.den); continue;
case ENC_STATS_PTS: avio_printf(io, "%"PRId64, pts); continue; case ENC_STATS_PTS: avio_printf(io, "%"PRId64, pts); continue;
case ENC_STATS_PTS_IN: avio_printf(io, "%"PRId64, ptsi); continue;
case ENC_STATS_PTS_TIME: avio_printf(io, "%g", pts * av_q2d(tb)); continue; case ENC_STATS_PTS_TIME: avio_printf(io, "%g", pts * av_q2d(tb)); continue;
case ENC_STATS_PTS_TIME_IN: avio_printf(io, "%g", ptsi == INT64_MAX ?
INFINITY : ptsi * av_q2d(tbi)); continue;
case ENC_STATS_FRAME_NUM_IN: avio_printf(io, "%"PRIu64, fd ? fd->idx : -1); continue;
} }
if (frame) { if (frame) {
@ -2034,7 +2057,8 @@ static int ifilter_send_eof(InputFilter *ifilter, int64_t pts)
// There is the following difference: if you got a frame, you must call // There is the following difference: if you got a frame, you must call
// it again with pkt=NULL. pkt==NULL is treated differently from pkt->size==0 // it again with pkt=NULL. pkt==NULL is treated differently from pkt->size==0
// (pkt==NULL means get more output, pkt->size==0 is a flush/drain packet) // (pkt==NULL means get more output, pkt->size==0 is a flush/drain packet)
static int decode(AVCodecContext *avctx, AVFrame *frame, int *got_frame, AVPacket *pkt) static int decode(InputStream *ist, AVCodecContext *avctx,
AVFrame *frame, int *got_frame, AVPacket *pkt)
{ {
int ret; int ret;
@ -2051,8 +2075,24 @@ static int decode(AVCodecContext *avctx, AVFrame *frame, int *got_frame, AVPacke
ret = avcodec_receive_frame(avctx, frame); ret = avcodec_receive_frame(avctx, frame);
if (ret < 0 && ret != AVERROR(EAGAIN)) if (ret < 0 && ret != AVERROR(EAGAIN))
return ret; return ret;
if (ret >= 0) if (ret >= 0) {
if (ist->want_frame_data) {
FrameData *fd;
av_assert0(!frame->opaque_ref);
frame->opaque_ref = av_buffer_allocz(sizeof(*fd));
if (!frame->opaque_ref) {
av_frame_unref(frame);
return AVERROR(ENOMEM);
}
fd = (FrameData*)frame->opaque_ref->data;
fd->pts = frame->pts;
fd->tb = avctx->pkt_timebase;
fd->idx = avctx->frame_number - 1;
}
*got_frame = 1; *got_frame = 1;
}
return 0; return 0;
} }
@ -2084,7 +2124,7 @@ static int decode_audio(InputStream *ist, AVPacket *pkt, int *got_output,
AVRational decoded_frame_tb; AVRational decoded_frame_tb;
update_benchmark(NULL); update_benchmark(NULL);
ret = decode(avctx, decoded_frame, got_output, pkt); ret = decode(ist, avctx, decoded_frame, got_output, pkt);
update_benchmark("decode_audio %d.%d", ist->file_index, ist->st->index); update_benchmark("decode_audio %d.%d", ist->file_index, ist->st->index);
if (ret < 0) if (ret < 0)
*decode_failed = 1; *decode_failed = 1;
@ -2163,7 +2203,7 @@ static int decode_video(InputStream *ist, AVPacket *pkt, int *got_output, int64_
} }
update_benchmark(NULL); update_benchmark(NULL);
ret = decode(ist->dec_ctx, decoded_frame, got_output, pkt); ret = decode(ist, ist->dec_ctx, decoded_frame, got_output, pkt);
update_benchmark("decode_video %d.%d", ist->file_index, ist->st->index); update_benchmark("decode_video %d.%d", ist->file_index, ist->st->index);
if (ret < 0) if (ret < 0)
*decode_failed = 1; *decode_failed = 1;
@ -3017,6 +3057,12 @@ static int init_output_stream(OutputStream *ost, AVFrame *frame,
if (!av_dict_get(ost->encoder_opts, "threads", NULL, 0)) if (!av_dict_get(ost->encoder_opts, "threads", NULL, 0))
av_dict_set(&ost->encoder_opts, "threads", "auto", 0); av_dict_set(&ost->encoder_opts, "threads", "auto", 0);
if (codec->capabilities & AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE) {
ret = av_dict_set(&ost->encoder_opts, "flags", "+copy_opaque", AV_DICT_MULTIKEY);
if (ret < 0)
return ret;
}
ret = hw_device_setup_for_encode(ost); ret = hw_device_setup_for_encode(ost);
if (ret < 0) { if (ret < 0) {
snprintf(error, error_len, "Device setup failed for " snprintf(error, error_len, "Device setup failed for "

View File

@ -335,6 +335,8 @@ typedef struct InputStream {
#define DECODING_FOR_OST 1 #define DECODING_FOR_OST 1
#define DECODING_FOR_FILTER 2 #define DECODING_FOR_FILTER 2
int processing_needed; /* non zero if the packets must be processed */ int processing_needed; /* non zero if the packets must be processed */
// should attach FrameData as opaque_ref after decoding
int want_frame_data;
/** /**
* Codec parameters - to be used by the decoding/streamcopy code. * Codec parameters - to be used by the decoding/streamcopy code.
@ -493,9 +495,13 @@ enum EncStatsType {
ENC_STATS_FILE_IDX, ENC_STATS_FILE_IDX,
ENC_STATS_STREAM_IDX, ENC_STATS_STREAM_IDX,
ENC_STATS_FRAME_NUM, ENC_STATS_FRAME_NUM,
ENC_STATS_FRAME_NUM_IN,
ENC_STATS_TIMEBASE, ENC_STATS_TIMEBASE,
ENC_STATS_TIMEBASE_IN,
ENC_STATS_PTS, ENC_STATS_PTS,
ENC_STATS_PTS_TIME, ENC_STATS_PTS_TIME,
ENC_STATS_PTS_IN,
ENC_STATS_PTS_TIME_IN,
ENC_STATS_DTS, ENC_STATS_DTS,
ENC_STATS_DTS_TIME, ENC_STATS_DTS_TIME,
ENC_STATS_SAMPLE_NUM, ENC_STATS_SAMPLE_NUM,

View File

@ -269,13 +269,18 @@ static int enc_stats_init(OutputStream *ost, int pre,
const char *str; const char *str;
int pre_only:1; int pre_only:1;
int post_only:1; int post_only:1;
int need_input_data:1;
} fmt_specs[] = { } fmt_specs[] = {
{ ENC_STATS_FILE_IDX, "fidx" }, { ENC_STATS_FILE_IDX, "fidx" },
{ ENC_STATS_STREAM_IDX, "sidx" }, { ENC_STATS_STREAM_IDX, "sidx" },
{ ENC_STATS_FRAME_NUM, "n" }, { ENC_STATS_FRAME_NUM, "n" },
{ ENC_STATS_FRAME_NUM_IN, "ni", 0, 0, 1 },
{ ENC_STATS_TIMEBASE, "tb" }, { ENC_STATS_TIMEBASE, "tb" },
{ ENC_STATS_TIMEBASE_IN, "tbi", 0, 0, 1 },
{ ENC_STATS_PTS, "pts" }, { ENC_STATS_PTS, "pts" },
{ ENC_STATS_PTS_TIME, "t" }, { ENC_STATS_PTS_TIME, "t" },
{ ENC_STATS_PTS_IN, "ptsi", 0, 0, 1 },
{ ENC_STATS_PTS_TIME_IN, "ti", 0, 0, 1 },
{ ENC_STATS_DTS, "dts", 0, 1 }, { ENC_STATS_DTS, "dts", 0, 1 },
{ ENC_STATS_DTS_TIME, "dt", 0, 1 }, { ENC_STATS_DTS_TIME, "dt", 0, 1 },
{ ENC_STATS_SAMPLE_NUM, "sn", 1 }, { ENC_STATS_SAMPLE_NUM, "sn", 1 },
@ -345,6 +350,18 @@ static int enc_stats_init(OutputStream *ost, int pre,
} }
c->type = fmt_specs[i].type; c->type = fmt_specs[i].type;
if (fmt_specs[i].need_input_data) {
if (ost->ist)
ost->ist->want_frame_data = 1;
else {
av_log(ost, AV_LOG_WARNING,
"Format directive '%s' is unavailable, because "
"this output stream has no associated input stream\n",
val);
}
}
break; break;
} }
} }
@ -428,6 +445,7 @@ static OutputStream *new_output_stream(Muxer *mux, const OptionsContext *o,
ms->last_mux_dts = AV_NOPTS_VALUE; ms->last_mux_dts = AV_NOPTS_VALUE;
ost->st = st; ost->st = st;
ost->ist = ist;
ost->kf.ref_pts = AV_NOPTS_VALUE; ost->kf.ref_pts = AV_NOPTS_VALUE;
st->codecpar->codec_type = type; st->codecpar->codec_type = type;
@ -605,8 +623,7 @@ static OutputStream *new_output_stream(Muxer *mux, const OptionsContext *o,
if (ost->enc_ctx && av_get_exact_bits_per_sample(ost->enc_ctx->codec_id) == 24) if (ost->enc_ctx && av_get_exact_bits_per_sample(ost->enc_ctx->codec_id) == 24)
av_dict_set(&ost->swr_opts, "output_sample_bits", "24", 0); av_dict_set(&ost->swr_opts, "output_sample_bits", "24", 0);
if (ist) { if (ost->ist) {
ost->ist = ist;
ost->ist->discard = 0; ost->ist->discard = 0;
ost->ist->st->discard = ost->ist->user_set_discard; ost->ist->st->discard = ost->ist->user_set_discard;
} }