avcodec/nvenc: rework on DTS generation

Before this patch, the method used to generate DTS only worked when the timebase was equal to 1/fps. With a timebase like 1/1000:

    ./ffmpeg -i foo.mp4 -an -c:v h264_nvenc -enc_time_base 1/1000 bar.mp4

    pts 0 dts -3
    pts 160 dts 37
    pts 80 dts 77
    pts 40 dts 117 <-- invalid
    pts 120 dts 157
    pts 320 dts 197
    pts 240 dts 237
    pts 200 dts 277 <-- invalid
    pts 280 dts 317 <-- invalid

The generated DTS can be larger than the PTS, because the old code only reordered the input PTS values and subtracted the number of delayed frames, which doesn't take the timebase into account. It should subtract the "time" of the frame delay instead. 9a245bd tried to fix the issue, but the implementation was incomplete: it only used time_base.num. It was then reverted by ac7c265b33.

After this patch:

    pts 0 dts -120
    pts 160 dts -80
    pts 80 dts -40
    pts 40 dts 0
    pts 120 dts 40
    pts 320 dts 80
    pts 240 dts 120
    pts 200 dts 160
    pts 280 dts 200

Signed-off-by: Timo Rothenpieler <timo@rothenpieler.org>
2025-01-13 19:01:03 +00:00 · 2024-07-17 17:47:13 +08:00 · 2024-07-17 17:47:13 +08:00 · 670ff6c7ce
commit 670ff6c7ce
parent bcea693f75
2 changed files with 55 additions and 10 deletions
--- a/libavcodec/nvenc.c
+++ b/libavcodec/nvenc.c
@ -1893,7 +1893,8 @@ static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx)
    if (!ctx->frame_data_array)
        return AVERROR(ENOMEM);

-    ctx->timestamp_list = av_fifo_alloc2(ctx->nb_surfaces, sizeof(int64_t), 0);
+    ctx->timestamp_list = av_fifo_alloc2(ctx->nb_surfaces + ctx->encode_config.frameIntervalP,
+                                         sizeof(int64_t), 0);
    if (!ctx->timestamp_list)
        return AVERROR(ENOMEM);

@ -2347,26 +2348,65 @@ static inline int64_t timestamp_queue_dequeue(AVFifo *queue)
    return timestamp;
 }

+static inline int64_t timestamp_queue_peek(AVFifo *queue, size_t index) /* read the entry at offset index; the queue is not modified */
+{
+    int64_t timestamp = AV_NOPTS_VALUE; /* returned unchanged if av_fifo_peek() writes nothing (presumably when index is past the queued entries -- confirm against libavutil/fifo.h) */
+    av_fifo_peek(queue, &timestamp, 1, index); /* return value is not checked; callers test the result against AV_NOPTS_VALUE */
+
+    return timestamp;
+}
+
 static int nvenc_set_timestamp(AVCodecContext *avctx,
                               NV_ENC_LOCK_BITSTREAM *params,
                               AVPacket *pkt)
 {
    NvencContext *ctx = avctx->priv_data;
+    int delay; // assumed reorder delay, in frames
+    int64_t delay_time; // the same delay expressed in timestamp units

    pkt->pts = params->outputTimeStamp;

-    if (avctx->codec_descriptor->props & AV_CODEC_PROP_REORDER) {
-FF_DISABLE_DEPRECATION_WARNINGS
-        pkt->dts = timestamp_queue_dequeue(ctx->timestamp_list) -
-#if FF_API_TICKS_PER_FRAME
-            FFMAX(avctx->ticks_per_frame, 1) *
-#endif
-            FFMAX(ctx->encode_config.frameIntervalP - 1, 0);
-FF_ENABLE_DEPRECATION_WARNINGS
-    } else {
+    if (!(avctx->codec_descriptor->props & AV_CODEC_PROP_REORDER)) { // codec never reorders: DTS equals PTS
        pkt->dts = pkt->pts;
+        return 0;
    }

+    // Upper bound on the reorder delay in frames; the real delay may be smaller, but it is not known here.
+    delay = FFMAX(ctx->encode_config.frameIntervalP - 1, 0);
+    if (ctx->output_frame_num >= delay) { // past the first `delay` packets (always true when delay == 0)
+        pkt->dts = timestamp_queue_dequeue(ctx->timestamp_list); // DTS is the next queued timestamp
+        ctx->output_frame_num++;
+        return 0;
+    }
+    /* First `delay` packets since start or flush: synthesize DTS values below the first queued timestamp. */
+    delay_time = ctx->initial_delay_time;
+    if (!delay_time) { // 0 means "not computed yet"; a computed value of 0 is therefore recomputed on the next call
+        int64_t t1, t2, t3;
+        t1 = timestamp_queue_peek(ctx->timestamp_list, delay); // entry `delay` positions ahead, or AV_NOPTS_VALUE
+        t2 = timestamp_queue_peek(ctx->timestamp_list, 0); // entry at the head of the queue
+        t3 = (delay > 1) ? timestamp_queue_peek(ctx->timestamp_list, 1) : t1; // second entry, used to estimate one frame interval
+
+        if (t1 != AV_NOPTS_VALUE) { // preferred: exact span covered by the first `delay` frames
+            delay_time = t1 - t2;
+        } else if (avctx->framerate.num > 0 && avctx->framerate.den > 0) { // else: `delay` frame durations rescaled to time_base
+            delay_time = av_rescale_q(delay, (AVRational) {avctx->framerate.den, avctx->framerate.num},
+                                      avctx->time_base);
+        } else if (t3 != AV_NOPTS_VALUE) { // else: extrapolate from the first observed frame interval
+            delay_time = delay * (t3 - t2);
+        } else { // last resort: one timestamp unit per delayed frame
+            delay_time = delay;
+        }
+        ctx->initial_delay_time = delay_time; // cached; ff_nvenc_encode_flush() resets it to 0
+    }
+    /* NOTE(review): if the queue is empty here, peek(0) yields AV_NOPTS_VALUE and the arithmetic below is applied to it -- confirm this cannot happen. */
+    /* Using timestamp_queue_peek(ctx->timestamp_list, ctx->output_frame_num) - delay_time
+     * would be simpler, but doesn't guarantee monotonic DTS with VFR input when
+     * delay_time isn't accurate (that is, t1 == AV_NOPTS_VALUE). Instead, step
+     * linearly from (head timestamp - delay_time) towards the head timestamp; nothing is dequeued here.
+     */
+    pkt->dts = timestamp_queue_peek(ctx->timestamp_list, 0) - delay_time * (delay - ctx->output_frame_num) / delay;
+    ctx->output_frame_num++;
+
    return 0;
 }

@ -2902,4 +2942,6 @@ av_cold void ff_nvenc_encode_flush(AVCodecContext *avctx)

    nvenc_send_frame(avctx, NULL);
    av_fifo_reset2(ctx->timestamp_list);
+    ctx->output_frame_num = 0;
+    ctx->initial_delay_time = 0;
 }
--- a/libavcodec/nvenc.h
+++ b/libavcodec/nvenc.h
@ -206,6 +206,9 @@ typedef struct NvencContext
    AVFifo *output_surface_queue;
    AVFifo *output_surface_ready_queue;
    AVFifo *timestamp_list;
+    // Used for DTS calculation; both fields are reset after flush
+    int64_t output_frame_num;
+    int64_t initial_delay_time;

    NV_ENC_SEI_PAYLOAD *sei_data;
    int sei_data_size;