avcodec: add a bsf to reorder DTS into PTS

Starting with an h264 implementation. Can be extended to support other codecs.

A few caveats:
- OpenGOP streams are currently not supported. The firt packet must be an IDR
  frame.
- In some streams, a few frames at the end may not get a reordered PTS when
  they reference frames past EOS. The code added to derive timestamps from
  previous frames needs to extended.

Addresses ticket #502.

Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
James Almer 2022-08-15 14:46:09 -03:00
parent 8f4b000c37
commit b92e14147f
6 changed files with 540 additions and 1 deletions

View File

@ -16,6 +16,7 @@ version <next>:
- LAF demuxer
- APAC decoder and demuxer
- Media 100i decoders
- DTS to PTS reorder bsf
version 5.1:

1
configure vendored
View File

@ -3284,6 +3284,7 @@ aac_adtstoasc_bsf_select="adts_header mpeg4audio"
av1_frame_merge_bsf_select="cbs_av1"
av1_frame_split_bsf_select="cbs_av1"
av1_metadata_bsf_select="cbs_av1"
dts2pts_bsf_select="cbs_h264 h264parse"
eac3_core_bsf_select="ac3_parser"
filter_units_bsf_select="cbs"
h264_metadata_bsf_deps="const_nan"

View File

@ -1184,6 +1184,7 @@ OBJS-$(CONFIG_AV1_FRAME_SPLIT_BSF) += av1_frame_split_bsf.o
OBJS-$(CONFIG_CHOMP_BSF) += chomp_bsf.o
OBJS-$(CONFIG_DUMP_EXTRADATA_BSF) += dump_extradata_bsf.o
OBJS-$(CONFIG_DCA_CORE_BSF) += dca_core_bsf.o
OBJS-$(CONFIG_DTS2PTS_BSF) += dts2pts_bsf.o
OBJS-$(CONFIG_DV_ERROR_MARKER_BSF) += dv_error_marker_bsf.o
OBJS-$(CONFIG_EAC3_CORE_BSF) += eac3_core_bsf.o
OBJS-$(CONFIG_EXTRACT_EXTRADATA_BSF) += extract_extradata_bsf.o \

View File

@ -31,6 +31,7 @@ extern const FFBitStreamFilter ff_av1_metadata_bsf;
extern const FFBitStreamFilter ff_chomp_bsf;
extern const FFBitStreamFilter ff_dump_extradata_bsf;
extern const FFBitStreamFilter ff_dca_core_bsf;
extern const FFBitStreamFilter ff_dts2pts_bsf;
extern const FFBitStreamFilter ff_dv_error_marker_bsf;
extern const FFBitStreamFilter ff_eac3_core_bsf;
extern const FFBitStreamFilter ff_extract_extradata_bsf;

535
libavcodec/dts2pts_bsf.c Normal file
View File

@ -0,0 +1,535 @@
/*
* Copyright (c) 2022 James Almer
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* Derive PTS by reordering DTS from supported streams
*/
#include "libavutil/avassert.h"
#include "libavutil/fifo.h"
#include "libavutil/tree.h"
#include "bsf.h"
#include "bsf_internal.h"
#include "cbs.h"
#include "cbs_h264.h"
#include "h264_parse.h"
#include "h264_ps.h"
typedef struct DTS2PTSNode {
int64_t dts;
int64_t duration;
int poc;
int gop;
} DTS2PTSNode;
typedef struct DTS2PTSFrame {
AVPacket *pkt;
int poc;
int poc_diff;
int gop;
} DTS2PTSFrame;
typedef struct DTS2PTSH264Context {
H264POCContext poc;
SPS sps;
int poc_diff;
int last_poc;
int highest_poc;
int picture_structure;
} DTS2PTSH264Context;
typedef struct DTS2PTSContext {
struct AVTreeNode *root;
AVFifo *fifo;
// Codec specific function pointers and constants
int (*init)(AVBSFContext *ctx);
int (*filter)(AVBSFContext *ctx);
void (*flush)(AVBSFContext *ctx);
size_t fifo_size;
CodedBitstreamContext *cbc;
CodedBitstreamFragment au;
union {
DTS2PTSH264Context h264;
} u;
int nb_frame;
int gop;
int eof;
} DTS2PTSContext;
// AVTreeNode callbacks
static int cmp_insert(const void *key, const void *node)
{
int ret = ((const DTS2PTSNode *)key)->poc - ((const DTS2PTSNode *)node)->poc;
if (!ret)
ret = ((const DTS2PTSNode *)key)->gop - ((const DTS2PTSNode *)node)->gop;
return ret;
}
static int cmp_find(const void *key, const void *node)
{
int ret = ((const DTS2PTSFrame *)key)->poc - ((const DTS2PTSNode *) node)->poc;
if (!ret)
ret = ((const DTS2PTSFrame *)key)->gop - ((const DTS2PTSNode *) node)->gop;
return ret;
}
static int dec_poc(void *opaque, void *elem)
{
DTS2PTSNode *node = elem;
int dec = *(int *)opaque;
node->poc -= dec;
return 0;
}
static int free_node(void *opaque, void *elem)
{
DTS2PTSNode *node = elem;
av_free(node);
return 0;
}
// Shared functions
static int alloc_and_insert_node(AVBSFContext *ctx, int64_t ts, int64_t duration,
int poc, int poc_diff, int gop)
{
DTS2PTSContext *s = ctx->priv_data;
for (int i = 0; i < poc_diff; i++) {
struct AVTreeNode *node = av_tree_node_alloc();
DTS2PTSNode *poc_node, *ret;
if (!node)
return AVERROR(ENOMEM);
poc_node = av_malloc(sizeof(*poc_node));
if (!poc_node) {
av_free(node);
return AVERROR(ENOMEM);
}
if (i && ts != AV_NOPTS_VALUE)
ts += duration / poc_diff;
*poc_node = (DTS2PTSNode) { ts, duration, poc++, gop };
ret = av_tree_insert(&s->root, poc_node, cmp_insert, &node);
if (ret && ret != poc_node) {
*ret = *poc_node;
av_free(poc_node);
av_free(node);
}
}
return 0;
}
// H.264
static const CodedBitstreamUnitType h264_decompose_unit_types[] = {
H264_NAL_SPS,
H264_NAL_PPS,
H264_NAL_IDR_SLICE,
H264_NAL_SLICE,
};
static int h264_init(AVBSFContext *ctx)
{
DTS2PTSContext *s = ctx->priv_data;
DTS2PTSH264Context *h264 = &s->u.h264;
s->cbc->decompose_unit_types = h264_decompose_unit_types;
s->cbc->nb_decompose_unit_types = FF_ARRAY_ELEMS(h264_decompose_unit_types);
s->nb_frame = -(ctx->par_in->video_delay << 1);
h264->last_poc = h264->highest_poc = INT_MIN;
return 0;
}
static int get_mmco_reset(const H264RawSliceHeader *header)
{
if (header->nal_unit_header.nal_ref_idc == 0 ||
!header->adaptive_ref_pic_marking_mode_flag)
return 0;
for (int i = 0; i < H264_MAX_MMCO_COUNT; i++) {
if (header->mmco[i].memory_management_control_operation == 0)
return 0;
else if (header->mmco[i].memory_management_control_operation == 5)
return 1;
}
return 0;
}
static int h264_queue_frame(AVBSFContext *ctx, AVPacket *pkt, int poc, int *queued)
{
DTS2PTSContext *s = ctx->priv_data;
DTS2PTSH264Context *h264 = &s->u.h264;
DTS2PTSFrame frame;
int poc_diff, ret;
poc_diff = (h264->picture_structure == 3) + 1;
if (h264->sps.frame_mbs_only_flag && h264->poc_diff)
poc_diff = FFMIN(poc_diff, h264->poc_diff);
if (poc < 0) {
av_tree_enumerate(s->root, &poc_diff, NULL, dec_poc);
s->nb_frame -= poc_diff;
}
// Check if there was a POC reset (Like an IDR slice)
if (s->nb_frame > h264->highest_poc) {
s->nb_frame = 0;
s->gop = (s->gop + 1) % s->fifo_size;
h264->highest_poc = h264->last_poc;
}
ret = alloc_and_insert_node(ctx, pkt->dts, pkt->duration, s->nb_frame, poc_diff, s->gop);
if (ret < 0)
return ret;
av_log(ctx, AV_LOG_DEBUG, "Queueing frame with POC %d, GOP %d, dts %"PRId64"\n",
poc, s->gop, pkt->dts);
s->nb_frame += poc_diff;
// Add frame to output FIFO only once
if (*queued)
return 0;
frame = (DTS2PTSFrame) { pkt, poc, poc_diff, s->gop };
ret = av_fifo_write(s->fifo, &frame, 1);
av_assert2(ret >= 0);
*queued = 1;
return 0;
}
static int h264_filter(AVBSFContext *ctx)
{
DTS2PTSContext *s = ctx->priv_data;
DTS2PTSH264Context *h264 = &s->u.h264;
CodedBitstreamFragment *au = &s->au;
AVPacket *in;
int output_picture_number = INT_MIN;
int field_poc[2];
int queued = 0, ret;
ret = ff_bsf_get_packet(ctx, &in);
if (ret < 0)
return ret;
ret = ff_cbs_read_packet(s->cbc, au, in);
if (ret < 0) {
av_log(ctx, AV_LOG_WARNING, "Failed to parse access unit.\n");
goto fail;
}
for (int i = 0; i < au->nb_units; i++) {
CodedBitstreamUnit *unit = &au->units[i];
switch (unit->type) {
case H264_NAL_IDR_SLICE:
h264->poc.prev_frame_num = 0;
h264->poc.prev_frame_num_offset = 0;
h264->poc.prev_poc_msb =
h264->poc.prev_poc_lsb = 0;
// fall-through
case H264_NAL_SLICE: {
const H264RawSlice *slice = unit->content;
const H264RawSliceHeader *header = &slice->header;
const CodedBitstreamH264Context *cbs_h264 = s->cbc->priv_data;
const H264RawSPS *sps = cbs_h264->active_sps;
int got_reset;
if (!sps) {
av_log(ctx, AV_LOG_ERROR, "No active SPS for a slice\n");
goto fail;
}
// Initialize the SPS struct with the fields ff_h264_init_poc() cares about
h264->sps.frame_mbs_only_flag = sps->frame_mbs_only_flag;
h264->sps.log2_max_frame_num = sps->log2_max_frame_num_minus4 + 4;
h264->sps.poc_type = sps->pic_order_cnt_type;
h264->sps.log2_max_poc_lsb = sps->log2_max_pic_order_cnt_lsb_minus4 + 4;
h264->sps.offset_for_non_ref_pic = sps->offset_for_non_ref_pic;
h264->sps.offset_for_top_to_bottom_field = sps->offset_for_top_to_bottom_field;
h264->sps.poc_cycle_length = sps->num_ref_frames_in_pic_order_cnt_cycle;
for (int i = 0; i < h264->sps.poc_cycle_length; i++)
h264->sps.offset_for_ref_frame[i] = sps->offset_for_ref_frame[i];
h264->picture_structure = sps->frame_mbs_only_flag ? 3 :
(header->field_pic_flag ?
header->field_pic_flag + header->bottom_field_flag : 3);
h264->poc.frame_num = header->frame_num;
h264->poc.poc_lsb = header->pic_order_cnt_lsb;
h264->poc.delta_poc_bottom = header->delta_pic_order_cnt_bottom;
h264->poc.delta_poc[0] = header->delta_pic_order_cnt[0];
h264->poc.delta_poc[1] = header->delta_pic_order_cnt[1];
field_poc[0] = field_poc[1] = INT_MAX;
ret = ff_h264_init_poc(field_poc, &output_picture_number, &h264->sps,
&h264->poc, h264->picture_structure,
header->nal_unit_header.nal_ref_idc);
if (ret < 0) {
av_log(ctx, AV_LOG_ERROR, "ff_h264_init_poc() failure\n");
goto fail;
}
got_reset = get_mmco_reset(header);
h264->poc.prev_frame_num = got_reset ? 0 : h264->poc.frame_num;
h264->poc.prev_frame_num_offset = got_reset ? 0 : h264->poc.frame_num_offset;
if (header->nal_unit_header.nal_ref_idc != 0) {
h264->poc.prev_poc_msb = got_reset ? 0 : h264->poc.poc_msb;
if (got_reset)
h264->poc.prev_poc_lsb = h264->picture_structure == 2 ? 0 : field_poc[0];
else
h264->poc.prev_poc_lsb = h264->poc.poc_lsb;
}
if (output_picture_number != h264->last_poc) {
if (h264->last_poc != INT_MIN) {
int diff = FFABS(h264->last_poc - output_picture_number);
if ((output_picture_number < 0) && !h264->last_poc)
h264->poc_diff = 0;
else if (FFABS(output_picture_number) < h264->poc_diff) {
diff = FFABS(output_picture_number);
h264->poc_diff = 0;
}
if (!h264->poc_diff || (h264->poc_diff > diff)) {
h264->poc_diff = diff;
if (h264->poc_diff == 1 && h264->sps.frame_mbs_only_flag) {
av_tree_enumerate(s->root, &h264->poc_diff, NULL, dec_poc);
s->nb_frame -= 2;
}
}
}
h264->last_poc = output_picture_number;
h264->highest_poc = FFMAX(h264->highest_poc, output_picture_number);
ret = h264_queue_frame(ctx, in, output_picture_number, &queued);
if (ret < 0)
goto fail;
}
break;
}
default:
break;
}
}
if (output_picture_number == INT_MIN) {
av_log(ctx, AV_LOG_ERROR, "No slices in access unit\n");
ret = AVERROR_INVALIDDATA;
goto fail;
}
ret = 0;
fail:
ff_cbs_fragment_reset(au);
if (!queued)
av_packet_free(&in);
return ret;
}
static void h264_flush(AVBSFContext *ctx)
{
DTS2PTSContext *s = ctx->priv_data;
DTS2PTSH264Context *h264 = &s->u.h264;
memset(&h264->sps, 0, sizeof(h264->sps));
memset(&h264->poc, 0, sizeof(h264->poc));
s->nb_frame = -(ctx->par_in->video_delay << 1);
h264->last_poc = h264->highest_poc = INT_MIN;
}
// Core functions
static const struct {
enum AVCodecID id;
int (*init)(AVBSFContext *ctx);
int (*filter)(AVBSFContext *ctx);
void (*flush)(AVBSFContext *ctx);
size_t fifo_size;
} func_tab[] = {
{ AV_CODEC_ID_H264, h264_init, h264_filter, h264_flush, H264_MAX_DPB_FRAMES * 2 * 2 },
};
static int dts2pts_init(AVBSFContext *ctx)
{
DTS2PTSContext *s = ctx->priv_data;
CodedBitstreamFragment *au = &s->au;
int i, ret;
for (i = 0; i < FF_ARRAY_ELEMS(func_tab); i++) {
if (func_tab[i].id == ctx->par_in->codec_id) {
s->init = func_tab[i].init;
s->filter = func_tab[i].filter;
s->flush = func_tab[i].flush;
s->fifo_size = func_tab[i].fifo_size;
break;
}
}
if (i == FF_ARRAY_ELEMS(func_tab))
return AVERROR_BUG;
av_assert0(s->filter && s->fifo_size);
s->fifo = av_fifo_alloc2(s->fifo_size, sizeof(DTS2PTSFrame), 0);
if (!s->fifo)
return AVERROR(ENOMEM);
ret = ff_cbs_init(&s->cbc, ctx->par_in->codec_id, ctx);
if (ret < 0)
return ret;
if (s->init) {
ret = s->init(ctx);
if (ret < 0)
return ret;
}
if (!ctx->par_in->extradata_size)
return 0;
ret = ff_cbs_read_extradata(s->cbc, au, ctx->par_in);
if (ret < 0)
av_log(ctx, AV_LOG_WARNING, "Failed to parse extradata.\n");
ff_cbs_fragment_reset(au);
return 0;
}
static int dts2pts_filter(AVBSFContext *ctx, AVPacket *out)
{
DTS2PTSContext *s = ctx->priv_data;
DTS2PTSNode *poc_node = NULL, *next[2] = { NULL, NULL };
DTS2PTSFrame frame;
int ret;
// Fill up the FIFO and POC tree
while (!s->eof && av_fifo_can_write(s->fifo)) {
ret = s->filter(ctx);
if (ret < 0) {
if (ret != AVERROR_EOF)
return ret;
s->eof = 1;
}
}
if (!av_fifo_can_read(s->fifo))
return AVERROR_EOF;
// Fetch a packet from the FIFO
ret = av_fifo_read(s->fifo, &frame, 1);
av_assert2(ret >= 0);
av_packet_move_ref(out, frame.pkt);
av_packet_free(&frame.pkt);
// Search the timestamp for the requested POC and set PTS
poc_node = av_tree_find(s->root, &frame, cmp_find, (void **)next);
if (!poc_node) {
poc_node = next[1];
if (!poc_node || poc_node->poc != frame.poc)
poc_node = next[0];
}
if (poc_node && poc_node->poc == frame.poc) {
out->pts = poc_node->dts;
if (!s->eof) {
// Remove the found entry from the tree
DTS2PTSFrame dup = (DTS2PTSFrame) { NULL, frame.poc + 1, frame.poc_diff, frame.gop };
for (; dup.poc_diff > 0; dup.poc++, dup.poc_diff--) {
struct AVTreeNode *node = NULL;
if (!poc_node || poc_node->dts != out->pts)
continue;
av_tree_insert(&s->root, poc_node, cmp_insert, &node);
av_free(poc_node);
av_free(node);
poc_node = av_tree_find(s->root, &dup, cmp_find, NULL);
}
}
} else {
DTS2PTSFrame dup = (DTS2PTSFrame) { NULL, frame.poc - 1, frame.poc_diff, frame.gop };
if (s->eof && (poc_node = av_tree_find(s->root, &dup, cmp_find, NULL)) && poc_node->poc == dup.poc) {
out->pts = poc_node->dts;
if (out->pts != AV_NOPTS_VALUE)
out->pts += poc_node->duration;
ret = alloc_and_insert_node(ctx, out->pts, out->duration,
frame.poc, frame.poc_diff, frame.gop);
if (ret < 0) {
av_packet_unref(out);
return ret;
}
if (!ret)
av_log(ctx, AV_LOG_DEBUG, "Queueing frame for POC %d, GOP %d, dts %"PRId64", "
"generated from POC %d, GOP %d, dts %"PRId64", duration %"PRId64"\n",
frame.poc, frame.gop, out->pts,
poc_node->poc, poc_node->gop, poc_node->dts, poc_node->duration);
} else
av_log(ctx, AV_LOG_WARNING, "No timestamp for POC %d in tree\n", frame.poc);
}
av_log(ctx, AV_LOG_DEBUG, "Returning frame for POC %d, GOP %d, dts %"PRId64", pts %"PRId64"\n",
frame.poc, frame.gop, out->dts, out->pts);
return 0;
}
static void dts2pts_flush(AVBSFContext *ctx)
{
DTS2PTSContext *s = ctx->priv_data;
DTS2PTSFrame frame;
if (s->flush)
s->flush(ctx);
s->eof = 0;
s->gop = 0;
while (s->fifo && av_fifo_read(s->fifo, &frame, 1) >= 0)
av_packet_free(&frame.pkt);
av_tree_enumerate(s->root, NULL, NULL, free_node);
av_tree_destroy(s->root);
s->root = NULL;
ff_cbs_fragment_reset(&s->au);
ff_cbs_flush(s->cbc);
}
static void dts2pts_close(AVBSFContext *ctx)
{
DTS2PTSContext *s = ctx->priv_data;
dts2pts_flush(ctx);
av_fifo_freep2(&s->fifo);
ff_cbs_fragment_free(&s->au);
ff_cbs_close(&s->cbc);
}
static const enum AVCodecID dts2pts_codec_ids[] = {
AV_CODEC_ID_H264,
AV_CODEC_ID_NONE,
};
const FFBitStreamFilter ff_dts2pts_bsf = {
.p.name = "dts2pts",
.p.codec_ids = dts2pts_codec_ids,
.priv_data_size = sizeof(DTS2PTSContext),
.init = dts2pts_init,
.flush = dts2pts_flush,
.close = dts2pts_close,
.filter = dts2pts_filter,
};

View File

@ -29,7 +29,7 @@
#include "version_major.h"
#define LIBAVCODEC_VERSION_MINOR 49
#define LIBAVCODEC_VERSION_MINOR 50
#define LIBAVCODEC_VERSION_MICRO 100
#define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \