avformat/spdifenc: fix handling of large TrueHD frames

The TrueHD IEC 61937 encapsulation code uses a naive method that always places 24 TrueHD frames at evenly spaced positions in a MAT frame. This does not work for larger frames, as they may exceed 1/24th of the size of a MAT frame. To fix that, use the input_timing field of each TrueHD frame to determine the proper position of that frame in the MAT frame. That field is effectively a DTS: it gives the time, in sample-count units, at which the frame is to be fed to the decoder. Positioning frames this way can cause a TrueHD frame to be split between two MAT frames, so a second concatenation buffer (hd_buf) is added, alternating with the first one. Large frames are preceded by smaller frames whose input_timing values cause them to be sent out faster than the nominal rate (i.e. with increased decoder latency and a long decoder buffer). This allows the larger frames to then be sent out slower than the nominal rate, as the decoder has enough data buffered to keep it busy.
2020-02-19 22:46:35 +02:00 · 2020-02-19 22:46:35 +02:00 · 36e156bef0
parent 1d5338e450
commit 36e156bef0
1 changed file with 166 additions and 45 deletions
--- a/libavformat/spdifenc.c
+++ b/libavformat/spdifenc.c
@ -1,7 +1,7 @@
 /*
 * IEC 61937 muxer
 * Copyright (c) 2009 Bartlomiej Wolowiec
- * Copyright (c) 2010 Anssi Hannula
+ * Copyright (c) 2010, 2020 Anssi Hannula
 * Copyright (c) 2010 Carl Eugen Hoyos
 *
 * This file is part of FFmpeg.
@ -69,13 +69,18 @@ typedef struct IEC61937Context {
    int use_preamble;               ///< preamble enabled (disabled for exactly pre-padded DTS)
    int extra_bswap;                ///< extra bswap for payload (for LE DTS => standard BE DTS)
-    uint8_t *hd_buf[1];             ///< allocated buffer to concatenate hd audio frames
+    uint8_t *hd_buf[2];             ///< allocated buffers to concatenate hd audio frames
    int hd_buf_size;                ///< size of the hd audio buffer (eac3, dts4)
-    int hd_buf_count;               ///< number of frames in the hd audio buffer (eac3, truehd)
+    int hd_buf_count;               ///< number of frames in the hd audio buffer (eac3)
-    int hd_buf_filled;              ///< amount of bytes in the hd audio buffer (eac3)
+    int hd_buf_filled;              ///< amount of bytes in the hd audio buffer (eac3, truehd)
    int hd_buf_idx;                 ///< active hd buffer index (truehd)
    int dtshd_skip;                 ///< counter used for skipping DTS-HD frames
    uint16_t truehd_prev_time;      ///< input_timing from the last frame
    int truehd_prev_size;           ///< previous frame size in bytes, including any MAT codes
    int truehd_samples_per_frame;   ///< samples per frame for padding calculation
    /* AVOptions: */
    int dtshd_rate;
    int dtshd_fallback;
@ -384,62 +389,175 @@ static int spdif_header_aac(AVFormatContext *s, AVPacket *pkt)
 /*
 * It seems Dolby TrueHD frames have to be encapsulated in MAT frames before
 * they can be encapsulated in IEC 61937.
 * Here we encapsulate 24 TrueHD frames in a single MAT frame, padding them
 * to achieve constant rate.
 * The actual format of a MAT frame is unknown, but the below seems to work.
 * However, it seems it is not actually necessary for the 24 TrueHD frames to
 * be in an exact alignment with the MAT frame.
 */
 #define MAT_PKT_OFFSET          61440
 #define MAT_FRAME_SIZE          61424
-#define TRUEHD_FRAME_OFFSET     2560
+
-#define MAT_MIDDLE_CODE_OFFSET  -4
+static const uint8_t mat_start_code[20] = {
    0x07, 0x9E, 0x00, 0x03, 0x84, 0x01, 0x01, 0x01, 0x80, 0x00, 0x56, 0xA5, 0x3B, 0xF4, 0x81, 0x83,
    0x49, 0x80, 0x77, 0xE0,
 };
 /* MAT code that the mat_codes table places at byte offset 30708 of the
  * MAT frame buffer. */
 static const uint8_t mat_middle_code[12] = {
    0xC3, 0xC1, 0x42, 0x49, 0x3B, 0xFA, 0x82, 0x83, 0x49, 0x80, 0x77, 0xE0,
 };
 /* MAT code that the mat_codes table places in the last sizeof(mat_end_code)
  * bytes of the MAT frame buffer (offset MAT_FRAME_SIZE - sizeof(mat_end_code)). */
 static const uint8_t mat_end_code[16] = {
    0xC3, 0xC2, 0xC0, 0xC4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x97, 0x11,
 };
 /* Builds one mat_codes entry. The length comes from sizeof(data), so data
  * must be an array (not a pointer) for .len to be the code size. */
 #define MAT_CODE(position, data) { .pos = position, .code = data, .len = sizeof(data) }
 /*
  * MAT codes together with the byte offset (pos) in the MAT frame buffer at
  * which each one is copied. Entries are listed in increasing pos order;
  * spdif_header_truehd() looks up the next code to insert by scanning for the
  * first entry whose pos is not below the current fill level.
  */
 static const struct {
    unsigned int pos;
    const uint8_t *code;
    unsigned int len;
 } mat_codes[] = {
    MAT_CODE(0, mat_start_code),
    MAT_CODE(30708, mat_middle_code),
    MAT_CODE(MAT_FRAME_SIZE - sizeof(mat_end_code), mat_end_code),
 };
 static int spdif_header_truehd(AVFormatContext *s, AVPacket *pkt)
 {
    IEC61937Context *ctx = s->priv_data;
-    int mat_code_length = 0;
+    uint8_t *hd_buf = ctx->hd_buf[ctx->hd_buf_idx];
-    static const char mat_end_code[16] = { 0xC3, 0xC2, 0xC0, 0xC4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x97, 0x11 };
+    int ratebits;
    int padding_remaining = 0;
    uint16_t input_timing;
    int total_frame_size = pkt->size;
    const uint8_t *dataptr = pkt->data;
    int data_remaining = pkt->size;
    int have_pkt = 0;
    int next_code_idx;
-    if (!ctx->hd_buf_count) {
+    if (pkt->size < 10)
-        static const char mat_start_code[20] = { 0x07, 0x9E, 0x00, 0x03, 0x84, 0x01, 0x01, 0x01, 0x80, 0x00, 0x56, 0xA5, 0x3B, 0xF4, 0x81, 0x83, 0x49, 0x80, 0x77, 0xE0 };
+        return AVERROR_INVALIDDATA;
        mat_code_length = sizeof(mat_start_code) + BURST_HEADER_SIZE;
        memcpy(ctx->hd_buf[0], mat_start_code, sizeof(mat_start_code));
-    } else if (ctx->hd_buf_count == 12) {
+    if (AV_RB24(pkt->data + 4) == 0xf8726f) {
-        static const char mat_middle_code[12] = { 0xC3, 0xC1, 0x42, 0x49, 0x3B, 0xFA, 0x82, 0x83, 0x49, 0x80, 0x77, 0xE0 };
+        /* major sync unit, fetch sample rate */
-        mat_code_length = sizeof(mat_middle_code) + MAT_MIDDLE_CODE_OFFSET;
+        if (pkt->data[7] == 0xba)
-        memcpy(&ctx->hd_buf[0][12 * TRUEHD_FRAME_OFFSET - BURST_HEADER_SIZE + MAT_MIDDLE_CODE_OFFSET],
+            ratebits = pkt->data[8] >> 8;
-               mat_middle_code, sizeof(mat_middle_code));
+        else if (pkt->data[7] == 0xbb)
            ratebits = pkt->data[9] >> 8;
        else
            return AVERROR_INVALIDDATA;
        ctx->truehd_samples_per_frame = 40 << (ratebits & 3);
        av_log(s, AV_LOG_TRACE, "TrueHD samples per frame: %d\n",
               ctx->truehd_samples_per_frame);
    }
-    if (pkt->size > TRUEHD_FRAME_OFFSET - mat_code_length) {
+    if (!ctx->truehd_samples_per_frame)
-        /* if such frames exist, we'd need some more complex logic to
+        return AVERROR_INVALIDDATA;
-         * distribute the TrueHD frames in the MAT frame */
+
-        avpriv_request_sample(s, "Too large TrueHD frame of %d bytes",
+    input_timing = AV_RB16(pkt->data + 2);
-                              pkt->size);
+    if (ctx->truehd_prev_size) {
-        return AVERROR_PATCHWELCOME;
+        uint16_t delta_samples = input_timing - ctx->truehd_prev_time;
        /*
         * One multiple-of-48kHz frame is 1/1200 sec and the IEC 61937 rate
         * is 768kHz = 768000*4 bytes/sec.
         * The nominal space per frame is therefore
         * (768000*4 bytes/sec) * (1/1200 sec) = 2560 bytes.
         * For multiple-of-44.1kHz frames: 1/1102.5 sec, 705.6kHz, 2560 bytes.
         *
         * 2560 is divisible by truehd_samples_per_frame.
         */
        int delta_bytes = delta_samples * 2560 / ctx->truehd_samples_per_frame;
        /* padding needed before this frame */
        padding_remaining = delta_bytes - ctx->truehd_prev_size;
        av_log(s, AV_LOG_TRACE, "delta_samples: %"PRIu16", delta_bytes: %d\n",
               delta_samples, delta_bytes);
        /* sanity check */
        if (padding_remaining < 0 || padding_remaining >= MAT_FRAME_SIZE / 2) {
            avpriv_request_sample(s, "Unusual frame timing: %"PRIu16" => %"PRIu16", %d samples/frame",
                                  ctx->truehd_prev_time, input_timing, ctx->truehd_samples_per_frame);
            padding_remaining = 0;
        }
    }
-    memcpy(&ctx->hd_buf[0][ctx->hd_buf_count * TRUEHD_FRAME_OFFSET - BURST_HEADER_SIZE + mat_code_length],
+    for (next_code_idx = 0; next_code_idx < FF_ARRAY_ELEMS(mat_codes); next_code_idx++)
-           pkt->data, pkt->size);
+        if (ctx->hd_buf_filled <= mat_codes[next_code_idx].pos)
-    if (ctx->hd_buf_count < 23) {
+            break;
-        memset(&ctx->hd_buf[0][ctx->hd_buf_count * TRUEHD_FRAME_OFFSET - BURST_HEADER_SIZE + mat_code_length + pkt->size],
+
-               0, TRUEHD_FRAME_OFFSET - pkt->size - mat_code_length);
+    if (next_code_idx >= FF_ARRAY_ELEMS(mat_codes))
-    } else {
+        return AVERROR_BUG;
-        size_t padding = MAT_FRAME_SIZE - (ctx->hd_buf_count * TRUEHD_FRAME_OFFSET - BURST_HEADER_SIZE + pkt->size);
+
-        memset(&ctx->hd_buf[0][MAT_FRAME_SIZE - padding], 0, padding);
+    while (padding_remaining || data_remaining ||
           mat_codes[next_code_idx].pos == ctx->hd_buf_filled) {
        if (mat_codes[next_code_idx].pos == ctx->hd_buf_filled) {
            /* time to insert MAT code */
            int code_len = mat_codes[next_code_idx].len;
            int code_len_remaining = code_len;
            memcpy(hd_buf + mat_codes[next_code_idx].pos,
                   mat_codes[next_code_idx].code, code_len);
            ctx->hd_buf_filled += code_len;
            next_code_idx++;
            if (next_code_idx == FF_ARRAY_ELEMS(mat_codes)) {
                next_code_idx = 0;
                /* this was the last code, move to the next MAT frame */
                have_pkt = 1;
                ctx->out_buf = hd_buf;
                ctx->hd_buf_idx ^= 1;
                hd_buf = ctx->hd_buf[ctx->hd_buf_idx];
                ctx->hd_buf_filled = 0;
                /* inter-frame gap has to be counted as well, add it */
                code_len_remaining += MAT_PKT_OFFSET - MAT_FRAME_SIZE;
            }
            if (padding_remaining) {
                /* consider the MAT code as padding */
                int counted_as_padding = FFMIN(padding_remaining,
                                               code_len_remaining);
                padding_remaining -= counted_as_padding;
                code_len_remaining -= counted_as_padding;
            }
            /* count the remainder of the code as part of frame size */
            if (code_len_remaining)
                total_frame_size += code_len_remaining;
        }
        if (padding_remaining) {
            int padding_to_insert = FFMIN(mat_codes[next_code_idx].pos - ctx->hd_buf_filled,
                                          padding_remaining);
            memset(hd_buf + ctx->hd_buf_filled, 0, padding_to_insert);
            ctx->hd_buf_filled += padding_to_insert;
            padding_remaining -= padding_to_insert;
            if (padding_remaining)
                continue; /* time to insert MAT code */
        }
        if (data_remaining) {
            int data_to_insert = FFMIN(mat_codes[next_code_idx].pos - ctx->hd_buf_filled,
                                       data_remaining);
            memcpy(hd_buf + ctx->hd_buf_filled, dataptr, data_to_insert);
            ctx->hd_buf_filled += data_to_insert;
            dataptr += data_to_insert;
            data_remaining -= data_to_insert;
        }
    }
-    if (++ctx->hd_buf_count < 24){
+    ctx->truehd_prev_size = total_frame_size;
    ctx->truehd_prev_time = input_timing;
    av_log(s, AV_LOG_TRACE, "TrueHD frame inserted, total size %d, buffer position %d\n",
           total_frame_size, ctx->hd_buf_filled);
    if (!have_pkt) {
        ctx->pkt_offset = 0;
        return 0;
    }
    memcpy(&ctx->hd_buf[0][MAT_FRAME_SIZE - sizeof(mat_end_code)], mat_end_code, sizeof(mat_end_code));
    ctx->hd_buf_count = 0;
    ctx->data_type   = IEC61937_TRUEHD;
-    ctx->pkt_offset  = 61440;
+    ctx->pkt_offset  = MAT_PKT_OFFSET;
    ctx->out_buf     = ctx->hd_buf[0];
    ctx->out_bytes   = MAT_FRAME_SIZE;
    ctx->length_code = MAT_FRAME_SIZE;
    return 0;
@ -470,9 +588,11 @@ static int spdif_write_header(AVFormatContext *s)
    case AV_CODEC_ID_TRUEHD:
    case AV_CODEC_ID_MLP:
        ctx->header_info = spdif_header_truehd;
-        ctx->hd_buf[0] = av_malloc(MAT_FRAME_SIZE);
+        for (int i = 0; i < FF_ARRAY_ELEMS(ctx->hd_buf); i++) {
-        if (!ctx->hd_buf[0])
+            ctx->hd_buf[i] = av_malloc(MAT_FRAME_SIZE);
-            return AVERROR(ENOMEM);
+            if (!ctx->hd_buf[i])
                return AVERROR(ENOMEM);
        }
        break;
    default:
        avpriv_report_missing_feature(s, "Codec %d",
@ -486,7 +606,8 @@ static void spdif_deinit(AVFormatContext *s)
 {
    IEC61937Context *ctx = s->priv_data;
    av_freep(&ctx->buffer);
-    av_freep(&ctx->hd_buf[0]);
+    for (int i = 0; i < FF_ARRAY_ELEMS(ctx->hd_buf); i++)
        av_freep(&ctx->hd_buf[i]);
 }
 static av_always_inline void spdif_put_16(IEC61937Context *ctx,