From 3c70b941d5d1f756cf4e141c3c7ee921478ec300 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt Date: Fri, 17 May 2019 00:30:16 +0200 Subject: [PATCH] avformat/matroskadec: Accept more unknown-length elements The current Matroska specifications mandate that only two elements may use an unknown-length length: Segments and clusters. But this was not always so: For the greater part of Matroska's existence, all master elements were allowed to make use of the unknown-length feature. And there were muxers creating such files: For several years libavformat's Matroska muxer used unknown-length for all master elements when the output wasn't seekable. This only stopped in March 2010 with 2529bb30. And even afterwards it was possible (albeit unlikely) for libavformat to create unknown-length master elements that are in violation of today's specifications, namely if the master element was so big that the seek backwards to update the size could no longer be performed inside the AVIOContext's write buffer. This has only been fixed in October 2016 (with the patches that introduced support for writing CRC-32 elements). Libavformat's Matroska demuxer meanwhile has never really supported unknown-length elements besides segments and clusters. Support for the latter was hardcoded. This commit changes this: Now all master elements for which a syntax to parse them is available are supported. This includes the files produced by old versions of libavformat's muxer. More precisely, master elements that have unknown length and are about to be parsed (not skipped) are supported; only a warning is emitted for them. For normal files, this means that level 1 elements after the clusters that are encountered after the clusters have been parsed (i.e. not because they are referenced by the seekhead at the beginning of the file) are still unsupported (they would be skipped at this point if their length were known). Signed-off-by: Andreas Rheinhardt --- libavformat/matroskadec.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c index 26a1d702a4..6eab076538 100644 --- a/libavformat/matroskadec.c +++ b/libavformat/matroskadec.c @@ -1281,15 +1281,20 @@ static int ebml_parse(MatroskaDemuxContext *matroska, av_log(matroska->ctx, AV_LOG_ERROR, "Unknown-sized element " "at 0x%"PRIx64" inside parent with finite size\n", pos); return AVERROR_INVALIDDATA; - } else if (id != MATROSKA_ID_CLUSTER) { - // According to the specifications only clusters and segments - // are allowed to be unknown-sized. - av_log(matroska->ctx, AV_LOG_ERROR, - "Found unknown-sized element other than a cluster at " - "0x%"PRIx64". Dropping the invalid element.\n", pos); - return AVERROR_INVALIDDATA; - } else + } else { level_check = 0; + if (id != MATROSKA_ID_CLUSTER && (syntax->type == EBML_LEVEL1 + || syntax->type == EBML_NEST)) { + // According to the current specifications only clusters and + // segments are allowed to be unknown-length. We also accept + // other unknown-length master elements. + av_log(matroska->ctx, AV_LOG_WARNING, + "Found unknown-length element 0x%"PRIX32" other than " + "a cluster at 0x%"PRIx64". Spec-incompliant, but " + "parsing will nevertheless be attempted.\n", id, pos); + update_pos = -1; + } + } } else level_check = 0; @@ -1355,7 +1360,7 @@ static int ebml_parse(MatroskaDemuxContext *matroska, } } - if (update_pos) { + if (update_pos > 0) { // We have found an element that is allowed at this place // in the hierarchy and it passed all checks, so treat the beginning // of the element as the "last known good" position.