diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c index f87d7d8f0e..84c6e44eb7 100644 --- a/libavformat/matroskadec.c +++ b/libavformat/matroskadec.c @@ -71,6 +71,7 @@ typedef enum { EBML_UTF8, EBML_BIN, EBML_NEST, + EBML_LEVEL1, EBML_PASS, EBML_STOP, EBML_SINT, @@ -252,6 +253,12 @@ typedef struct { EbmlList blocks; } MatroskaCluster; +typedef struct { + uint64_t id; + uint64_t pos; + int parsed; +} MatroskaLevel1Element; + typedef struct { AVFormatContext *ctx; @@ -290,6 +297,10 @@ typedef struct { /* File has a CUES element, but we defer parsing until it is needed. */ int cues_parsing_deferred; + /* Level1 elements and whether they were read yet */ + MatroskaLevel1Element level1_elems[64]; + int num_level1_elems; + int current_cluster_num_blocks; int64_t current_cluster_pos; MatroskaCluster current_cluster; @@ -551,13 +562,13 @@ static EbmlSyntax matroska_seekhead[] = { }; static EbmlSyntax matroska_segment[] = { - { MATROSKA_ID_INFO, EBML_NEST, 0, 0, { .n = matroska_info } }, - { MATROSKA_ID_TRACKS, EBML_NEST, 0, 0, { .n = matroska_tracks } }, - { MATROSKA_ID_ATTACHMENTS, EBML_NEST, 0, 0, { .n = matroska_attachments } }, - { MATROSKA_ID_CHAPTERS, EBML_NEST, 0, 0, { .n = matroska_chapters } }, - { MATROSKA_ID_CUES, EBML_NEST, 0, 0, { .n = matroska_index } }, - { MATROSKA_ID_TAGS, EBML_NEST, 0, 0, { .n = matroska_tags } }, - { MATROSKA_ID_SEEKHEAD, EBML_NEST, 0, 0, { .n = matroska_seekhead } }, + { MATROSKA_ID_INFO, EBML_LEVEL1, 0, 0, { .n = matroska_info } }, + { MATROSKA_ID_TRACKS, EBML_LEVEL1, 0, 0, { .n = matroska_tracks } }, + { MATROSKA_ID_ATTACHMENTS, EBML_LEVEL1, 0, 0, { .n = matroska_attachments } }, + { MATROSKA_ID_CHAPTERS, EBML_LEVEL1, 0, 0, { .n = matroska_chapters } }, + { MATROSKA_ID_CUES, EBML_LEVEL1, 0, 0, { .n = matroska_index } }, + { MATROSKA_ID_TAGS, EBML_LEVEL1, 0, 0, { .n = matroska_tags } }, + { MATROSKA_ID_SEEKHEAD, EBML_LEVEL1, 0, 0, { .n = matroska_seekhead } }, { MATROSKA_ID_CLUSTER, EBML_STOP }, { 0 } }; @@ -976,6 +987,42 @@ static int ebml_parse_nest(MatroskaDemuxContext *matroska, EbmlSyntax *syntax, return res; } +/* + * Allocate and return the entry for the level1 element with the given ID. If + * an entry already exists, return the existing entry. + */ +static MatroskaLevel1Element *matroska_find_level1_elem(MatroskaDemuxContext *matroska, + uint32_t id) +{ + int i; + MatroskaLevel1Element *elem; + + // Some files link to all clusters; useless. + if (id == MATROSKA_ID_CLUSTER) + return NULL; + + // There can be multiple seekheads. + if (id != MATROSKA_ID_SEEKHEAD) { + for (i = 0; i < matroska->num_level1_elems; i++) { + if (matroska->level1_elems[i].id == id) + return &matroska->level1_elems[i]; + } + } + + // Only a completely broken file would have more elements. + // It also provides a low-effort way to escape from circular seekheads + // (every iteration will add a level1 entry). + if (matroska->num_level1_elems >= FF_ARRAY_ELEMS(matroska->level1_elems)) { + av_log(matroska->ctx, AV_LOG_ERROR, "Too many level1 elements or circular seekheads.\n"); + return NULL; + } + + elem = &matroska->level1_elems[matroska->num_level1_elems++]; + *elem = (MatroskaLevel1Element){.id = id}; + + return elem; +} + static int ebml_parse_elem(MatroskaDemuxContext *matroska, EbmlSyntax *syntax, void *data) { @@ -994,6 +1041,7 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska, uint64_t length; int res; void *newelem; + MatroskaLevel1Element *level1_elem; data = (char *) data + syntax->data_offset; if (syntax->list_elem_size) { @@ -1036,11 +1084,20 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska, case EBML_BIN: res = ebml_read_binary(pb, length, data); break; + case EBML_LEVEL1: case EBML_NEST: if ((res = ebml_read_master(matroska, length)) < 0) return res; if (id == MATROSKA_ID_SEGMENT) matroska->segment_start = avio_tell(matroska->ctx->pb); + if (id == MATROSKA_ID_CUES) + matroska->cues_parsing_deferred = 0; + if (syntax->type == EBML_LEVEL1 && + (level1_elem = matroska_find_level1_elem(matroska, syntax->id))) { + if (level1_elem->parsed) + av_log(matroska->ctx, AV_LOG_ERROR, "Duplicate element\n"); + level1_elem->parsed = 1; + } return ebml_parse_nest(matroska, syntax->def.n, data); case EBML_PASS: return ebml_parse_id(matroska, syntax->def.n, id, data); @@ -1071,6 +1128,7 @@ static void ebml_free(EbmlSyntax *syntax, void *data) case EBML_BIN: av_freep(&((EbmlBin *) data_off)->data); break; + case EBML_LEVEL1: case EBML_NEST: if (syntax[i].list_elem_size) { EbmlList *list = data_off; @@ -1356,24 +1414,17 @@ static void matroska_convert_tags(AVFormatContext *s) } static int matroska_parse_seekhead_entry(MatroskaDemuxContext *matroska, - int idx) + uint64_t pos) { - EbmlList *seekhead_list = &matroska->seekhead; uint32_t level_up = matroska->level_up; uint32_t saved_id = matroska->current_id; - MatroskaSeekhead *seekhead = seekhead_list->elem; int64_t before_pos = avio_tell(matroska->ctx->pb); MatroskaLevel level; int64_t offset; int ret = 0; - if (idx >= seekhead_list->nb_elem || - seekhead[idx].id == MATROSKA_ID_SEEKHEAD || - seekhead[idx].id == MATROSKA_ID_CLUSTER) - return 0; - /* seek */ - offset = seekhead[idx].pos + matroska->segment_start; + offset = pos + matroska->segment_start; if (avio_seek(matroska->ctx->pb, offset, SEEK_SET) == offset) { /* We don't want to lose our seekhead level, so we add * a dummy. This is a crude hack. */ @@ -1410,37 +1461,35 @@ static int matroska_parse_seekhead_entry(MatroskaDemuxContext *matroska, static void matroska_execute_seekhead(MatroskaDemuxContext *matroska) { EbmlList *seekhead_list = &matroska->seekhead; - int64_t before_pos = avio_tell(matroska->ctx->pb); int i; - int nb_elem; // we should not do any seeking in the streaming case if (!matroska->ctx->pb->seekable || (matroska->ctx->flags & AVFMT_FLAG_IGNIDX)) return; - // do not read entries that are added while parsing seekhead entries - nb_elem = seekhead_list->nb_elem; + for (i = 0; i < seekhead_list->nb_elem; i++) { + MatroskaSeekhead *seekheads = seekhead_list->elem; + uint32_t id = seekheads[i].id; + uint64_t pos = seekheads[i].pos; - for (i = 0; i < nb_elem; i++) { - MatroskaSeekhead *seekhead = seekhead_list->elem; - if (seekhead[i].pos <= before_pos) + MatroskaLevel1Element *elem = matroska_find_level1_elem(matroska, id); + if (!elem || elem->parsed) continue; + elem->pos = pos; + // defer cues parsing until we actually need cue data. - if (seekhead[i].id == MATROSKA_ID_CUES) { - matroska->cues_parsing_deferred = 1; + if (id == MATROSKA_ID_CUES) continue; - } - if (matroska_parse_seekhead_entry(matroska, i) < 0) { + if (matroska_parse_seekhead_entry(matroska, pos) < 0) { // mark index as broken matroska->cues_parsing_deferred = -1; break; } - } - if (nb_elem != seekhead_list->nb_elem) { - avpriv_request_sample(matroska->ctx, "recursive SeekHead elements"); + + elem->parsed = 1; } } @@ -1474,17 +1523,18 @@ static void matroska_add_index_entries(MatroskaDemuxContext *matroska) } static void matroska_parse_cues(MatroskaDemuxContext *matroska) { - EbmlList *seekhead_list = &matroska->seekhead; - MatroskaSeekhead *seekhead = seekhead_list->elem; int i; - for (i = 0; i < seekhead_list->nb_elem; i++) - if (seekhead[i].id == MATROSKA_ID_CUES) + for (i = 0; i < matroska->num_level1_elems; i++) { + MatroskaLevel1Element *elem = &matroska->level1_elems[i]; + if (elem->id == MATROSKA_ID_CUES && !elem->parsed) { + if (matroska_parse_seekhead_entry(matroska, elem->pos) < 0) + matroska->cues_parsing_deferred = -1; + elem->parsed = 1; break; - av_assert1(i <= seekhead_list->nb_elem); + } + } - if (matroska_parse_seekhead_entry(matroska, i) < 0) - matroska->cues_parsing_deferred = -1; matroska_add_index_entries(matroska); } @@ -2014,6 +2064,7 @@ static int matroska_read_header(AVFormatContext *s) int i, j, res; matroska->ctx = s; + matroska->cues_parsing_deferred = 1; /* First read the EBML header. */ if (ebml_parse(matroska, ebml_syntax, &ebml) ||