demux_mkv: improve robustness by explicitly checking for level 1 elements

Matroska makes it pretty hard to resync correctly on broken files:
random data returns "valid" EBML IDs with a high probability, and when
trying to skip them it's likely that you skip a random amount of data
(instead of considering the element length invalid).

Improve upon this by skipping known level 1 elements only. Consider
everything else invalid and call the resync code. This might result in
annoying behavior when Matroska adds new level 1 elements, although it
won't be particularly harmful. Matroska doesn't really allow us to do
better (even mkvtoolnix explicitly checks for known level 1 elements).

Since we now don't always want to combine EBML element skipping and
resyncing, remove ebml_read_skip_or_resync_cluster(), and make
ebml_read_skip() more tolerant against skipping broken elements.

Also, don't resync when reading sub-elements, and instead do resyncing
when reading them results in an error.
This commit is contained in:
wm4 2014-01-14 17:38:21 +01:00
parent ae27e13a0a
commit 3c2f93aec8
3 changed files with 50 additions and 46 deletions

View File

@ -723,7 +723,7 @@ static int demux_mkv_read_cues(demuxer_t *demuxer)
stream_t *s = demuxer->stream;
if (opts->index_mode == 0 || opts->index_mode == 2) {
ebml_read_skip(s, NULL);
ebml_read_skip(demuxer->log, -1, s);
return 0;
}
@ -1128,7 +1128,7 @@ static int read_header_element(struct demuxer *demuxer, uint32_t id,
res = 2;
}
if (!at_filepos && id != EBML_ID_INVALID)
ebml_read_skip(s, NULL);
ebml_read_skip(demuxer->log, -1, s);
return res;
}
@ -2454,11 +2454,12 @@ static int read_block_group(demuxer_t *demuxer, int64_t end,
block->keyframe = false;
break;
case MATROSKA_ID_CLUSTER:
case EBML_ID_INVALID:
goto error;
default:
if (ebml_read_skip_or_resync_cluster(demuxer->log, end, s) != 0)
if (ebml_read_skip(demuxer->log, end, s) != 0)
goto error;
break;
}
@ -2519,8 +2520,7 @@ static int read_next_block(demuxer_t *demuxer, struct block_info *block)
goto find_next_cluster;
default: ;
if (ebml_read_skip_or_resync_cluster
(demuxer->log, mkv_d->cluster_end, s) != 0)
if (ebml_read_skip(demuxer->log, mkv_d->cluster_end, s) != 0)
goto find_next_cluster;
break;
}
@ -2535,7 +2535,13 @@ static int read_next_block(demuxer_t *demuxer, struct block_info *block)
break;
if (s->eof)
return -1;
ebml_read_skip_or_resync_cluster(demuxer->log, -1, s);
// For the sake of robustness, consider even unknown level 1
// elements the same as unknown/broken IDs.
if (!ebml_is_mkv_level1_id(id) ||
ebml_read_skip(demuxer->log, -1, s) != 0)
{
ebml_resync_cluster(demuxer->log, s);
}
}
next_cluster:
mkv_d->cluster_end = ebml_read_length(s, NULL);

View File

@ -41,6 +41,26 @@
#define SIZE_MAX ((size_t)-1)
#endif
// Whether the id is a known Matroska level 1 element (allowed as element on
// global file level, after the level 0 MATROSKA_ID_SEGMENT).
// This (intentionally) doesn't include "global" elements.
bool ebml_is_mkv_level1_id(uint32_t id)
{
switch (id) {
case MATROSKA_ID_SEEKHEAD:
case MATROSKA_ID_INFO:
case MATROSKA_ID_CLUSTER:
case MATROSKA_ID_TRACKS:
case MATROSKA_ID_CUES:
case MATROSKA_ID_ATTACHMENTS:
case MATROSKA_ID_CHAPTERS:
case MATROSKA_ID_TAGS:
return true;
default:
return false;
}
}
/*
* Read: the element content data ID.
* Return: the ID.
@ -252,21 +272,32 @@ char *ebml_read_utf8(stream_t *s, uint64_t *length)
/*
* Skip the current element.
* end: the end of the parent element or -1 (for robust error handling)
*/
int ebml_read_skip(stream_t *s, uint64_t *length)
int ebml_read_skip(struct mp_log *log, int64_t end, stream_t *s)
{
uint64_t len;
int l;
int64_t pos = stream_tell(s);
len = ebml_read_length(s, &l);
if (len == EBML_UINT_INVALID)
return 1;
if (length)
*length = len + l;
goto invalid;
stream_skip(s, len);
int64_t pos2 = stream_tell(s);
if (len >= INT64_MAX - pos2 || (end > 0 && pos2 + len > end))
goto invalid;
if (!stream_skip(s, len))
goto invalid;
return 0;
invalid:
mp_err(log, "Invalid EBML length at position %"PRId64"\n", pos);
stream_seek(s, pos);
return 1;
}
/*
@ -291,38 +322,6 @@ int ebml_resync_cluster(struct mp_log *log, stream_t *s)
return -1;
}
/*
* Skip the current element, or on error, call ebml_resync_cluster().
* end gives the maximum possible file pos (due to EBML parent element size).
*/
int ebml_read_skip_or_resync_cluster(struct mp_log *log, int64_t end,
stream_t *s)
{
uint64_t len;
int l;
len = ebml_read_length(s, &l);
if (len == EBML_UINT_INVALID)
goto resync;
int64_t pos = stream_tell(s);
if (end >= 0 && pos + len > end)
goto resync;
// When reading corrupted elements, len will often be a random high number,
// and stream_skip() will fail when skipping past EOF.
if (!stream_skip(s, len)) {
stream_seek(s, pos);
goto resync;
}
return 0;
resync:
return ebml_resync_cluster(log, s) < 0 ? -1 : 1;
}
/*
* Read the next element, but only the header. The contents
* are supposed to be sub-elements which can be read separately.

View File

@ -92,6 +92,7 @@ struct ebml_parse_ctx {
#define EBML_FLOAT_INVALID -1000000000.0
bool ebml_is_mkv_level1_id(uint32_t id);
uint32_t ebml_read_id (stream_t *s, int *length);
uint64_t ebml_read_vlen_uint (bstr *buffer);
int64_t ebml_read_vlen_int (bstr *buffer);
@ -101,9 +102,7 @@ int64_t ebml_read_int (stream_t *s, uint64_t *length);
double ebml_read_float (stream_t *s, uint64_t *length);
char *ebml_read_ascii (stream_t *s, uint64_t *length);
char *ebml_read_utf8 (stream_t *s, uint64_t *length);
int ebml_read_skip (stream_t *s, uint64_t *length);
int ebml_read_skip_or_resync_cluster(struct mp_log *log, int64_t end,
stream_t *s);
int ebml_read_skip(struct mp_log *log, int64_t end, stream_t *s);
int ebml_resync_cluster(struct mp_log *log, stream_t *s);
uint32_t ebml_read_master (stream_t *s, uint64_t *length);