mirror of
https://github.com/mpv-player/mpv
synced 2025-02-16 20:27:23 +00:00
demux_mkv: improve robustness by explicitly checking for level 1 elements
Matroska makes it pretty hard to resync correctly on broken files: random data is parsed as a "valid" EBML ID with high probability, and when trying to skip such an element it's likely that you skip a random amount of data (instead of considering the element length invalid). Improve upon this by skipping known level 1 elements only. Consider everything else invalid and call the resync code. This might result in annoying behavior when Matroska adds new level 1 elements, although it won't be particularly harmful. Matroska doesn't really allow us to do better (even mkvtoolnix explicitly checks for known level 1 elements). Since we now don't always want to combine EBML element skipping and resyncing, remove ebml_read_skip_or_resync_cluster(), and make ebml_read_skip() more tolerant of broken elements when skipping. Also, don't resync when reading sub-elements; instead, resync only when reading them results in an error.
This commit is contained in:
parent
ae27e13a0a
commit
3c2f93aec8
@ -723,7 +723,7 @@ static int demux_mkv_read_cues(demuxer_t *demuxer)
|
|||||||
stream_t *s = demuxer->stream;
|
stream_t *s = demuxer->stream;
|
||||||
|
|
||||||
if (opts->index_mode == 0 || opts->index_mode == 2) {
|
if (opts->index_mode == 0 || opts->index_mode == 2) {
|
||||||
ebml_read_skip(s, NULL);
|
ebml_read_skip(demuxer->log, -1, s);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1128,7 +1128,7 @@ static int read_header_element(struct demuxer *demuxer, uint32_t id,
|
|||||||
res = 2;
|
res = 2;
|
||||||
}
|
}
|
||||||
if (!at_filepos && id != EBML_ID_INVALID)
|
if (!at_filepos && id != EBML_ID_INVALID)
|
||||||
ebml_read_skip(s, NULL);
|
ebml_read_skip(demuxer->log, -1, s);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2454,11 +2454,12 @@ static int read_block_group(demuxer_t *demuxer, int64_t end,
|
|||||||
block->keyframe = false;
|
block->keyframe = false;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case MATROSKA_ID_CLUSTER:
|
||||||
case EBML_ID_INVALID:
|
case EBML_ID_INVALID:
|
||||||
goto error;
|
goto error;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
if (ebml_read_skip_or_resync_cluster(demuxer->log, end, s) != 0)
|
if (ebml_read_skip(demuxer->log, end, s) != 0)
|
||||||
goto error;
|
goto error;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -2519,8 +2520,7 @@ static int read_next_block(demuxer_t *demuxer, struct block_info *block)
|
|||||||
goto find_next_cluster;
|
goto find_next_cluster;
|
||||||
|
|
||||||
default: ;
|
default: ;
|
||||||
if (ebml_read_skip_or_resync_cluster
|
if (ebml_read_skip(demuxer->log, mkv_d->cluster_end, s) != 0)
|
||||||
(demuxer->log, mkv_d->cluster_end, s) != 0)
|
|
||||||
goto find_next_cluster;
|
goto find_next_cluster;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -2535,7 +2535,13 @@ static int read_next_block(demuxer_t *demuxer, struct block_info *block)
|
|||||||
break;
|
break;
|
||||||
if (s->eof)
|
if (s->eof)
|
||||||
return -1;
|
return -1;
|
||||||
ebml_read_skip_or_resync_cluster(demuxer->log, -1, s);
|
// For the sake of robustness, consider even unknown level 1
|
||||||
|
// elements the same as unknown/broken IDs.
|
||||||
|
if (!ebml_is_mkv_level1_id(id) ||
|
||||||
|
ebml_read_skip(demuxer->log, -1, s) != 0)
|
||||||
|
{
|
||||||
|
ebml_resync_cluster(demuxer->log, s);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
next_cluster:
|
next_cluster:
|
||||||
mkv_d->cluster_end = ebml_read_length(s, NULL);
|
mkv_d->cluster_end = ebml_read_length(s, NULL);
|
||||||
|
73
demux/ebml.c
73
demux/ebml.c
@ -41,6 +41,26 @@
|
|||||||
#define SIZE_MAX ((size_t)-1)
|
#define SIZE_MAX ((size_t)-1)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Whether the id is a known Matroska level 1 element (allowed as element on
|
||||||
|
// global file level, after the level 0 MATROSKA_ID_SEGMENT).
|
||||||
|
// This (intentionally) doesn't include "global" elements.
|
||||||
|
bool ebml_is_mkv_level1_id(uint32_t id)
|
||||||
|
{
|
||||||
|
switch (id) {
|
||||||
|
case MATROSKA_ID_SEEKHEAD:
|
||||||
|
case MATROSKA_ID_INFO:
|
||||||
|
case MATROSKA_ID_CLUSTER:
|
||||||
|
case MATROSKA_ID_TRACKS:
|
||||||
|
case MATROSKA_ID_CUES:
|
||||||
|
case MATROSKA_ID_ATTACHMENTS:
|
||||||
|
case MATROSKA_ID_CHAPTERS:
|
||||||
|
case MATROSKA_ID_TAGS:
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Read: the element content data ID.
|
* Read: the element content data ID.
|
||||||
* Return: the ID.
|
* Return: the ID.
|
||||||
@ -252,21 +272,32 @@ char *ebml_read_utf8(stream_t *s, uint64_t *length)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Skip the current element.
|
* Skip the current element.
|
||||||
|
* end: the end of the parent element or -1 (for robust error handling)
|
||||||
*/
|
*/
|
||||||
int ebml_read_skip(stream_t *s, uint64_t *length)
|
int ebml_read_skip(struct mp_log *log, int64_t end, stream_t *s)
|
||||||
{
|
{
|
||||||
uint64_t len;
|
uint64_t len;
|
||||||
int l;
|
int l;
|
||||||
|
|
||||||
|
int64_t pos = stream_tell(s);
|
||||||
|
|
||||||
len = ebml_read_length(s, &l);
|
len = ebml_read_length(s, &l);
|
||||||
if (len == EBML_UINT_INVALID)
|
if (len == EBML_UINT_INVALID)
|
||||||
return 1;
|
goto invalid;
|
||||||
if (length)
|
|
||||||
*length = len + l;
|
|
||||||
|
|
||||||
stream_skip(s, len);
|
int64_t pos2 = stream_tell(s);
|
||||||
|
if (len >= INT64_MAX - pos2 || (end > 0 && pos2 + len > end))
|
||||||
|
goto invalid;
|
||||||
|
|
||||||
|
if (!stream_skip(s, len))
|
||||||
|
goto invalid;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
invalid:
|
||||||
|
mp_err(log, "Invalid EBML length at position %"PRId64"\n", pos);
|
||||||
|
stream_seek(s, pos);
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -291,38 +322,6 @@ int ebml_resync_cluster(struct mp_log *log, stream_t *s)
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Skip the current element, or on error, call ebml_resync_cluster().
|
|
||||||
* end gives the maximum possible file pos (due to EBML parent element size).
|
|
||||||
*/
|
|
||||||
int ebml_read_skip_or_resync_cluster(struct mp_log *log, int64_t end,
|
|
||||||
stream_t *s)
|
|
||||||
{
|
|
||||||
uint64_t len;
|
|
||||||
int l;
|
|
||||||
|
|
||||||
len = ebml_read_length(s, &l);
|
|
||||||
if (len == EBML_UINT_INVALID)
|
|
||||||
goto resync;
|
|
||||||
|
|
||||||
int64_t pos = stream_tell(s);
|
|
||||||
|
|
||||||
if (end >= 0 && pos + len > end)
|
|
||||||
goto resync;
|
|
||||||
|
|
||||||
// When reading corrupted elements, len will often be a random high number,
|
|
||||||
// and stream_skip() will fail when skipping past EOF.
|
|
||||||
if (!stream_skip(s, len)) {
|
|
||||||
stream_seek(s, pos);
|
|
||||||
goto resync;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
resync:
|
|
||||||
return ebml_resync_cluster(log, s) < 0 ? -1 : 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Read the next element, but only the header. The contents
|
* Read the next element, but only the header. The contents
|
||||||
* are supposed to be sub-elements which can be read separately.
|
* are supposed to be sub-elements which can be read separately.
|
||||||
|
@ -92,6 +92,7 @@ struct ebml_parse_ctx {
|
|||||||
#define EBML_FLOAT_INVALID -1000000000.0
|
#define EBML_FLOAT_INVALID -1000000000.0
|
||||||
|
|
||||||
|
|
||||||
|
bool ebml_is_mkv_level1_id(uint32_t id);
|
||||||
uint32_t ebml_read_id (stream_t *s, int *length);
|
uint32_t ebml_read_id (stream_t *s, int *length);
|
||||||
uint64_t ebml_read_vlen_uint (bstr *buffer);
|
uint64_t ebml_read_vlen_uint (bstr *buffer);
|
||||||
int64_t ebml_read_vlen_int (bstr *buffer);
|
int64_t ebml_read_vlen_int (bstr *buffer);
|
||||||
@ -101,9 +102,7 @@ int64_t ebml_read_int (stream_t *s, uint64_t *length);
|
|||||||
double ebml_read_float (stream_t *s, uint64_t *length);
|
double ebml_read_float (stream_t *s, uint64_t *length);
|
||||||
char *ebml_read_ascii (stream_t *s, uint64_t *length);
|
char *ebml_read_ascii (stream_t *s, uint64_t *length);
|
||||||
char *ebml_read_utf8 (stream_t *s, uint64_t *length);
|
char *ebml_read_utf8 (stream_t *s, uint64_t *length);
|
||||||
int ebml_read_skip (stream_t *s, uint64_t *length);
|
int ebml_read_skip(struct mp_log *log, int64_t end, stream_t *s);
|
||||||
int ebml_read_skip_or_resync_cluster(struct mp_log *log, int64_t end,
|
|
||||||
stream_t *s);
|
|
||||||
int ebml_resync_cluster(struct mp_log *log, stream_t *s);
|
int ebml_resync_cluster(struct mp_log *log, stream_t *s);
|
||||||
uint32_t ebml_read_master (stream_t *s, uint64_t *length);
|
uint32_t ebml_read_master (stream_t *s, uint64_t *length);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user