stream_libarchive: fix unnecessarily opening all volumes on opening

Seems like I'm still not done with rar playback stuff...

It turns out the reason for archive_read_open1() opening all volumes had
nothing to do with libarchive's rar code, but was a consequence of how
multi volume support is implemented in libarchive, and due to the fact
that we enabled archive_read_support_format_zip_seekable() (through
archive_read_support_format_zip()).

The seekable zip format will seek to the end of the file and search for
a zip "header" there. It could possibly be considered a libarchive bug
that it does that even if it's fairly sure that it's a RAR file.

We already do probing on a small buffer read from the start of the file
(i.e. not giving libarchive a way to seek the stream before we think
it's an archive), but that does not help, since libarchive needs to
probe _again_. libarchive does not seem to provide a function to query
the format (no archive_read_get_format()). Which seems quite strange,
but at least I didn't find one.

This commit works around this by doing some manual rar/zip probing. We
could have gone with rar probing only, but detecting zip separately
lets us keep stream_libarchive from seeking to the end of the file
during early probing. This is an additional bonus on top of "fixing"
multi volume rar.

The zip probing is from archive_read_format_zip_streamable_bid(). The
rar signature is the common prefix of the rar and rar5 formats in
libarchive (presumably the RAR fixed header parts without version).

If the demuxer seeks to the end of the rar entry, this will still open
all volumes; I'm not sure whether the old/removed rar code in mpv could
handle this better.

See: #7182
This commit is contained in:
wm4 2020-01-09 02:25:13 +01:00
parent 5e0875c9e0
commit 33e999de82
1 changed files with 54 additions and 9 deletions

View File

@ -33,6 +33,33 @@ struct mp_archive_volume {
char *url;
};
// Return true if the first bytes obtained from the stream via
// stream_read_peek() equal the RAR signature "Rar!\x1a\x07" (the common
// prefix of the rar and rar5 formats in libarchive). Returns false if fewer
// bytes than the signature length could be peeked.
static bool probe_rar(struct stream *s)
{
    // Read-only lookup table: const so it can never be modified by accident.
    static const uint8_t rar_sig[] = {0x52, 0x61, 0x72, 0x21, 0x1a, 0x07};
    // Size the peek buffer from the signature so the two cannot drift apart.
    uint8_t buf[sizeof(rar_sig)];
    if (stream_read_peek(s, buf, sizeof(buf)) != sizeof(buf))
        return false;
    return memcmp(buf, rar_sig, sizeof(rar_sig)) == 0;
}
// Return true if the first 4 bytes obtained from the stream via
// stream_read_peek() look like a zip record signature: "PK" followed by one
// of a fixed set of 2-byte record type markers. Returns false if 4 bytes
// could not be peeked.
static bool probe_zip(struct stream *s)
{
    // Accepted 3rd/4th byte pairs following the leading "PK".
    // Lifted from libarchive, BSD license.
    static const uint8_t pk_tails[][2] = {
        {0x01, 0x02},
        {0x03, 0x04},
        {0x05, 0x06},
        {0x06, 0x06},
        {0x07, 0x08},
        {'0', '0'},
    };
    uint8_t hdr[4];

    if (stream_read_peek(s, hdr, sizeof(hdr)) != sizeof(hdr))
        return false;

    if (hdr[0] != 'P' || hdr[1] != 'K')
        return false;

    for (size_t i = 0; i < sizeof(pk_tails) / sizeof(pk_tails[0]); i++) {
        if (hdr[2] == pk_tails[i][0] && hdr[3] == pk_tails[i][1])
            return true;
    }
    return false;
}
static bool volume_seek(struct mp_archive_volume *vol)
{
if (!vol->src || vol->seek_to < 0)
@ -267,19 +294,37 @@ struct mp_archive *mp_archive_new(struct mp_log *log, struct stream *src,
locale_t oldlocale = uselocale(mpa->locale);
archive_read_support_format_7zip(mpa->arch);
archive_read_support_format_iso9660(mpa->arch);
bool maybe_rar = probe_rar(src);
bool maybe_zip = probe_zip(src);
bool probe_all = flags & MP_ARCHIVE_FLAG_UNSAFE;
archive_read_support_format_rar(mpa->arch);
archive_read_support_format_rar5(mpa->arch);
archive_read_support_format_zip(mpa->arch);
// Exclude other formats if it's probably RAR, because other formats may
// behave suboptimally with multiple volumes exposed, such as opening every
// single volume by seeking to the end of the file.
if (!maybe_rar) {
archive_read_support_format_7zip(mpa->arch);
archive_read_support_format_iso9660(mpa->arch);
archive_read_support_filter_bzip2(mpa->arch);
archive_read_support_filter_gzip(mpa->arch);
archive_read_support_filter_xz(mpa->arch);
if (flags & MP_ARCHIVE_FLAG_UNSAFE) {
archive_read_support_format_zip_streamable(mpa->arch);
if (probe_all) {
archive_read_support_format_gnutar(mpa->arch);
archive_read_support_format_tar(mpa->arch);
}
// This zip reader is normally preferable. However, it seeks to the end
// of the file, which may be annoying (HTTP reconnect, volume skipping),
// so use it only as last resort, or if it's relatively likely that it's
// really zip.
if (maybe_zip || probe_all)
archive_read_support_format_zip_seekable(mpa->arch);
}
archive_read_set_read_callback(mpa->arch, read_cb);
archive_read_set_skip_callback(mpa->arch, skip_cb);
archive_read_set_open_callback(mpa->arch, open_cb);