mpv/demux/packet.c

294 lines
8.7 KiB
C
Raw Normal View History

/*
* This file is part of mpv.
*
* mpv is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* mpv is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with mpv. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <libavcodec/avcodec.h>
#include <libavutil/hdr_dynamic_metadata.h>
#include <libavutil/intreadwrite.h>
#include "common/av_common.h"
#include "common/common.h"
#include "demux.h"
#include "demux/ebml.h"
#include "packet.h"
demux: add an on-disk cache Somewhat similar to the old --cache-file, except for the demuxer cache. Instead of keeping packet data in memory, it's written to disk and read back when needed. The idea is to reduce main memory usage, while allowing fast seeking in large cached network streams (especially live streams). Keeping the packet metadata on disk would be rather hard (would use mmap or so, or rewrite the entire demux.c packet queue handling), and since it's relatively small, just keep it in memory. Also for simplicity, the disk cache is append-only. If you're watching really long livestreams, and need pruning, you're probably out of luck. This still could be improved by trying to free unused blocks with fallocate(), but since we're writing multiple streams in an interleaved manner, this is slightly hard. Some rather gross ugliness in packet.h: we want to store the file position of the cached data somewhere, but on 32 bit architectures, we don't have any usable 64 bit members for this, just the buf/len fields, which add up to 64 bit - so the shitty union aliases this memory. Error paths untested. Side data (the complicated part of trying to serialize ffmpeg packets) untested. Stream recording had to be adjusted. Some minor details change due to this, but probably nothing important. The change in attempt_range_joining() is because packets in cache have no valid len field. It was a useful check (heuristically finding broken cases), but not a necessary one. Various other approaches were tried. It would be interesting to list them and to mention the pros and cons, but I don't feel like it.
2019-06-13 17:10:32 +00:00
// Release the refcounted payload held by dp, leaving dp itself allocated.
// Pointers that are not refcounted (for example demux_packet.codec) are not
// touched. Ordinary callers should just talloc_free(dp); this entry point
// only exists for a handful of very specific use cases.
void demux_packet_unref_contents(struct demux_packet *dp)
{
    // Nothing to release if the packet owns no AVPacket.
    if (!dp->avpacket)
        return;

    // A packet that still owns an AVPacket must not be flagged as cached.
    assert(!dp->is_cached);

    // av_packet_free() also resets dp->avpacket to NULL.
    av_packet_free(&dp->avpacket);

    // The data pointer referred to the AVPacket's buffer; clear the stale view.
    dp->buffer = NULL;
    dp->len = 0;
}
// talloc destructor installed by packet_create(): drops the packet's
// refcounted contents; ptr is the struct demux_packet being destroyed.
static void packet_destroy(void *ptr)
{
    struct demux_packet *pkt = ptr;

    demux_packet_unref_contents(pkt);
}
static struct demux_packet *packet_create(void)
{
struct demux_packet *dp = talloc(NULL, struct demux_packet);
talloc_set_destructor(dp, packet_destroy);
*dp = (struct demux_packet) {
.pts = MP_NOPTS_VALUE,
.dts = MP_NOPTS_VALUE,
.duration = -1,
.pos = -1,
Rewrite ordered chapters and timeline stuff This uses a different method to piece segments together. The old approach basically changes to a new file (with a new start offset) any time a segment ends. This meant waiting for audio/video end on segment end, and then changing to the new segment all at once. It had a very weird impact on the playback core, and some things (like truly gapless segment transitions, or frame backstepping) just didn't work. The new approach adds the demux_timeline pseudo-demuxer, which presents an uniform packet stream from the many segments. This is pretty similar to how ordered chapters are implemented everywhere else. It also reminds of the FFmpeg concat pseudo-demuxer. The "pure" version of this approach doesn't work though. Segments can actually have different codec configurations (different extradata), and subtitles are most likely broken too. (Subtitles have multiple corner cases which break the pure stream-concatenation approach completely.) To counter this, we do two things: - Reinit the decoder with each segment. We go as far as allowing concatenating files with completely different codecs for the sake of EDL (which also uses the timeline infrastructure). A "lighter" approach would try to make use of decoder mechanism to update e.g. the extradata, but that seems fragile. - Clip decoded data to segment boundaries. This is equivalent to normal playback core mechanisms like hr-seek, but now the playback core doesn't need to care about these things. These two mechanisms are equivalent to what happened in the old implementation, except they don't happen in the playback core anymore. In other words, the playback core is completely relieved from timeline implementation details. (Which honestly is exactly what I'm trying to do here. I don't think ordered chapter behavior deserves improvement, even if it's bad - but I want to get it out from the playback core.) There is code duplication between audio and video decoder common code. 
This is awful and could be shareable - but this will happen later. Note that the audio path has some code to clip audio frames for the purpose of codec preroll/gapless handling, but it's not shared as sharing it would cause more pain than it would help.
2016-02-15 20:04:07 +00:00
.start = MP_NOPTS_VALUE,
.end = MP_NOPTS_VALUE,
.stream = -1,
.avpacket = av_packet_alloc(),
.animated = -1,
};
MP_HANDLE_OOM(dp->avpacket);
return dp;
}
// Create a demux_packet from avpkt. Only the payload and side data are
// carried over; PTS/DTS/pos/etc. are not.
// avpkt->data==NULL together with avpkt->size!=0 is accepted here (the
// libavcodec API itself does not allow that) because it keeps
// new_demux_packet() simple: in that case a fresh buffer of avpkt->size
// bytes is allocated instead of referencing avpkt.
// Returns NULL if the packet is larger than 1000000000 bytes or if libav
// reports an error.
struct demux_packet *new_demux_packet_from_avpacket(struct AVPacket *avpkt)
{
    if (avpkt->size > 1000000000)
        return NULL;

    struct demux_packet *pkt = packet_create();

    // We hope av_packet_ref() does not need/access AVPacket input padding,
    // because otherwise new_demux_packet_from() wouldn't work.
    int err = avpkt->data ? av_packet_ref(pkt->avpacket, avpkt)
                          : av_new_packet(pkt->avpacket, avpkt->size);
    if (err < 0) {
        talloc_free(pkt);
        return NULL;
    }

    // Mirror the AVPacket's payload view in the demux_packet fields.
    pkt->buffer = pkt->avpacket->data;
    pkt->len = pkt->avpacket->size;
    return pkt;
}
// Create a demux_packet that takes a new reference to buf and exposes the
// whole buffer as payload. (buf must include proper padding.)
// Returns NULL if buf is NULL, is larger than 1000000000 bytes, or if the
// new reference cannot be created.
struct demux_packet *new_demux_packet_from_buf(struct AVBufferRef *buf)
{
    if (!buf || buf->size > 1000000000)
        return NULL;

    struct demux_packet *pkt = packet_create();

    pkt->avpacket->buf = av_buffer_ref(buf);
    if (!pkt->avpacket->buf) {
        talloc_free(pkt);
        return NULL;
    }

    // Both the demux_packet and its AVPacket describe the same bytes.
    pkt->buffer = buf->data;
    pkt->avpacket->data = pkt->buffer;
    pkt->len = buf->size;
    pkt->avpacket->size = pkt->len;
    return pkt;
}
// Create a demux_packet holding a copy of the len bytes at data.
// Input data doesn't need to be padded; the packet buffer is allocated by
// new_demux_packet().
// Returns NULL if new_demux_packet() fails (e.g. len too large or OOM).
struct demux_packet *new_demux_packet_from(void *data, size_t len)
{
    struct demux_packet *dp = new_demux_packet(len);
    if (!dp)
        return NULL;
    // Skip the copy for empty payloads: callers such as demux_copy_packet()
    // may pass data == NULL with len == 0, and handing a null pointer to
    // memcpy() is undefined behavior even when the length is zero.
    if (len > 0)
        memcpy(dp->avpacket->data, data, len);
    return dp;
}
// Create a demux_packet with a freshly allocated payload buffer of len bytes
// (allocated through av_new_packet()).
// Returns NULL if len exceeds INT_MAX or the allocation fails.
struct demux_packet *new_demux_packet(size_t len)
{
    if (len > INT_MAX)
        return NULL;

    struct demux_packet *pkt = packet_create();

    if (av_new_packet(pkt->avpacket, len) < 0) {
        talloc_free(pkt);
        return NULL;
    }

    pkt->len = len;
    pkt->buffer = pkt->avpacket->data;
    return pkt;
}
// Truncate the payload of dp to len bytes (len must not exceed the current
// length) and zero AV_INPUT_BUFFER_PADDING_SIZE bytes after the new end.
// Packets whose current length is 0 are left untouched. Only dp->len is
// updated here; dp->avpacket is not modified.
void demux_packet_shorten(struct demux_packet *dp, size_t len)
{
    assert(len <= dp->len);

    if (!dp->len)
        return;

    dp->len = len;
    // Re-establish the zeroed padding directly behind the shortened data.
    memset(dp->buffer + len, 0, AV_INPUT_BUFFER_PADDING_SIZE);
}
// Free a demux_packet. This is a plain talloc_free(); packets made by
// packet_create() have a destructor that releases their contents as well.
void free_demux_packet(struct demux_packet *dp)
{
    talloc_free(dp);
}
// Copy the per-packet metadata fields from src to dst. The payload
// (buffer/len/avpacket) is not touched.
void demux_packet_copy_attribs(struct demux_packet *dst, struct demux_packet *src)
{
    // Identity and position.
    dst->stream = src->stream;
    dst->pos = src->pos;
    dst->keyframe = src->keyframe;

    // Timing.
    dst->pts = src->pts;
    dst->dts = src->dts;
    dst->duration = src->duration;

    // Segment information.
    dst->segmented = src->segmented;
    dst->start = src->start;
    dst->end = src->end;
    dst->codec = src->codec;

    // Backward playback flags.
    dst->back_restart = src->back_restart;
    dst->back_preroll = src->back_preroll;
}
// Duplicate dp, including its attributes. Packets with an AVPacket are
// duplicated via new_demux_packet_from_avpacket(); packets without one
// (i.e. not created by new_demux_packet*()) have their raw bytes copied.
// Returns NULL if the new packet could not be created.
struct demux_packet *demux_copy_packet(struct demux_packet *dp)
{
    struct demux_packet *copy = dp->avpacket
        ? new_demux_packet_from_avpacket(dp->avpacket)
        : new_demux_packet_from(dp->buffer, dp->len);

    if (copy)
        demux_packet_copy_attribs(copy, dp);
    return copy;
}
#define ROUND_ALLOC(s) MP_ALIGN_UP((s), 16)
// Attempt to estimate the total memory consumption of the given packet.
// This is important if we store thousands of packets and not to exceed
// user-provided limits. Of course we can't know how much memory internal
// fragmentation of the libc memory allocator will waste.
// Note that this should return a "stable" value - e.g. if a new packet ref
// is created, this should return the same value with the new ref. (This
// implies the value is not exact and does not return the actual size of
// memory wasted due to internal fragmentation.)
size_t demux_packet_estimate_total_size(struct demux_packet *dp)
{
size_t size = ROUND_ALLOC(sizeof(struct demux_packet));
size += 8 * sizeof(void *); // ta overhead
size += 10 * sizeof(void *); // additional estimate for ta_ext_header
if (dp->avpacket) {
demux: add a on-disk cache Somewhat similar to the old --cache-file, except for the demuxer cache. Instead of keeping packet data in memory, it's written to disk and read back when needed. The idea is to reduce main memory usage, while allowing fast seeking in large cached network streams (especially live streams). Keeping the packet metadata on disk would be rather hard (would use mmap or so, or rewrite the entire demux.c packet queue handling), and since it's relatively small, just keep it in memory. Also for simplicity, the disk cache is append-only. If you're watching really long livestreams, and need pruning, you're probably out of luck. This still could be improved by trying to free unused blocks with fallocate(), but since we're writing multiple streams in an interleaved manner, this is slightly hard. Some rather gross ugliness in packet.h: we want to store the file position of the cached data somewhere, but on 32 bit architectures, we don't have any usable 64 bit members for this, just the buf/len fields, which add up to 64 bit - so the shitty union aliases this memory. Error paths untested. Side data (the complicated part of trying to serialize ffmpeg packets) untested. Stream recording had to be adjusted. Some minor details change due to this, but probably nothing important. The change in attempt_range_joining() is because packets in cache have no valid len field. It was a useful check (heuristically finding broken cases), but not a necessary one. Various other approaches were tried. It would be interesting to list them and to mention the pros and cons, but I don't feel like it.
2019-06-13 17:10:32 +00:00
assert(!dp->is_cached);
size += ROUND_ALLOC(dp->len);
size += ROUND_ALLOC(sizeof(AVPacket));
size += 8 * sizeof(void *); // ta overhead
size += ROUND_ALLOC(sizeof(AVBufferRef));
size += ROUND_ALLOC(64); // upper bound estimate on sizeof(AVBuffer)
size += ROUND_ALLOC(dp->avpacket->side_data_elems *
sizeof(dp->avpacket->side_data[0]));
for (int n = 0; n < dp->avpacket->side_data_elems; n++)
size += ROUND_ALLOC(dp->avpacket->side_data[n].size);
}
return size;
}
// Attach AV_PKT_DATA_SKIP_SAMPLES side data to dp, storing start and end as
// two little-endian 32 bit values. Does nothing (and succeeds) if both are 0.
// Returns 0 on success, -1 if dp has no AVPacket or the side data could not
// be allocated.
int demux_packet_set_padding(struct demux_packet *dp, int start, int end)
{
    if (start == 0 && end == 0)
        return 0;
    if (!dp->avpacket)
        return -1;

    // 10 bytes are requested; only the first 8 are written here.
    uint8_t *sd = av_packet_new_side_data(dp->avpacket,
                                          AV_PKT_DATA_SKIP_SAMPLES, 10);
    if (!sd)
        return -1;

    AV_WL32(sd, start);
    AV_WL32(sd + 4, end);
    return 0;
}
// Attach a Matroska BlockAdditional (identified by id, with size bytes at
// data) to dp as packet side data.
//
// ITU-T T.35 payloads whose 6 byte header matches country code 0xB5,
// provider code 0x3C, provider oriented code 1 and application identifier 4
// are parsed with av_dynamic_hdr_plus_from_t35() and stored as
// AV_PKT_DATA_DYNAMIC_HDR10_PLUS. Everything else (including T.35 payloads
// that are too short or carry a different header) is stored verbatim as
// AV_PKT_DATA_MATROSKA_BLOCKADDITIONAL: the id as a big-endian 64 bit value
// followed by the raw bytes.
//
// Returns 0 on success, -1 if dp has no AVPacket, if parsing/attaching the
// HDR10+ data fails, or if the side data cannot be allocated.
int demux_packet_add_blockadditional(struct demux_packet *dp, uint64_t id,
                                     void *data, size_t size)
{
    enum {
        T35_HEADER_SIZE           = 6,
        T35_COUNTRY_CODE_US       = 0xB5,
        T35_PROVIDER_CODE_SMPTE   = 0x3C,
        T35_PROVIDER_ORIENTED     = 1,
        T35_APPLICATION_ID        = 4,
    };

    if (!dp->avpacket)
        return -1;

    if (id == MATROSKA_BLOCK_ADD_ID_TYPE_ITU_T_T35 && size >= T35_HEADER_SIZE) {
        uint8_t *hdr = data;

        // Header layout: country (1), provider (2), oriented code (2), app id (1).
        uint8_t country = AV_RB8(hdr);
        uint16_t provider = AV_RB16(hdr + 1);
        uint16_t oriented = AV_RB16(hdr + 3);
        uint8_t app_id = AV_RB8(hdr + 5);

        int is_hdr10plus = country == T35_COUNTRY_CODE_US &&
                           provider == T35_PROVIDER_CODE_SMPTE &&
                           oriented == T35_PROVIDER_ORIENTED &&
                           app_id == T35_APPLICATION_ID;

        if (is_hdr10plus) {
            uint8_t *payload = hdr + T35_HEADER_SIZE;
            size_t payload_size = size - T35_HEADER_SIZE;

            size_t meta_size;
            AVDynamicHDRPlus *meta = av_dynamic_hdr_plus_alloc(&meta_size);
            MP_HANDLE_OOM(meta);

            int failed = av_dynamic_hdr_plus_from_t35(meta, payload,
                                                      payload_size) < 0;
            // Only try to attach the metadata if parsing succeeded.
            if (!failed) {
                failed = av_packet_add_side_data(dp->avpacket,
                                                 AV_PKT_DATA_DYNAMIC_HDR10_PLUS,
                                                 (uint8_t *)meta, meta_size) < 0;
            }
            if (failed) {
                // The metadata was not attached to the packet; free it here.
                av_free(meta);
                return -1;
            }
            return 0;
        }
    }

    // Generic path: 8 byte big-endian id followed by the unmodified payload.
    uint8_t *raw = av_packet_new_side_data(dp->avpacket,
                                           AV_PKT_DATA_MATROSKA_BLOCKADDITIONAL,
                                           8 + size);
    if (!raw)
        return -1;

    AV_WB64(raw, id);
    if (size > 0)
        memcpy(raw + 8, data, size);
    return 0;
}