demux, player: fix playback of sparse video streams (w/ still images)

Fixes several issues playing back mpegts with video streams marked as
having "still images". For example, see this video which has frames
only every 6s: https://s3.amazonaws.com/tmm1/music-choice.ts

Changes include:

- start playback right away, without waiting for first video frame
- do not consider the sparse video stream in demuxer underrun detection
- do not require multiple video frames for the VO
- use audio as the master stream for demuxer metadata events
- use audio stream for playback time

Signed-off-by: Aman Gupta <aman@tmm1.net>
2025-02-16 04:07:08 +00:00 · 2018-05-02 19:29:11 -07:00 · 2018-05-02 19:29:11 -07:00 · 814869759c
commit 814869759c
parent b24bd4e570
6 changed files with 36 additions and 5 deletions
--- a/demux/demux.c
+++ b/demux/demux.c
@ -282,6 +282,7 @@ struct demux_stream {
    bool eager;             // try to keep at least 1 packet queued
                            // if false, this stream is disabled, or passively
                            // read (like subtitles)
    bool still_image;       // stream has still video images
    bool refreshing;        // finding old position after track switches
    bool eof;               // end of demuxed stream? (true if no more packets)
@ -703,8 +704,9 @@ static void update_stream_selection_state(struct demux_internal *in,
    for (int n = 0; n < in->num_streams; n++) {
        struct demux_stream *s = in->streams[n]->ds;
        s->still_image = s->sh->still_image;
        s->eager = s->selected && !s->sh->attached_picture;
-        if (s->eager) {
+        if (s->eager && !s->still_image) {
            any_av_streams |= s->type != STREAM_SUB;
            if (!master ||
                (master->type == STREAM_VIDEO && s->type == STREAM_AUDIO))
@ -2994,7 +2996,7 @@ static int cached_demux_control(struct demux_internal *in, int cmd, void *arg)
            struct demux_stream *ds = in->streams[n]->ds;
            if (ds->eager && !(!ds->queue->head && ds->eof) && !ds->ignore_eof)
            {
-                r->underrun |= !ds->reader_head && !ds->eof;
+                r->underrun |= !ds->reader_head && !ds->eof && !ds->still_image;
                r->ts_reader = MP_PTS_MAX(r->ts_reader, ds->base_ts);
                r->ts_end = MP_PTS_MAX(r->ts_end, ds->queue->last_ts);
                any_packets |= !!ds->reader_head;
--- a/demux/demux_lavf.c
+++ b/demux/demux_lavf.c
@ -53,6 +53,9 @@
 #ifndef AV_DISPOSITION_TIMED_THUMBNAILS
 #define AV_DISPOSITION_TIMED_THUMBNAILS 0
 #endif
 #ifndef AV_DISPOSITION_STILL_IMAGE
 #define AV_DISPOSITION_STILL_IMAGE 0
 #endif
 #define INITIAL_PROBE_SIZE STREAM_BUFFER_SIZE
 #define PROBE_BUF_SIZE FFMIN(STREAM_MAX_BUFFER_SIZE, 2 * 1024 * 1024)
@ -717,6 +720,8 @@ static void handle_new_stream(demuxer_t *demuxer, int i)
            sh->forced_track = true;
        if (st->disposition & AV_DISPOSITION_DEPENDENT)
            sh->dependent_track = true;
        if (st->disposition & AV_DISPOSITION_STILL_IMAGE)
            sh->still_image = true;
        if (priv->format_hack.use_stream_ids)
            sh->demuxer_id = st->id;
        AVDictionaryEntry *title = av_dict_get(st->metadata, "title", NULL, 0);
--- a/demux/stheader.h
+++ b/demux/stheader.h
@ -46,6 +46,7 @@ struct sh_stream {
    bool default_track;         // container default track flag
    bool forced_track;          // container forced track flag
    bool dependent_track;       // container dependent track flag
    bool still_image;           // video stream contains still images
    int hls_bitrate;
    struct mp_tags *tags;
--- a/player/core.h
+++ b/player/core.h
@ -180,6 +180,8 @@ struct vo_chain {
    // - video consists of a single picture, which should be shown only once
    // - do not sync audio to video in any way
    bool is_coverart;
    // - video consists of sparse still images
    bool is_sparse;
 };
 // Like vo_chain, for audio.
--- a/player/playloop.c
+++ b/player/playloop.c
@ -950,7 +950,9 @@ static void handle_dummy_ticks(struct MPContext *mpctx)
 // Update current playback time.
 static void handle_playback_time(struct MPContext *mpctx)
 {
-    if (mpctx->vo_chain && !mpctx->vo_chain->is_coverart &&
+    if (mpctx->vo_chain &&
        !mpctx->vo_chain->is_coverart &&
        !mpctx->vo_chain->is_sparse &&
        mpctx->video_status >= STATUS_PLAYING &&
        mpctx->video_status < STATUS_EOF)
    {
@ -986,6 +988,13 @@ static void handle_playback_restart(struct MPContext *mpctx)
 {
    struct MPOpts *opts = mpctx->opts;
    // Do not wait for video stream if it only has sparse frames.
    if (mpctx->vo_chain &&
        mpctx->vo_chain->is_sparse &&
        mpctx->video_status < STATUS_READY) {
        mpctx->video_status = STATUS_READY;
    }
    if (mpctx->audio_status < STATUS_READY ||
        mpctx->video_status < STATUS_READY)
        return;
@ -1008,7 +1017,9 @@ static void handle_playback_restart(struct MPContext *mpctx)
        }
        // Video needed, but not started yet -> wait.
-        if (mpctx->vo_chain && !mpctx->vo_chain->is_coverart &&
+        if (mpctx->vo_chain &&
            !mpctx->vo_chain->is_coverart &&
            !mpctx->vo_chain->is_sparse &&
            mpctx->video_status <= STATUS_READY)
            return;
--- a/player/video.c
+++ b/player/video.c
@ -256,6 +256,7 @@ void reinit_video_chain_src(struct MPContext *mpctx, struct track *track)
        vo_c->dec_src = track->dec->f->pins[0];
        vo_c->filter->container_fps = track->dec->fps;
        vo_c->is_coverart = !!track->stream->attached_picture;
        vo_c->is_sparse = track->stream->still_image;
        track->vo_c = vo_c;
        vo_c->track = track;
@ -365,9 +366,12 @@ static void handle_new_frame(struct MPContext *mpctx)
    double frame_time = 0;
    double pts = mpctx->next_frames[0]->pts;
    bool is_sparse = mpctx->vo_chain && mpctx->vo_chain->is_sparse;
    if (mpctx->video_pts != MP_NOPTS_VALUE) {
        frame_time = pts - mpctx->video_pts;
-        double tolerance = mpctx->demuxer->ts_resets_possible ? 5 : 1e4;
+        double tolerance = mpctx->demuxer->ts_resets_possible &&
                           !is_sparse ? 5 : 1e4;
        if (frame_time <= 0 || frame_time >= tolerance) {
            // Assume a discontinuity.
            MP_WARN(mpctx, "Invalid video timestamp: %f -> %f\n",
@ -403,6 +407,9 @@ static int get_req_frames(struct MPContext *mpctx, bool eof)
    if (mpctx->video_out->driver->caps & VO_CAP_NORETAIN)
        return 1;
    if (mpctx->vo_chain && mpctx->vo_chain->is_sparse)
        return 1;
    if (mpctx->opts->untimed || mpctx->video_out->driver->untimed)
        return 1;
@ -594,6 +601,9 @@ static void update_av_diff(struct MPContext *mpctx, double offset)
        mpctx->video_status != STATUS_PLAYING)
        return;
    if (mpctx->vo_chain && mpctx->vo_chain->is_sparse)
        return;
    double a_pos = playing_audio_pts(mpctx);
    if (a_pos != MP_NOPTS_VALUE && mpctx->video_pts != MP_NOPTS_VALUE) {
        mpctx->last_av_difference = a_pos - mpctx->video_pts