video, audio: always read all frames before getting next packet

The old code tried to make sure at all times to try to read a new packet. Only once that was read, it tried to retrieve new video or audio frames the decoder might already have decoded. Change this to strictly read frames from the decoder until it signals that it wants a new packet, and only then read and feed a new packet. This is in theory nicer, follows the libavcodec recommended data flow, and reduces the minimum latency by 1 frame. This merely requires switching the order in which those calls are done. Normally, the decoder will return only 1 frame until a new packet is required. If we would just feed it 1 packet, return DATA_AGAIN, and wait until the next frame is decoded, we would run the playloop 1 time too often for no reason (which is fine but might have some overhead). To avoid this, try to read a frame again after possibly feeding a packet. For this reason, move the feed/read code to its own functions each, instead of merely moving the code. The audio and video code for this particular thing is basically duplicated. The idea is to unify them one day, so make the change to both. (Doing this for video is the real motivation for this change, see below.) The video code change is slightly more complicated, because we have to care about the framedrop counting (which is just a heuristic, but for now considered better than nothing, and possibly considered required to warn the user of framedrops happening - maybe). Apparently this change helps with stalling streams on Android with the mediacodec wrapper and mpeg2 decoder implementations which deinterlace on decoding (and return 2 frames per packet). Based on an idea and observations by tmm1.
2025-02-20 23:07:02 +00:00 · 2017-12-30 13:14:15 +01:00 · 2017-12-30 13:14:15 +01:00 · 33e5755c23
commit 33e5755c23
parent 2dd020efc2
3 changed files with 42 additions and 3 deletions
--- a/audio/decode/dec_audio.c
+++ b/audio/decode/dec_audio.c
@ -202,7 +202,7 @@ static bool is_new_segment(struct dec_audio *da, struct demux_packet *p)
        (p->start != da->start || p->end != da->end || p->codec != da->codec);
 }

-void audio_work(struct dec_audio *da)
+static void feed_packet(struct dec_audio *da)
 {
    if (da->current_frame || !da->ad_driver)
        return;
@ -228,6 +228,14 @@ void audio_work(struct dec_audio *da)
        da->packet = NULL;
    }

+    da->current_state = DATA_AGAIN;
+}
+
+static void read_frame(struct dec_audio *da)
+{
+    if (da->current_frame || !da->ad_driver)
+        return;
+
    bool progress = da->ad_driver->receive_frame(da, &da->current_frame);

    da->current_state = da->current_frame ? DATA_OK : DATA_AGAIN;
@ -271,6 +279,15 @@ void audio_work(struct dec_audio *da)
    }
 }

+void audio_work(struct dec_audio *da) // read a frame first; feed a packet only if none came out
+{
+    read_frame(da); // ask the decoder for a frame before feeding anything new
+    if (!da->current_frame) { // the read above produced no frame
+        feed_packet(da);
+        read_frame(da); // retry right after feeding, to avoid redundant iterations
+    }
+}
+
 // Fetch an audio frame decoded with audio_work(). Returns one of:
 //  DATA_OK:    *out_frame is set to a new image
 //  DATA_WAIT:  waiting for demuxer; will receive a wakeup signal
--- a/video/decode/dec_video.c
+++ b/video/decode/dec_video.c
@ -68,6 +68,7 @@ void video_reset(struct dec_video *d_video)
    d_video->has_broken_decoded_pts = 0;
    d_video->last_format = d_video->fixed_format = (struct mp_image_params){0};
    d_video->dropped_frames = 0;
+    d_video->may_decoder_framedrop = false;
    d_video->current_state = DATA_AGAIN;
    mp_image_unrefp(&d_video->current_mpi);
    talloc_free(d_video->packet);
@ -400,7 +401,7 @@ static bool is_new_segment(struct dec_video *d_video, struct demux_packet *p)
         p->codec != d_video->codec);
 }

-void video_work(struct dec_video *d_video)
+static void feed_packet(struct dec_video *d_video)
 {
    if (d_video->current_mpi || !d_video->vd_driver)
        return;
@ -439,18 +440,29 @@ void video_work(struct dec_video *d_video)

        talloc_free(d_video->packet);
        d_video->packet = NULL;
+
+        d_video->may_decoder_framedrop = framedrop_type == 1;
    }

+    d_video->current_state = DATA_AGAIN;
+}
+
+static void read_frame(struct dec_video *d_video)
+{
+    if (d_video->current_mpi || !d_video->vd_driver)
+        return;
+
    bool progress = receive_frame(d_video, &d_video->current_mpi);

    d_video->current_state = DATA_OK;
    if (!progress) {
        d_video->current_state = DATA_EOF;
    } else if (!d_video->current_mpi) {
-        if (framedrop_type == 1)
+        if (d_video->may_decoder_framedrop)
            d_video->dropped_frames += 1;
        d_video->current_state = DATA_AGAIN;
    }
+    d_video->may_decoder_framedrop = false;

    bool segment_ended = d_video->current_state == DATA_EOF;

@ -487,6 +499,15 @@ void video_work(struct dec_video *d_video)
    }
 }

+void video_work(struct dec_video *d_video) // read a frame first; feed a packet only if none came out
+{
+    read_frame(d_video); // ask the decoder for a frame before feeding anything new
+    if (!d_video->current_mpi) { // the read above produced no image
+        feed_packet(d_video);
+        read_frame(d_video); // retry right after feeding, to avoid redundant iterations
+    }
+}
+
 // Fetch an image decoded with video_work(). Returns one of:
 //  DATA_OK:    *out_mpi is set to a new image
 //  DATA_WAIT:  waiting for demuxer; will receive a wakeup signal
--- a/video/decode/dec_video.h
+++ b/video/decode/dec_video.h
@ -77,6 +77,7 @@ struct dec_video {
    struct demux_packet *new_segment;
    struct demux_packet *packet;
    bool framedrop_enabled;
+    bool may_decoder_framedrop;
    struct mp_image *current_mpi;
    int current_state;
 };