encode: rewrite half of it

The main change is that we delay opening the muxer ("writing headers") until we have data from all streams. This fixes race conditions at init that were caused by broken assumptions in the old code. This also changes a lot of other things. I found and fixed a few API violations (often cases for which better mechanisms have since been invented, so the old ones are no longer valid). I am trying to move away from the public mutex and the shared fields in encode_lavc_context. For now the mutex is still needed for some timestamp-related fields, but most of the shared fields are gone. It also removes some bad code duplication between the audio and video paths.
2018-04-22 19:40:36 +02:00 · 2018-04-22 19:40:36 +02:00 · 6c8362ef54
parent 8135e25600
commit 6c8362ef54
11 changed files with 808 additions and 1168 deletions
--- a/DOCS/man/encode.rst
+++ b/DOCS/man/encode.rst
@ -8,9 +8,8 @@ You can encode files from one format/codec to another using this facility.

 ``--of=<format>``
    Specifies the output format (overrides autodetection by the file name
-    extension of the file specified by ``-o``). This can be a comma separated
-    list of possible formats to try. See ``--of=help`` for a full list of
-    supported formats.
+    extension of the file specified by ``-o``). See ``--of=help`` for a full
+    list of supported formats.

 ``--ofopts=<options>``
    Specifies the output format options for libavformat.
@ -59,9 +58,8 @@ You can encode files from one format/codec to another using this facility.
    avoid ``--oautofps``.

 ``--oac=<codec>``
-    Specifies the output audio codec. This can be a comma separated list of
-    possible codecs to try. See ``--oac=help`` for a full list of supported
-    codecs.
+    Specifies the output audio codec. See ``--oac=help`` for a full list of
+    supported codecs.

 ``--oaoffset=<value>``
    Shifts audio data by the given time (in seconds) by adding/removing
@ -97,9 +95,8 @@ You can encode files from one format/codec to another using this facility.
    By default, the order is unspecified. Deprecated.

 ``--ovc=<codec>``
-    Specifies the output video codec. This can be a comma separated list of
-    possible codecs to try. See ``--ovc=help`` for a full list of supported
-    codecs.
+    Specifies the output video codec. See ``--ovc=help`` for a full list of
+    supported codecs.

 ``--ovoffset=<value>``
    Shifts video data by the given time (in seconds) by shifting the pts
--- a/audio/out/ao_lavc.c
+++ b/audio/out/ao_lavc.c
@ -40,8 +40,8 @@
 #include "common/encode_lavc.h"

 struct priv {
-    AVStream *stream;
-    AVCodecContext *codec;
+    struct encoder_context *enc;
+
    int pcmhack;
    int aframesize;
    int aframecount;
@ -53,14 +53,13 @@ struct priv {
    double expected_next_pts;

    AVRational worst_time_base;
-    int worst_time_base_is_stream;

    bool shutdown;
 };

 static void encode(struct ao *ao, double apts, void **data);

-static bool supports_format(AVCodec *codec, int format)
+static bool supports_format(const AVCodec *codec, int format)
 {
    for (const enum AVSampleFormat *sampleformat = codec->sample_fmts;
         sampleformat && *sampleformat != AV_SAMPLE_FMT_NONE;
@ -72,7 +71,7 @@ static bool supports_format(AVCodec *codec, int format)
    return false;
 }

-static void select_format(struct ao *ao, AVCodec *codec)
+static void select_format(struct ao *ao, const AVCodec *codec)
 {
    int formats[AF_FORMAT_COUNT + 1];
    af_get_best_sample_formats(ao->format, formats);
@ -88,70 +87,53 @@ static void select_format(struct ao *ao, AVCodec *codec)
 // open & setup audio device
 static int init(struct ao *ao)
 {
-    struct priv *ac = talloc_zero(ao, struct priv);
-    AVCodec *codec;
+    struct priv *ac = ao->priv;

-    ao->priv = ac;
-
-    if (!encode_lavc_available(ao->encode_lavc_ctx)) {
-        MP_ERR(ao, "the option --o (output file) must be specified\n");
+    ac->enc = encoder_context_alloc(ao->encode_lavc_ctx, STREAM_AUDIO, ao->log);
+    if (!ac->enc)
        return -1;
-    }
+    talloc_steal(ac, ac->enc);

-    pthread_mutex_lock(&ao->encode_lavc_ctx->lock);
-
-    if (encode_lavc_alloc_stream(ao->encode_lavc_ctx,
-                                 AVMEDIA_TYPE_AUDIO,
-                                 &ac->stream, &ac->codec) < 0)
-    {
-        MP_ERR(ao, "could not get a new audio stream\n");
-        goto fail;
-    }
-
-    codec = ao->encode_lavc_ctx->ac;
+    AVCodecContext *encoder = ac->enc->encoder;
+    const AVCodec *codec = encoder->codec;

    int samplerate = af_select_best_samplerate(ao->samplerate,
                                               codec->supported_samplerates);
    if (samplerate > 0)
        ao->samplerate = samplerate;

-    // TODO: Remove this redundancy with encode_lavc_alloc_stream also
-    // setting the time base.
-    // Using codec->time_base is deprecated, but needed for older lavf.
-    ac->stream->time_base.num = 1;
-    ac->stream->time_base.den = ao->samplerate;
-    ac->codec->time_base.num = 1;
-    ac->codec->time_base.den = ao->samplerate;
+    encoder->time_base.num = 1;
+    encoder->time_base.den = ao->samplerate;

-    ac->codec->sample_rate = ao->samplerate;
+    encoder->sample_rate = ao->samplerate;

    struct mp_chmap_sel sel = {0};
    mp_chmap_sel_add_any(&sel);
    if (!ao_chmap_sel_adjust2(ao, &sel, &ao->channels, false))
        goto fail;
    mp_chmap_reorder_to_lavc(&ao->channels);
-    ac->codec->channels = ao->channels.num;
-    ac->codec->channel_layout = mp_chmap_to_lavc(&ao->channels);
+    encoder->channels = ao->channels.num;
+    encoder->channel_layout = mp_chmap_to_lavc(&ao->channels);

-    ac->codec->sample_fmt = AV_SAMPLE_FMT_NONE;
+    encoder->sample_fmt = AV_SAMPLE_FMT_NONE;

    select_format(ao, codec);

    ac->sample_size = af_fmt_to_bytes(ao->format);
-    ac->codec->sample_fmt = af_to_avformat(ao->format);
-    ac->codec->bits_per_raw_sample = ac->sample_size * 8;
+    encoder->sample_fmt = af_to_avformat(ao->format);
+    encoder->bits_per_raw_sample = ac->sample_size * 8;

-    if (encode_lavc_open_codec(ao->encode_lavc_ctx, ac->codec) < 0)
+    if (!encoder_init_codec_and_muxer(ac->enc))
        goto fail;

    ac->pcmhack = 0;
-    if (ac->codec->frame_size <= 1)
-        ac->pcmhack = av_get_bits_per_sample(ac->codec->codec_id) / 8;
+    if (encoder->frame_size <= 1)
+        ac->pcmhack = av_get_bits_per_sample(encoder->codec_id) / 8;

    if (ac->pcmhack) {
        ac->aframesize = 16384; // "enough"
    } else {
-        ac->aframesize = ac->codec->frame_size;
+        ac->aframesize = encoder->frame_size;
    }

    // enough frames for at least 0.25 seconds
@ -169,7 +151,6 @@ static int init(struct ao *ao)
    if (ao->channels.num > AV_NUM_DATA_POINTERS)
        goto fail;

-    pthread_mutex_unlock(&ao->encode_lavc_ctx->lock);
    return 0;

 fail:
@ -184,28 +165,17 @@ static void uninit(struct ao *ao)
    struct priv *ac = ao->priv;
    struct encode_lavc_context *ectx = ao->encode_lavc_ctx;

-    if (!ac || ac->shutdown)
-        return;
+    if (!ac->shutdown) {
+        double outpts = ac->expected_next_pts;

        pthread_mutex_lock(&ectx->lock);
-
-    if (!encode_lavc_start(ectx)) {
-        MP_WARN(ao, "not even ready to encode audio at end -> dropped\n");
-        pthread_mutex_unlock(&ectx->lock);
-        return;
-    }
-
-    if (ac->stream) {
-        double outpts = ac->expected_next_pts;
-        if (!ectx->options->rawts && ectx->options->copyts)
+        if (!ac->enc->options->rawts && ac->enc->options->copyts)
            outpts += ectx->discontinuity_pts_offset;
-        outpts += encode_lavc_getoffset(ectx, ac->codec);
+        pthread_mutex_unlock(&ectx->lock);
+
+        outpts += encoder_get_offset(ac->enc);
        encode(ao, outpts, NULL);
    }
-
-    pthread_mutex_unlock(&ectx->lock);
-
-    ac->shutdown = true;
 }

 // return: how many samples can be played without blocking
@ -216,106 +186,21 @@ static int get_space(struct ao *ao)
    return ac->aframesize * ac->framecount;
 }

-static void write_packet(struct ao *ao, AVPacket *packet)
-{
-    // TODO: Can we unify this with the equivalent video code path?
-    struct priv *ac = ao->priv;
-
-    packet->stream_index = ac->stream->index;
-    if (packet->pts != AV_NOPTS_VALUE) {
-        packet->pts = av_rescale_q(packet->pts,
-                                   ac->codec->time_base,
-                                   ac->stream->time_base);
-    } else {
-        // Do we need this at all? Better be safe than sorry...
-        MP_WARN(ao, "encoder lost pts, why?\n");
-        if (ac->savepts != MP_NOPTS_VALUE) {
-            packet->pts = av_rescale_q(ac->savepts,
-                                       ac->codec->time_base,
-                                       ac->stream->time_base);
-        }
-    }
-    if (packet->dts != AV_NOPTS_VALUE) {
-        packet->dts = av_rescale_q(packet->dts,
-                                   ac->codec->time_base,
-                                   ac->stream->time_base);
-    }
-    if (packet->duration > 0) {
-        packet->duration = av_rescale_q(packet->duration,
-                                        ac->codec->time_base,
-                                        ac->stream->time_base);
-    }
-
-    ac->savepts = AV_NOPTS_VALUE;
-
-    if (encode_lavc_write_frame(ao->encode_lavc_ctx, ac->stream, packet) < 0) {
-        MP_ERR(ao, "error writing at %d %d/%d\n",
-               (int) packet->pts,
-               ac->stream->time_base.num,
-               ac->stream->time_base.den);
-        return;
-    }
-}
-
-static void encode_audio_and_write(struct ao *ao, AVFrame *frame)
-{
-    // TODO: Can we unify this with the equivalent video code path?
-    struct priv *ac = ao->priv;
-    AVPacket packet = {0};
-
-    int status = avcodec_send_frame(ac->codec, frame);
-    if (status < 0) {
-        MP_ERR(ao, "error encoding at %d %d/%d\n",
-               frame ? (int) frame->pts : -1,
-               ac->codec->time_base.num,
-               ac->codec->time_base.den);
-        return;
-    }
-
-    for (;;) {
-        av_init_packet(&packet);
-        status = avcodec_receive_packet(ac->codec, &packet);
-        if (status == AVERROR(EAGAIN)) { // No more packets for now.
-            if (frame == NULL) {
-                MP_ERR(ao, "sent flush frame, got EAGAIN");
-            }
-            break;
-        }
-        if (status == AVERROR_EOF) { // No more packets, ever.
-            if (frame != NULL) {
-                MP_ERR(ao, "sent audio frame, got EOF");
-            }
-            break;
-        }
-        if (status < 0) {
-            MP_ERR(ao, "error encoding at %d %d/%d\n",
-                   frame ? (int) frame->pts : -1,
-                   ac->codec->time_base.num,
-                   ac->codec->time_base.den);
-            break;
-        }
-        if (frame) {
-            if (ac->savepts == AV_NOPTS_VALUE)
-                ac->savepts = frame->pts;
-        }
-        encode_lavc_write_stats(ao->encode_lavc_ctx, ac->codec);
-        write_packet(ao, &packet);
-        av_packet_unref(&packet);
-    }
-}
-
 // must get exactly ac->aframesize amount of data
 static void encode(struct ao *ao, double apts, void **data)
 {
    struct priv *ac = ao->priv;
    struct encode_lavc_context *ectx = ao->encode_lavc_ctx;
+    AVCodecContext *encoder = ac->enc->encoder;
    double realapts = ac->aframecount * (double) ac->aframesize /
                      ao->samplerate;

    ac->aframecount++;

+    pthread_mutex_lock(&ectx->lock);
    if (data)
        ectx->audio_pts_offset = realapts - apts;
+    pthread_mutex_unlock(&ectx->lock);

    if(data) {
        AVFrame *frame = av_frame_alloc();
@ -329,17 +214,17 @@ static void encode(struct ao *ao, double apts, void **data)

        frame->linesize[0] = frame->nb_samples * ao->sstride;

-        if (ectx->options->rawts || ectx->options->copyts) {
+        if (ac->enc->options->rawts || ac->enc->options->copyts) {
            // real audio pts
-            frame->pts = floor(apts * ac->codec->time_base.den /
-                               ac->codec->time_base.num + 0.5);
+            frame->pts = floor(apts * encoder->time_base.den /
+                               encoder->time_base.num + 0.5);
        } else {
            // audio playback time
-            frame->pts = floor(realapts * ac->codec->time_base.den /
-                               ac->codec->time_base.num + 0.5);
+            frame->pts = floor(realapts * encoder->time_base.den /
+                               encoder->time_base.num + 0.5);
        }

-        int64_t frame_pts = av_rescale_q(frame->pts, ac->codec->time_base,
+        int64_t frame_pts = av_rescale_q(frame->pts, encoder->time_base,
                                         ac->worst_time_base);
        if (ac->lastpts != AV_NOPTS_VALUE && frame_pts <= ac->lastpts) {
            // this indicates broken video
@ -348,15 +233,15 @@ static void encode(struct ao *ao, double apts, void **data)
                    (int)frame->pts, (int)ac->lastpts);
            frame_pts = ac->lastpts + 1;
            frame->pts = av_rescale_q(frame_pts, ac->worst_time_base,
-                                      ac->codec->time_base);
+                                      encoder->time_base);
        }
        ac->lastpts = frame_pts;

-        frame->quality = ac->codec->global_quality;
-        encode_audio_and_write(ao, frame);
+        frame->quality = encoder->global_quality;
+        encoder_encode(ac->enc, frame);
        av_frame_free(&frame);
    } else {
-        encode_audio_and_write(ao, NULL);
+        encoder_encode(ac->enc, NULL);
    }
 }

@ -365,20 +250,16 @@ static void encode(struct ao *ao, double apts, void **data)
 static int play(struct ao *ao, void **data, int samples, int flags)
 {
    struct priv *ac = ao->priv;
+    struct encoder_context *enc = ac->enc;
    struct encode_lavc_context *ectx = ao->encode_lavc_ctx;
    int bufpos = 0;
    double nextpts;
    double outpts;
    int orig_samples = samples;

+    // for ectx PTS fields
    pthread_mutex_lock(&ectx->lock);

-    if (!encode_lavc_start(ectx)) {
-        MP_WARN(ao, "not ready yet for encoding audio\n");
-        pthread_mutex_unlock(&ectx->lock);
-        return 0;
-    }
-
    double pts = ectx->last_audio_in_pts;
    pts += ectx->samples_since_last_pts / (double)ao->samplerate;

@ -407,26 +288,10 @@ static int play(struct ao *ao, void **data, int samples, int flags)
    }

    if (ac->worst_time_base.den == 0) {
-        if (ac->codec->time_base.num * (double) ac->stream->time_base.den >=
-                ac->stream->time_base.num * (double) ac->codec->time_base.den) {
-            MP_VERBOSE(ao, "NOTE: using codec time base (%d/%d) for pts "
-                       "adjustment; the stream base (%d/%d) is not worse.\n",
-                       (int)ac->codec->time_base.num,
-                       (int)ac->codec->time_base.den,
-                       (int)ac->stream->time_base.num,
-                       (int)ac->stream->time_base.den);
-            ac->worst_time_base = ac->codec->time_base;
-            ac->worst_time_base_is_stream = 0;
-        } else {
-            MP_WARN(ao, "NOTE: not using codec time base (%d/%d) for pts "
-                    "adjustment; the stream base (%d/%d) is worse.\n",
-                    (int)ac->codec->time_base.num,
-                    (int)ac->codec->time_base.den,
-                    (int)ac->stream->time_base.num,
-                    (int)ac->stream->time_base.den);
-            ac->worst_time_base = ac->stream->time_base;
-            ac->worst_time_base_is_stream = 1;
-        }
+        // We don't know the muxer time_base anymore, and can't, because we
+        // might start encoding before the muxer is opened. (The muxer decides
+        // the final AVStream.time_base when opening the muxer.)
+        ac->worst_time_base = enc->encoder->time_base;

        // NOTE: we use the following "axiom" of av_rescale_q:
        // if time base A is worse than time base B, then
@ -446,7 +311,7 @@ static int play(struct ao *ao, void **data, int samples, int flags)
    }

    // Fix and apply the discontinuity pts offset.
-    if (!ectx->options->rawts && ectx->options->copyts) {
+    if (!enc->options->rawts && enc->options->copyts) {
        // fix the discontinuity pts offset
        nextpts = pts;
        if (ectx->discontinuity_pts_offset == MP_NOPTS_VALUE) {
@ -465,8 +330,10 @@ static int play(struct ao *ao, void **data, int samples, int flags)
        outpts = pts;
    }

+    pthread_mutex_unlock(&ectx->lock);
+
    // Shift pts by the pts offset first.
-    outpts += encode_lavc_getoffset(ectx, ac->codec);
+    outpts += encoder_get_offset(enc);

    while (samples - bufpos >= ac->aframesize) {
        void *start[MP_NUM_CHANNELS] = {0};
@ -479,8 +346,10 @@ static int play(struct ao *ao, void **data, int samples, int flags)
    // Calculate expected pts of next audio frame (input side).
    ac->expected_next_pts = pts + bufpos / (double) ao->samplerate;

+    pthread_mutex_lock(&ectx->lock);
+
    // Set next allowed input pts value (input side).
-    if (!ectx->options->rawts && ectx->options->copyts) {
+    if (!enc->options->rawts && enc->options->copyts) {
        nextpts = ac->expected_next_pts + ectx->discontinuity_pts_offset;
        if (nextpts > ectx->next_in_pts)
            ectx->next_in_pts = nextpts;
@ -513,6 +382,7 @@ const struct ao_driver audio_out_lavc = {
    .encode = true,
    .description = "audio encoding using libavcodec",
    .name      = "lavc",
+    .priv_size = sizeof(struct priv),
    .init      = init,
    .uninit    = uninit,
    .get_space = get_space,
--- a/common/common.h
+++ b/common/common.h
@ -106,4 +106,15 @@ char *mp_tprintf_buf(char *buf, size_t buf_size, const char *format, ...)

 char **mp_dup_str_array(void *tctx, char **s);

+// We generally do not handle allocation failure of small malloc()s. This would
+// create a large number of rarely tested code paths, which would probably
+// regress and cause security issues. We prefer to fail fast.
+// This macro generally behaves like an assert(), except it will make sure to
+// kill the process even with NDEBUG.
+#define MP_HANDLE_OOM(x) do {   \
+        assert(x);              \
+        if (!(x))               \
+            abort();            \
+    } while (0)
+
 #endif /* MPLAYER_MPCOMMON_H */
--- a/common/encode.h
+++ b/common/encode.h
@ -54,17 +54,16 @@ struct encode_opts {
    char **remove_metadata;
 };

-// interface for mplayer.c
-struct encode_lavc_context *encode_lavc_init(struct encode_opts *options,
-                                             struct mpv_global *global);
-void encode_lavc_free(struct encode_lavc_context *ctx);
+// interface for player core
+struct encode_lavc_context *encode_lavc_init(struct mpv_global *global);
+bool encode_lavc_free(struct encode_lavc_context *ctx);
 void encode_lavc_discontinuity(struct encode_lavc_context *ctx);
 bool encode_lavc_showhelp(struct mp_log *log, struct encode_opts *options);
 int encode_lavc_getstatus(struct encode_lavc_context *ctx, char *buf, int bufsize, float relative_position);
-void encode_lavc_expect_stream(struct encode_lavc_context *ctx, int mt);
+void encode_lavc_expect_stream(struct encode_lavc_context *ctx,
+                               enum stream_type type);
 void encode_lavc_set_metadata(struct encode_lavc_context *ctx,
                              struct mp_tags *metadata);
-void encode_lavc_set_video_fps(struct encode_lavc_context *ctx, float fps);
 void encode_lavc_set_audio_pts(struct encode_lavc_context *ctx, double pts);
 bool encode_lavc_didfail(struct encode_lavc_context *ctx); // check if encoding failed

--- a/common/encode_lavc.c
+++ b/common/encode_lavc.c
--- a/common/encode_lavc.h
+++ b/common/encode_lavc.h
@ -31,43 +31,26 @@
 #include <libavutil/opt.h>
 #include <libavutil/mathematics.h>

+#include "common/common.h"
 #include "encode.h"
 #include "video/csputils.h"

 struct encode_lavc_context {
+    // --- Immutable after init
    struct mpv_global *global;
    struct encode_opts *options;
    struct mp_log *log;
-    struct mp_tags *metadata;
+    struct encode_priv *priv;
+    AVOutputFormat *oformat;
+    const char *filename;

    // All entry points must be guarded with the lock. Functions called by
    // the playback core lock this automatically, but ao_lavc.c and vo_lavc.c
    // must lock manually before accessing state.
    pthread_mutex_t lock;

-    float vo_fps;
-
-    // FFmpeg contexts.
-    AVFormatContext *avc;
-    AVStream *vst;
-    AVStream *ast;
-    AVCodecContext *vcc;
-    AVCodecContext *acc;
-
-    // these are processed from the options
-    AVRational timebase;
-    AVCodec *vc;
-    AVCodec *ac;
-    AVDictionary *foptions;
-    AVDictionary *aoptions;
-    AVDictionary *voptions;
-
-    // values created during encoding
-    int header_written; // -1 means currently writing
-
    // sync to audio mode
    double audio_pts_offset;
-    double last_video_in_pts;

    double last_audio_in_pts;
    int64_t samples_since_last_pts;
@ -75,40 +58,54 @@ struct encode_lavc_context {
    // anti discontinuity mode
    double next_in_pts;
    double discontinuity_pts_offset;
-
-    long long abytes;
-    long long vbytes;
-    struct stream *twopass_bytebuffer_a;
-    struct stream *twopass_bytebuffer_v;
-    double t0;
-    unsigned int frames;
-    double audioseconds;
-
-    bool expect_video;
-    bool expect_audio;
-    bool video_first;
-    bool audio_first;
-
-    // has encoding failed?
-    bool failed;
 };

-// interface for vo/ao drivers
-int encode_lavc_alloc_stream(struct encode_lavc_context *ctx,
-                             enum AVMediaType mt, AVStream **stream_out,
-                             AVCodecContext **codec_out);
-void encode_lavc_write_stats(struct encode_lavc_context *ctx,
-                             AVCodecContext *stream);
-int encode_lavc_write_frame(struct encode_lavc_context *ctx, AVStream *stream,
-                            AVPacket *packet);
-int encode_lavc_supports_pixfmt(struct encode_lavc_context *ctx, enum AVPixelFormat format);
-int encode_lavc_open_codec(struct encode_lavc_context *ctx,
-                           AVCodecContext *codec);
-int encode_lavc_available(struct encode_lavc_context *ctx);
-int encode_lavc_timesyncfailed(struct encode_lavc_context *ctx);
-int encode_lavc_start(struct encode_lavc_context *ctx); // returns 1 on success
-double encode_lavc_getoffset(struct encode_lavc_context *ctx,
-                             AVCodecContext *codec);
-void encode_lavc_fail(struct encode_lavc_context *ctx, const char *format, ...); // report failure of encoding
+// --- interface for vo/ao drivers
+
+// Static information after encoder init. This never changes (even if there are
+// dynamic runtime changes, they have to work over AVPacket side data).
+// For use in encoder_context, most fields are copied from encoder_context.encoder
+// by encoder_init_codec_and_muxer().
+struct encoder_stream_info {
+    AVRational timebase; // timebase used by the encoder (in frames/out packets)
+    AVCodecParameters *codecpar;
+};
+
+// The encoder parts for each stream (no muxing parts included).
+// This is private to each stream.
+struct encoder_context {
+    struct mpv_global *global;
+    struct encode_opts *options;
+    struct mp_log *log;
+    AVOutputFormat *oformat;
+
+    // (avoid using this)
+    struct encode_lavc_context *encode_lavc_ctx;
+
+    enum stream_type type;
+
+    // (different access restrictions before/after encoder init)
+    struct encoder_stream_info info;
+    AVCodecContext *encoder;
+    struct mux_stream *mux_stream;
+
+    struct stream *twopass_bytebuffer;
+};
+
+// Free with talloc_free(). (Keep in mind actual deinitialization requires
+// sending a flush packet.)
+// This can fail and return NULL.
+struct encoder_context *encoder_context_alloc(struct encode_lavc_context *ctx,
+                                              enum stream_type type,
+                                              struct mp_log *log);
+
+// After setting your codec parameters on p->encoder, you call this to "open"
+// the encoder. This also initializes p->mux_stream. Returns false on failure.
+bool encoder_init_codec_and_muxer(struct encoder_context *p);
+
+// Encode the frame and write the packet. frame is ref'ed as needed.
+bool encoder_encode(struct encoder_context *p, AVFrame *frame);
+
+double encoder_get_offset(struct encoder_context *p);

 #endif
--- a/etc/encoding-profiles.conf
+++ b/etc/encoding-profiles.conf
@ -108,7 +108,7 @@ of = 3gp
 ocopyts = yes
 profile = enc-v-h263
 profile = enc-a-aac
-ofopts-clr = yes
+ofopts = ""

 [enc-f-avi]
 profile-desc = "MPEG-4 + MP3 (for AVI)"
@ -117,7 +117,7 @@ ocopyts = no
 oautofps = yes
 profile = enc-v-mpeg4
 profile = enc-a-mp3
-ofopts-clr = yes
+ofopts = ""

 [enc-f-mp4]
 profile-desc = "H.264 + AAC (for MP4)"
@ -135,7 +135,7 @@ of = webm
 ocopyts = yes
 profile = enc-v-vp9
 profile = enc-a-opus
-ofopts-clr = yes
+ofopts = ""

 ##################
 # target devices #
--- a/player/loadfile.c
+++ b/player/loadfile.c
@ -1302,9 +1302,9 @@ reopen_file:

 #if HAVE_ENCODING
    if (mpctx->encode_lavc_ctx && mpctx->current_track[0][STREAM_VIDEO])
-        encode_lavc_expect_stream(mpctx->encode_lavc_ctx, AVMEDIA_TYPE_VIDEO);
+        encode_lavc_expect_stream(mpctx->encode_lavc_ctx, STREAM_VIDEO);
    if (mpctx->encode_lavc_ctx && mpctx->current_track[0][STREAM_AUDIO])
-        encode_lavc_expect_stream(mpctx->encode_lavc_ctx, AVMEDIA_TYPE_AUDIO);
+        encode_lavc_expect_stream(mpctx->encode_lavc_ctx, STREAM_AUDIO);
    if (mpctx->encode_lavc_ctx) {
        encode_lavc_set_metadata(mpctx->encode_lavc_ctx,
                                 mpctx->demuxer->metadata);
@ -1556,6 +1556,20 @@ void mp_play_files(struct MPContext *mpctx)
    }

    cancel_open(mpctx);
+
+#if HAVE_ENCODING
+    if (mpctx->encode_lavc_ctx) {
+        // Make sure all streams get finished.
+        uninit_audio_out(mpctx);
+        uninit_video_out(mpctx);
+
+        if (!encode_lavc_free(mpctx->encode_lavc_ctx))
+            mpctx->stop_play = PT_ERROR;
+
+        mpctx->encode_lavc_ctx = NULL;
+    }
+#endif
+
 }

 // Abort current playback and set the given entry to play next.
--- a/player/main.c
+++ b/player/main.c
@ -165,10 +165,10 @@ void mp_destroy(struct MPContext *mpctx)
    uninit_video_out(mpctx);

 #if HAVE_ENCODING
+    // If it's still set here, it's an error.
    encode_lavc_free(mpctx->encode_lavc_ctx);
-#endif
-
    mpctx->encode_lavc_ctx = NULL;
+#endif

    command_uninit(mpctx);

@ -416,8 +416,7 @@ int mp_initialize(struct MPContext *mpctx, char **options)

 #if HAVE_ENCODING
    if (opts->encode_opts->file && opts->encode_opts->file[0]) {
-        mpctx->encode_lavc_ctx = encode_lavc_init(opts->encode_opts,
-                                                  mpctx->global);
+        mpctx->encode_lavc_ctx = encode_lavc_init(mpctx->global);
        if(!mpctx->encode_lavc_ctx) {
            MP_INFO(mpctx, "Encoding initialization failed.\n");
            return -1;
--- a/player/video.c
+++ b/player/video.c
@ -263,13 +263,6 @@ void reinit_video_chain_src(struct MPContext *mpctx, struct track *track)
        mp_pin_connect(vo_c->filter->f->pins[0], vo_c->dec_src);
    }

-#if HAVE_ENCODING
-    if (mpctx->encode_lavc_ctx) {
-        encode_lavc_set_video_fps(mpctx->encode_lavc_ctx,
-                                  vo_c->filter->container_fps);
-    }
-#endif
-
    if (!recreate_video_filters(mpctx))
        goto err_out;

--- a/video/out/vo_lavc.c
+++ b/video/out/vo_lavc.c
@ -36,9 +36,7 @@
 #include "sub/osd.h"

 struct priv {
-    AVStream *stream;
-    AVCodecContext *codec;
-    int have_first_packet;
+    struct encoder_context *enc;

    int harddup;

@ -51,70 +49,59 @@ struct priv {
    mp_image_t *lastimg;
    int lastdisplaycount;

+    double last_video_in_pts;
+
    AVRational worst_time_base;
-    int worst_time_base_is_stream;

    bool shutdown;
 };

-static void draw_image_unlocked(struct vo *vo, mp_image_t *mpi);
+static void draw_image(struct vo *vo, mp_image_t *mpi);

 static int preinit(struct vo *vo)
 {
-    struct priv *vc;
-    if (!encode_lavc_available(vo->encode_lavc_ctx)) {
-        MP_ERR(vo, "the option --o (output file) must be specified\n");
+    struct priv *vc = vo->priv;
+    vc->enc = encoder_context_alloc(vo->encode_lavc_ctx, STREAM_VIDEO, vo->log);
+    if (!vc->enc)
        return -1;
-    }
-    vo->priv = talloc_zero(vo, struct priv);
-    vc = vo->priv;
-    vc->harddup = vo->encode_lavc_ctx->options->harddup;
+    talloc_steal(vc, vc->enc);
+    vc->harddup = vc->enc->options->harddup;
+    vc->last_video_in_pts = MP_NOPTS_VALUE;
    return 0;
 }

 static void uninit(struct vo *vo)
 {
    struct priv *vc = vo->priv;
-    if (!vc || vc->shutdown)
-        return;

-    pthread_mutex_lock(&vo->encode_lavc_ctx->lock);
-
-    if (vc->lastipts >= 0 && vc->stream)
-        draw_image_unlocked(vo, NULL);
+    if (vc->lastipts >= 0 && !vc->shutdown)
+        draw_image(vo, NULL);

    mp_image_unrefp(&vc->lastimg);
-
-    pthread_mutex_unlock(&vo->encode_lavc_ctx->lock);
-
-    vc->shutdown = true;
 }

-static int reconfig(struct vo *vo, struct mp_image_params *params)
+static int reconfig2(struct vo *vo, struct mp_image *img)
 {
    struct priv *vc = vo->priv;
+    struct encode_lavc_context *ctx = vo->encode_lavc_ctx;
+    AVCodecContext *encoder = vc->enc->encoder;
+
+    struct mp_image_params *params = &img->params;
    enum AVPixelFormat pix_fmt = imgfmt2pixfmt(params->imgfmt);
    AVRational aspect = {params->p_w, params->p_h};
    int width = params->w;
    int height = params->h;

-    if (!vc || vc->shutdown)
+    if (vc->shutdown)
        return -1;

-    pthread_mutex_lock(&vo->encode_lavc_ctx->lock);
-
-    if (vc->stream) {
-        if (width == vc->codec->width && height == vc->codec->height) {
-            if (aspect.num != vc->codec->sample_aspect_ratio.num ||
-                aspect.den != vc->codec->sample_aspect_ratio.den)
+    if (avcodec_is_open(encoder)) {
+        if (width == encoder->width && height == encoder->height &&
+            pix_fmt == encoder->pix_fmt)
        {
-                /* aspect-only changes are not critical */
-                MP_WARN(vo, "unsupported pixel aspect ratio change from %d:%d to %d:%d\n",
-                        vc->codec->sample_aspect_ratio.num,
-                        vc->codec->sample_aspect_ratio.den,
-                        aspect.num, aspect.den);
-            }
-            goto done;
+            // consider these changes not critical
+            MP_ERR(vo, "Ignoring mid-stream parameter changes!\n");
+            return 0;
        }

        /* FIXME Is it possible with raw video? */
@ -128,7 +115,7 @@ static int reconfig(struct vo *vo, struct mp_image_params *params)
    // - Second calls after reconfigure() already failed once fail (due to the
    //   vc->shutdown check above).
    // - Second calls after reconfigure() already succeeded once return early
-    //   (due to the vc->stream check above).
+    //   (due to the avcodec_is_open() check above).

    vc->lastipts = AV_NOPTS_VALUE;
    vc->lastframeipts = AV_NOPTS_VALUE;
@@ -140,138 +127,78 @@ static int reconfig(struct vo *vo, struct mp_image_params *params)
        goto error;
    }

-    if (encode_lavc_alloc_stream(vo->encode_lavc_ctx,
-                                 AVMEDIA_TYPE_VIDEO,
-                                 &vc->stream, &vc->codec) < 0)
-        goto error;
-    vc->stream->sample_aspect_ratio = vc->codec->sample_aspect_ratio = aspect;
-    vc->codec->width = width;
-    vc->codec->height = height;
-    vc->codec->pix_fmt = pix_fmt;
-    vc->codec->colorspace = mp_csp_to_avcol_spc(params->color.space);
-    vc->codec->color_range = mp_csp_levels_to_avcol_range(params->color.levels);
+    encoder->sample_aspect_ratio = aspect;
+    encoder->width = width;
+    encoder->height = height;
+    encoder->pix_fmt = pix_fmt;
+    encoder->colorspace = mp_csp_to_avcol_spc(params->color.space);
+    encoder->color_range = mp_csp_levels_to_avcol_range(params->color.levels);

-    if (encode_lavc_open_codec(vo->encode_lavc_ctx, vc->codec) < 0)
+    AVRational tb;
+
+    if (ctx->options->fps > 0) {
+        tb = av_d2q(ctx->options->fps, ctx->options->fps * 1001 + 2);
+    } else if (ctx->options->autofps && img->nominal_fps > 0) {
+        tb = av_d2q(img->nominal_fps, img->nominal_fps * 1001 + 2);
+        MP_INFO(vo, "option --ofps not specified "
+                "but --oautofps is active, using guess of %u/%u\n",
+                (unsigned)tb.num, (unsigned)tb.den);
+    } else {
+        // we want to handle:
+        //      1/25
+        //   1001/24000
+        //   1001/30000
+        // for this we would need 120000fps...
+        // however, mpeg-4 only allows 16bit values
+        // so let's take 1001/30000 out
+        tb.num = 24000;
+        tb.den = 1;
+        MP_INFO(vo, "option --ofps not specified "
+                "and fps could not be inferred, using guess of %u/%u\n",
+                (unsigned)tb.num, (unsigned)tb.den);
+    }
+
+    const AVRational *rates = encoder->codec->supported_framerates;
+    if (rates && rates[0].den)
+        tb = rates[av_find_nearest_q_idx(tb, rates)];
+
+    encoder->time_base = av_inv_q(tb);
+
+    if (!encoder_init_codec_and_muxer(vc->enc))
        goto error;

-done:
-    pthread_mutex_unlock(&vo->encode_lavc_ctx->lock);
    return 0;

 error:
-    pthread_mutex_unlock(&vo->encode_lavc_ctx->lock);
    vc->shutdown = true;
    return -1;
 }

 static int query_format(struct vo *vo, int format)
 {
+    struct priv *vc = vo->priv;
+
    enum AVPixelFormat pix_fmt = imgfmt2pixfmt(format);
+    const enum AVPixelFormat *p = vc->enc->encoder->codec->pix_fmts;
+
+    if (!p)
+        return 1;
+
+    while (*p != AV_PIX_FMT_NONE) {
+        if (*p == pix_fmt)
+            return 1;
+        p++;
+    }

-    if (!vo->encode_lavc_ctx)
    return 0;
-
-    pthread_mutex_lock(&vo->encode_lavc_ctx->lock);
-    int flags = 0;
-    if (encode_lavc_supports_pixfmt(vo->encode_lavc_ctx, pix_fmt))
-        flags = 1;
-    pthread_mutex_unlock(&vo->encode_lavc_ctx->lock);
-    return flags;
 }

-static void write_packet(struct vo *vo, AVPacket *packet)
+static void draw_image(struct vo *vo, mp_image_t *mpi)
 {
    struct priv *vc = vo->priv;
-
-    packet->stream_index = vc->stream->index;
-    if (packet->pts != AV_NOPTS_VALUE) {
-        packet->pts = av_rescale_q(packet->pts,
-                                   vc->codec->time_base,
-                                   vc->stream->time_base);
-    } else {
-        MP_VERBOSE(vo, "codec did not provide pts\n");
-        packet->pts = av_rescale_q(vc->lastipts,
-                                   vc->worst_time_base,
-                                   vc->stream->time_base);
-    }
-    if (packet->dts != AV_NOPTS_VALUE) {
-        packet->dts = av_rescale_q(packet->dts,
-                                   vc->codec->time_base,
-                                   vc->stream->time_base);
-    }
-    if (packet->duration > 0) {
-        packet->duration = av_rescale_q(packet->duration,
-                                        vc->codec->time_base,
-                                        vc->stream->time_base);
-    } else {
-        // HACK: libavformat calculates dts wrong if the initial packet
-        // duration is not set, but ONLY if the time base is "high" and if we
-        // have b-frames!
-        if (!packet->duration && !vc->have_first_packet &&
-            (vc->codec->has_b_frames || vc->codec->max_b_frames) &&
-            (vc->stream->time_base.num * 1000LL <= vc->stream->time_base.den))
-        {
-            packet->duration = FFMAX(1, av_rescale_q(1,
-                                vc->codec->time_base, vc->stream->time_base));
-        }
-    }
-
-    if (encode_lavc_write_frame(vo->encode_lavc_ctx,
-                                vc->stream, packet) < 0) {
-        MP_ERR(vo, "error writing at %d %d/%d\n",
-               (int) packet->pts,
-               vc->stream->time_base.num,
-               vc->stream->time_base.den);
-        return;
-    }
-
-    vc->have_first_packet = 1;
-}
-
-static void encode_video_and_write(struct vo *vo, AVFrame *frame)
-{
-    struct priv *vc = vo->priv;
-    AVPacket packet = {0};
-
-    int status = avcodec_send_frame(vc->codec, frame);
-    if (status < 0) {
-        MP_ERR(vo, "error encoding at %d %d/%d\n",
-               frame ? (int) frame->pts : -1,
-               vc->codec->time_base.num,
-               vc->codec->time_base.den);
-        return;
-    }
-    for (;;) {
-        av_init_packet(&packet);
-        status = avcodec_receive_packet(vc->codec, &packet);
-        if (status == AVERROR(EAGAIN)) { // No more packets for now.
-            if (frame == NULL)
-                MP_ERR(vo, "sent flush frame, got EAGAIN");
-            break;
-        }
-        if (status == AVERROR_EOF) { // No more packets, ever.
-            if (frame != NULL)
-                MP_ERR(vo, "sent image frame, got EOF");
-            break;
-        }
-        if (status < 0) {
-            MP_ERR(vo, "error encoding at %d %d/%d\n",
-                   frame ? (int) frame->pts : -1,
-                   vc->codec->time_base.num,
-                   vc->codec->time_base.den);
-            break;
-        }
-        encode_lavc_write_stats(vo->encode_lavc_ctx, vc->codec);
-        write_packet(vo, &packet);
-        av_packet_unref(&packet);
-    }
-}
-
-static void draw_image_unlocked(struct vo *vo, mp_image_t *mpi)
-{
-    struct priv *vc = vo->priv;
-    struct encode_lavc_context *ectx = vo->encode_lavc_ctx;
-    AVCodecContext *avc;
+    struct encoder_context *enc = vc->enc;
+    struct encode_lavc_context *ectx = enc->encode_lavc_ctx;
+    AVCodecContext *avc = enc->encoder;
    int64_t frameipts;
    double nextpts;

@@ -285,41 +212,24 @@ static void draw_image_unlocked(struct vo *vo, mp_image_t *mpi)
        osd_draw_on_image(vo->osd, dim, mpi->pts, OSD_DRAW_SUB_ONLY, mpi);
    }

-    if (!vc || vc->shutdown)
+    if (vc->shutdown)
        goto done;
-    if (!encode_lavc_start(ectx)) {
-        MP_WARN(vo, "NOTE: skipped initial video frame (probably because audio is not there yet)\n");
-        goto done;
-    }
+
    if (pts == MP_NOPTS_VALUE) {
        if (mpi)
            MP_WARN(vo, "frame without pts, please report; synthesizing pts instead\n");
        pts = vc->expected_next_pts;
    }

-    avc = vc->codec;
-
    if (vc->worst_time_base.den == 0) {
-        if (avc->time_base.num * (double) vc->stream->time_base.den >=
-                vc->stream->time_base.num * (double) avc->time_base.den) {
-            MP_VERBOSE(vo, "NOTE: using codec time base "
-                       "(%d/%d) for frame dropping; the stream base (%d/%d) is "
-                       "not worse.\n", (int)avc->time_base.num,
-                       (int)avc->time_base.den, (int)vc->stream->time_base.num,
-                       (int)vc->stream->time_base.den);
+        // We don't know the muxer time_base anymore, and can't, because we
+        // might start encoding before the muxer is opened. (The muxer decides
+        // the final AVStream.time_base when opening the muxer.)
        vc->worst_time_base = avc->time_base;
-            vc->worst_time_base_is_stream = 0;
-        } else {
-            MP_WARN(vo, "NOTE: not using codec time base (%d/%d) for frame "
-                    "dropping; the stream base (%d/%d) is worse.\n",
-                    (int)avc->time_base.num, (int)avc->time_base.den,
-                    (int)vc->stream->time_base.num, (int)vc->stream->time_base.den);
-            vc->worst_time_base = vc->stream->time_base;
-            vc->worst_time_base_is_stream = 1;
-        }
-        if (ectx->options->maxfps) {
+
+        if (enc->options->maxfps) {
            vc->mindeltapts = ceil(vc->worst_time_base.den /
-                    (vc->worst_time_base.num * ectx->options->maxfps));
+                    (vc->worst_time_base.num * enc->options->maxfps));
        } else {
            vc->mindeltapts = 0;
        }
@@ -343,10 +253,13 @@ static void draw_image_unlocked(struct vo *vo, mp_image_t *mpi)

    double timeunit = (double)vc->worst_time_base.num / vc->worst_time_base.den;

+    // Lock for shared timestamp fields.
+    pthread_mutex_lock(&ectx->lock);
+
    double outpts;
-    if (ectx->options->rawts)
+    if (enc->options->rawts) {
        outpts = pts;
-    else if (ectx->options->copyts) {
+    } else if (enc->options->copyts) {
        // fix the discontinuity pts offset
        nextpts = pts;
        if (ectx->discontinuity_pts_offset == MP_NOPTS_VALUE) {
@@ -364,8 +277,8 @@ static void draw_image_unlocked(struct vo *vo, mp_image_t *mpi)
    } else {
        // adjust pts by knowledge of audio pts vs audio playback time
        double duration = 0;
-        if (ectx->last_video_in_pts != MP_NOPTS_VALUE)
-            duration = pts - ectx->last_video_in_pts;
+        if (vc->last_video_in_pts != MP_NOPTS_VALUE)
+            duration = pts - vc->last_video_in_pts;
        if (duration < 0)
            duration = timeunit;   // XXX warn about discontinuity?
        outpts = vc->lastpts + duration;
@@ -377,28 +290,29 @@ static void draw_image_unlocked(struct vo *vo, mp_image_t *mpi)
        }
    }
    vc->lastpts = outpts;
-    ectx->last_video_in_pts = pts;
-    frameipts = floor((outpts + encode_lavc_getoffset(ectx, vc->codec))
-                      / timeunit + 0.5);
+    vc->last_video_in_pts = pts;
+    frameipts = floor((outpts + encoder_get_offset(enc)) / timeunit + 0.5);

    // calculate expected pts of next video frame
    vc->expected_next_pts = pts + timeunit;

-    if (!ectx->options->rawts && ectx->options->copyts) {
+    if (!enc->options->rawts && enc->options->copyts) {
        // set next allowed output pts value
        nextpts = vc->expected_next_pts + ectx->discontinuity_pts_offset;
        if (nextpts > ectx->next_in_pts)
            ectx->next_in_pts = nextpts;
    }

+    pthread_mutex_unlock(&ectx->lock);
+
    // never-drop mode
-    if (ectx->options->neverdrop) {
+    if (enc->options->neverdrop) {
        int64_t step = vc->mindeltapts ? vc->mindeltapts : 1;
        if (frameipts < vc->lastipts + step) {
            MP_INFO(vo, "--oneverdrop increased pts by %d\n",
                    (int) (vc->lastipts - frameipts + step));
            frameipts = vc->lastipts + step;
-            vc->lastpts = frameipts * timeunit - encode_lavc_getoffset(ectx, vc->codec);
+            vc->lastpts = frameipts * timeunit - encoder_get_offset(enc);
        }
    }

@@ -424,7 +338,7 @@ static void draw_image_unlocked(struct vo *vo, mp_image_t *mpi)
                                          vc->worst_time_base, avc->time_base);
                frame->pict_type = 0; // keep this at unknown/undefined
                frame->quality = avc->global_quality;
-                encode_video_and_write(vo, frame);
+                encoder_encode(enc, frame);
                av_frame_free(&frame);

                vc->lastdisplaycount += 1;
@@ -437,7 +351,7 @@ static void draw_image_unlocked(struct vo *vo, mp_image_t *mpi)

    if (!mpi) {
        // finish encoding
-        encode_video_and_write(vo, NULL);
+        encoder_encode(enc, NULL);
    } else {
        if (frameipts >= vc->lastframeipts) {
            if (vc->lastframeipts != AV_NOPTS_VALUE && vc->lastdisplaycount != 1)
@@ -448,7 +362,7 @@ static void draw_image_unlocked(struct vo *vo, mp_image_t *mpi)
            mpi = NULL;

            vc->lastframeipts = vc->lastipts = frameipts;
-            if (ectx->options->rawts && vc->lastipts < 0) {
+            if (enc->options->rawts && vc->lastipts < 0) {
                MP_ERR(vo, "why does this happen? DEBUG THIS! vc->lastipts = %lld\n",
                       (long long) vc->lastipts);
                vc->lastipts = -1;
@@ -464,19 +378,20 @@ done:
    talloc_free(mpi);
 }

-static void draw_image(struct vo *vo, mp_image_t *mpi)
-{
-    pthread_mutex_lock(&vo->encode_lavc_ctx->lock);
-    draw_image_unlocked(vo, mpi);
-    pthread_mutex_unlock(&vo->encode_lavc_ctx->lock);
-}
-
 static void flip_page(struct vo *vo)
 {
 }

 static int control(struct vo *vo, uint32_t request, void *data)
 {
+    struct priv *vc = vo->priv;
+
+    switch (request) {
+    case VOCTRL_RESET:
+        vc->last_video_in_pts = MP_NOPTS_VALUE;
+        break;
+    }
+
    return VO_NOTIMPL;
 }

@@ -485,9 +400,10 @@ const struct vo_driver video_out_lavc = {
    .description = "video encoding using libavcodec",
    .name = "lavc",
    .untimed = true,
+    .priv_size = sizeof(struct priv),
    .preinit = preinit,
    .query_format = query_format,
-    .reconfig = reconfig,
+    .reconfig2 = reconfig2,
    .control = control,
    .uninit = uninit,
    .draw_image = draw_image,