From b83bdd1d17cc90b4d8cd2a32321cd7c5cc306422 Mon Sep 17 00:00:00 2001
From: wm4 <wm4@nowhere>
Date: Mon, 25 May 2020 01:53:41 +0200
Subject: [PATCH] audio: merge pull/push ring buffer glue code

This is preparation for further cleanups (and eventually actual
improvements) of the audio output code.

AOs are split into two classes: pull and push. Pull AOs let an audio
callback of the native audio API read from a ring buffer. Push AOs
expose a function that works similarly to write(), and for which we
start a "feeder" thread. It seems making this split was beneficial,
because of the different data flow, and emulating one or the other in
the AOs directly would have created code duplication (all the "pull" AOs
had their own ring buffer implementation before it was cleaned up).
Unfortunately, both types had completely separate implementations (in
pull.c and push.c). The idea was that little can be shared anyway. But
that's very annoying now, because I want to change the API between AO
and player.

This commit attempts to merge them. I've moved everything from pull.c
and push.c, as well as the trivial entrypoints from ao.c, into the new
buffer.c, and attempted to reconcile the differences. It's a mess, but
at least there's only one ring buffer within the AO code now. Everything
should work mostly the same. Pull AOs now always copy the audio data
under a lock; before this commit, all ring buffer access was lock-free
(except for the decoder wakeup callback, which acquired a mutex). In
theory, this is "bad", and people obsessed with lock-free stuff will
hate me, but in practice it probably won't matter. The planned change
will probably remove this copying-under-lock again, but who knows when
this will happen.

One change makes push AOs drop audio where before only a warning was
logged. This only happens with AOs or drivers which exhibit unexpected
(and now unsupported) behavior.

This is a risky change. Although it's completely trivial conceptually,
there are too many special cases. In addition, I barely tested it, and
I've messed with it in a half-motivated state over a longer time, barely
making any progress, and finishing it in a rush when I already should
have been asleep. Most things seem to work, and I made superficial tests
with alsa, sdl, and encode mode. This should cover most things, but
there are a lot of tricky things that received no coverage. All this
text means you should be prepared to roll back to an older commit and
report your problem.
---
 audio/out/ao.c       |  94 +-----
 audio/out/buffer.c   | 751 +++++++++++++++++++++++++++++++++++++++++++
 audio/out/internal.h |   8 +-
 audio/out/pull.c     | 340 --------------------
 audio/out/push.c     | 572 --------------------------------
 wscript_build.py     |   3 +-
 6 files changed, 762 insertions(+), 1006 deletions(-)
 create mode 100644 audio/out/buffer.c
 delete mode 100644 audio/out/pull.c
 delete mode 100644 audio/out/push.c

diff --git a/audio/out/ao.c b/audio/out/ao.c
index 480dad69e0..7b301cd2e7 100644
--- a/audio/out/ao.c
+++ b/audio/out/ao.c
@@ -200,13 +200,10 @@ static struct ao *ao_init(bool probing, struct mpv_global *global,
                af_fmt_to_str(ao->format));
 
     ao->device = talloc_strdup(ao, dev);
-
-    ao->api = ao->driver->play ? &ao_api_push : &ao_api_pull;
-    ao->api_priv = talloc_zero_size(ao, ao->api->priv_size);
-    assert(!ao->api->priv_defaults && !ao->api->options);
-
     ao->stream_silence = flags & AO_INIT_STREAM_SILENCE;
 
+    init_buffer_pre(ao);
+
     ao->period_size = 1;
 
     int r = ao->driver->init(ao);
@@ -216,13 +213,14 @@ static struct ao *ao_init(bool probing, struct mpv_global *global,
             char redirect[80], rdevice[80];
             snprintf(redirect, sizeof(redirect), "%s", ao->redirect);
             snprintf(rdevice, sizeof(rdevice), "%s", ao->device ? ao->device : "");
-            talloc_free(ao);
+            ao_uninit(ao);
             return ao_init(probing, global, wakeup_cb, wakeup_ctx,
                            encode_lavc_ctx, flags, samplerate, format, channels,
                            rdevice, redirect);
         }
         goto fail;
     }
+    ao->driver_initialized = true;
 
     if (ao->period_size < 1) {
         MP_ERR(ao, "Invalid period size set.\n");
@@ -249,12 +247,12 @@ static struct ao *ao_init(bool probing, struct mpv_global *global,
     ao->buffer = (ao->buffer + align - 1) / align * align;
     MP_VERBOSE(ao, "using soft-buffer of %d samples.\n", ao->buffer);
 
-    if (ao->api->init(ao) < 0)
+    if (!init_buffer_post(ao))
         goto fail;
     return ao;
 
 fail:
-    talloc_free(ao);
+    ao_uninit(ao);
     return NULL;
 }
 
@@ -348,86 +346,6 @@ struct ao *ao_init_best(struct mpv_global *global,
     return ao;
 }
 
-// Uninitialize and destroy the AO. Remaining audio must be dropped.
-void ao_uninit(struct ao *ao)
-{
-    if (ao)
-        ao->api->uninit(ao);
-    talloc_free(ao);
-}
-
-// Queue the given audio data. Start playback if it hasn't started yet. Return
-// the number of samples that was accepted (the core will try to queue the rest
-// again later). Should never block.
-//  data: start pointer for each plane. If the audio data is packed, only
-//        data[0] is valid, otherwise there is a plane for each channel.
-//  samples: size of the audio data (see ao->sstride)
-//  flags: currently AOPLAY_FINAL_CHUNK can be set
-int ao_play(struct ao *ao, void **data, int samples, int flags)
-{
-    return ao->api->play(ao, data, samples, flags);
-}
-
-int ao_control(struct ao *ao, enum aocontrol cmd, void *arg)
-{
-    return ao->api->control ? ao->api->control(ao, cmd, arg) : CONTROL_UNKNOWN;
-}
-
-// Return size of the buffered data in seconds. Can include the device latency.
-// Basically, this returns how much data there is still to play, and how long
-// it takes until the last sample in the buffer reaches the speakers. This is
-// used for audio/video synchronization, so it's very important to implement
-// this correctly.
-double ao_get_delay(struct ao *ao)
-{
-    return ao->api->get_delay(ao);
-}
-
-// Return free size of the internal audio buffer. This controls how much audio
-// the core should decode and try to queue with ao_play().
-int ao_get_space(struct ao *ao)
-{
-    return ao->api->get_space(ao);
-}
-
-// Stop playback and empty buffers. Essentially go back to the state after
-// ao->init().
-void ao_reset(struct ao *ao)
-{
-    if (ao->api->reset)
-        ao->api->reset(ao);
-    atomic_fetch_and(&ao->events_, ~(unsigned int)AO_EVENT_UNDERRUN);
-}
-
-// Pause playback. Keep the current buffer. ao_get_delay() must return the
-// same value as before pausing.
-void ao_pause(struct ao *ao)
-{
-    if (ao->api->pause)
-        ao->api->pause(ao);
-}
-
-// Resume playback. Play the remaining buffer. If the driver doesn't support
-// pausing, it has to work around this and e.g. use ao_play_silence() to fill
-// the lost audio.
-void ao_resume(struct ao *ao)
-{
-    if (ao->api->resume)
-        ao->api->resume(ao);
-}
-
-// Block until the current audio buffer has played completely.
-void ao_drain(struct ao *ao)
-{
-    if (ao->api->drain)
-        ao->api->drain(ao);
-}
-
-bool ao_eof_reached(struct ao *ao)
-{
-    return ao->api->get_eof ? ao->api->get_eof(ao) : true;
-}
-
 // Query the AO_EVENT_*s as requested by the events parameter, and return them.
 int ao_query_and_reset_events(struct ao *ao, int events)
 {
diff --git a/audio/out/buffer.c b/audio/out/buffer.c
new file mode 100644
index 0000000000..d890497113
--- /dev/null
+++ b/audio/out/buffer.c
@@ -0,0 +1,751 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stddef.h>
+#include <pthread.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <errno.h>
+#include <assert.h>
+
+#include "ao.h"
+#include "internal.h"
+#include "audio/aframe.h"
+#include "audio/format.h"
+
+#include "common/msg.h"
+#include "common/common.h"
+
+#include "input/input.h"
+
+#include "osdep/io.h"
+#include "osdep/timer.h"
+#include "osdep/threads.h"
+#include "osdep/atomic.h"
+#include "misc/ring.h"
+
+struct buffer_state {
+    pthread_mutex_t lock;
+    pthread_cond_t wakeup;      // signaled by wakeup_playthread()
+
+    // Access from AO driver's thread only.
+    char *convert_buffer;       // scratch for ao_read_data_converted()
+
+    // --- protected by lock
+
+    struct mp_ring *buffers[MP_NUM_CHANNELS]; // one ring per audio plane
+
+
+    bool streaming;             // AO streaming active
+    bool playing;               // logically playing audio from buffer
+    bool paused;                // logically paused; implies playing=true
+    bool final_chunk;           // if buffer contains EOF
+
+    int64_t end_time_us;        // absolute output time of last played sample
+    int64_t underflow;          // number of samples missing since last check
+
+    bool need_wakeup;           // set by wakeup_playthread(), reset by thread
+    bool initial_unblocked;     // set by ao_unblock()
+
+    // "Push" AOs only (AOs with driver->play).
+    bool still_playing;         // cleared by ao_reset() or after final drain
+    double expected_end_time;   // mp_time_sec() based; 0 means "not computed"
+    bool wait_on_ao;            // set by ao_play_data(): wait for the device
+    pthread_t thread;           // thread shoveling data to AO
+    bool thread_valid;          // thread is running
+    bool terminate;             // exit thread
+    struct mp_aframe *temp_buf; // linear staging buffer, see realloc_buf()
+
+    int wakeup_pipe[2];         // interrupts poll() in ao_wait_poll()
+};
+
+static void *playthread(void *arg);
+
+// lock must be held
+static void wakeup_playthread(struct ao *ao)
+{
+    struct buffer_state *p = ao->buffer_state;
+    if (ao->driver->wakeup)
+        ao->driver->wakeup(ao);
+    p->need_wakeup = true;
+    pthread_cond_signal(&p->wakeup);
+}
+
+static int unlocked_get_space(struct ao *ao)
+{
+    struct buffer_state *p = ao->buffer_state;
+
+    int space = mp_ring_available(p->buffers[0]) / ao->sstride;
+
+    // The following code attempts to keep the total buffered audio at
+    // ao->buffer in order to improve latency.
+    if (ao->driver->play && ao->driver->get_space) {
+        int align = af_format_sample_alignment(ao->format);
+        int device_space = ao->driver->get_space(ao);
+        int device_buffered = ao->device_buffer - device_space;
+        int soft_buffered = mp_ring_size(p->buffers[0]) / ao->sstride - space;
+        // The extra margin helps avoiding too many wakeups if the AO is fully
+        // byte based and doesn't do proper chunked processing.
+        int min_buffer = ao->buffer + 64;
+        int missing = min_buffer - device_buffered - soft_buffered;
+        missing = (missing + align - 1) / align * align;
+        // But always keep the device's buffer filled as much as we can.
+        int device_missing = device_space - soft_buffered;
+        missing = MPMAX(missing, device_missing);
+        space = MPMIN(space, missing);
+        space = MPMAX(0, space);
+    }
+
+    return space;
+}
+
+// Return free size of the internal audio buffer. This controls how much audio
+// the core should decode and try to queue with ao_play().
+int ao_get_space(struct ao *ao)
+{
+    struct buffer_state *p = ao->buffer_state;
+    pthread_mutex_lock(&p->lock);
+    int space = unlocked_get_space(ao);
+    pthread_mutex_unlock(&p->lock);
+    return space;
+}
+
+// Queue the given audio data. Start playback if it hasn't started yet. Return
+// the number of samples that was accepted (the core will try to queue the rest
+// again later). Should never block.
+//  data: start pointer for each plane. If the audio data is packed, only
+//        data[0] is valid, otherwise there is a plane for each channel.
+//  samples: size of the audio data (see ao->sstride)
+//  flags: currently AOPLAY_FINAL_CHUNK can be set
+int ao_play(struct ao *ao, void **data, int samples, int flags)
+{
+    struct buffer_state *p = ao->buffer_state;
+
+    pthread_mutex_lock(&p->lock);
+
+    int write_samples = mp_ring_available(p->buffers[0]) / ao->sstride;
+    write_samples = MPMIN(write_samples, samples);
+
+    int write_bytes = write_samples * ao->sstride;
+    for (int n = 0; n < ao->num_planes; n++) {
+        int r = mp_ring_write(p->buffers[n], data[n], write_bytes);
+        assert(r == write_bytes);
+    }
+
+    p->paused = false;
+    p->final_chunk = write_samples == samples && (flags & AOPLAY_FINAL_CHUNK);
+
+    if (p->underflow)
+        MP_DBG(ao, "Audio underrun by %lld samples.\n", (long long)p->underflow);
+    p->underflow = 0;
+
+    if (write_samples) {
+        p->playing = true;
+        p->still_playing = true;
+        p->expected_end_time = 0;
+
+        if (!ao->driver->play && !p->streaming) {
+            p->streaming = true;
+            ao->driver->resume(ao);
+        }
+
+        wakeup_playthread(ao);
+    }
+    pthread_mutex_unlock(&p->lock);
+
+    return write_samples;
+}
+
+// Read the given amount of samples in the user-provided data buffer. Returns
+// the number of samples copied. If there is not enough data (buffer underrun
+// or EOF), return the number of samples that could be copied, and fill the
+// rest of the user-provided buffer with silence.
+// This basically assumes that the audio device doesn't care about underruns.
+// If this is called in paused mode, it will always return 0.
+// The caller should set out_time_us to the expected delay until the last sample
+// reaches the speakers, in microseconds, using mp_time_us() as reference.
+int ao_read_data(struct ao *ao, void **data, int samples, int64_t out_time_us)
+{
+    struct buffer_state *p = ao->buffer_state;
+    int full_bytes = samples * ao->sstride;
+    bool need_wakeup = false;
+    int bytes = 0;
+
+    pthread_mutex_lock(&p->lock);
+
+    if (!p->playing || p->paused)
+        goto end;
+
+    int buffered_bytes = mp_ring_buffered(p->buffers[0]);
+    bytes = MPMIN(buffered_bytes, full_bytes);
+
+    if (full_bytes > bytes && !p->final_chunk) {
+        p->underflow += (full_bytes - bytes) / ao->sstride;
+        ao_underrun_event(ao);
+    }
+
+    if (bytes > 0)
+        p->end_time_us = out_time_us;
+
+    for (int n = 0; n < ao->num_planes; n++)
+        mp_ring_read(p->buffers[n], data[n], bytes);
+
+    // Half of the buffer played -> request more.
+    need_wakeup = buffered_bytes - bytes <= mp_ring_size(p->buffers[0]) / 2;
+
+end:
+
+    pthread_mutex_unlock(&p->lock);
+
+    if (need_wakeup)
+        ao->wakeup_cb(ao->wakeup_ctx);
+
+    // pad with silence (underflow/paused/eof)
+    for (int n = 0; n < ao->num_planes; n++)
+        af_fill_silence((char *)data[n] + bytes, full_bytes - bytes, ao->format);
+
+    ao_post_process_data(ao, data, samples);
+
+    return bytes / ao->sstride;
+}
+
+// Same as ao_read_data(), but convert data according to *fmt.
+// fmt->src_fmt and fmt->channels must be the same as the AO parameters.
+int ao_read_data_converted(struct ao *ao, struct ao_convert_fmt *fmt,
+                           void **data, int samples, int64_t out_time_us)
+{
+    struct buffer_state *p = ao->buffer_state;
+    void *ndata[MP_NUM_CHANNELS] = {0};
+
+    if (!ao_need_conversion(fmt))
+        return ao_read_data(ao, data, samples, out_time_us);
+
+    assert(ao->format == fmt->src_fmt);
+    assert(ao->channels.num == fmt->channels);
+
+    bool planar = af_fmt_is_planar(fmt->src_fmt);
+    int planes = planar ? fmt->channels : 1;
+    int plane_samples = samples * (planar ? 1: fmt->channels);
+    int src_plane_size = plane_samples * af_fmt_to_bytes(fmt->src_fmt);
+    int dst_plane_size = plane_samples * fmt->dst_bits / 8;
+
+    int needed = src_plane_size * planes;
+    if (needed > talloc_get_size(p->convert_buffer) || !p->convert_buffer) {
+        talloc_free(p->convert_buffer);
+        p->convert_buffer = talloc_size(NULL, needed);
+    }
+
+    for (int n = 0; n < planes; n++)
+        ndata[n] = p->convert_buffer + n * src_plane_size;
+
+    int res = ao_read_data(ao, ndata, samples, out_time_us);
+
+    ao_convert_inplace(fmt, ndata, samples);
+    for (int n = 0; n < planes; n++)
+        memcpy(data[n], ndata[n], dst_plane_size);
+
+    return res;
+}
+
+// Pass a control command to the driver, holding the buffer lock meanwhile.
+int ao_control(struct ao *ao, enum aocontrol cmd, void *arg)
+{
+    if (!ao->driver->control)
+        return CONTROL_UNKNOWN;
+    struct buffer_state *state = ao->buffer_state;
+    pthread_mutex_lock(&state->lock);
+    int res = ao->driver->control(ao, cmd, arg);
+    pthread_mutex_unlock(&state->lock);
+    return res;
+}
+
+static double unlocked_get_delay(struct ao *ao)
+{
+    struct buffer_state *p = ao->buffer_state;
+    double driver_delay = 0;
+
+    if (ao->driver->get_delay)
+        driver_delay = ao->driver->get_delay(ao);
+
+    if (!ao->driver->play) {
+        int64_t end = p->end_time_us;
+        int64_t now = mp_time_us();
+        driver_delay += MPMAX(0, (end - now) / (1000.0 * 1000.0));
+    }
+
+    return mp_ring_buffered(p->buffers[0]) / (double)ao->bps + driver_delay;
+}
+
+// Return size of the buffered data in seconds. Can include the device latency.
+// Basically, this returns how much data there is still to play, and how long
+// it takes until the last sample in the buffer reaches the speakers. This is
+// used for audio/video synchronization, so it's very important to implement
+// this correctly.
+double ao_get_delay(struct ao *ao)
+{
+    struct buffer_state *p = ao->buffer_state;
+
+    pthread_mutex_lock(&p->lock);
+    double delay = unlocked_get_delay(ao);
+    pthread_mutex_unlock(&p->lock);
+    return delay;
+}
+
+// Stop playback and empty buffers. Essentially go back to the state after
+// ao->init().
+void ao_reset(struct ao *ao)
+{
+    struct buffer_state *p = ao->buffer_state;
+
+    pthread_mutex_lock(&p->lock);
+
+    for (int n = 0; n < ao->num_planes; n++)
+        mp_ring_reset(p->buffers[n]);
+
+    if (!ao->stream_silence && ao->driver->reset) {
+        ao->driver->reset(ao); // assumes the audio callback thread is stopped
+        p->streaming = false;
+    }
+    p->paused = false;
+    p->playing = false;
+    if (p->still_playing)
+        wakeup_playthread(ao);
+    p->still_playing = false;
+    p->end_time_us = 0;
+
+    atomic_fetch_and(&ao->events_, ~(unsigned int)AO_EVENT_UNDERRUN);
+
+    pthread_mutex_unlock(&p->lock);
+}
+
+// Pause playback. Keep the current buffer. ao_get_delay() must return the
+// same value as before pausing.
+void ao_pause(struct ao *ao)
+{
+    struct buffer_state *p = ao->buffer_state;
+
+    pthread_mutex_lock(&p->lock);
+
+    if (p->playing && !p->paused) {
+        if (p->streaming && !ao->stream_silence) {
+            if (ao->driver->pause) {
+                ao->driver->pause(ao);
+            } else if (ao->driver->reset) {
+                ao->driver->reset(ao);
+                p->streaming = false;
+            }
+        }
+        p->paused = true;
+        wakeup_playthread(ao);
+    }
+
+    pthread_mutex_unlock(&p->lock);
+}
+
+// Resume playback. Play the remaining buffer. If the driver doesn't support
+// pausing, it has to work around this and e.g. use ao_play_silence() to fill
+// the lost audio.
+void ao_resume(struct ao *ao)
+{
+    struct buffer_state *p = ao->buffer_state;
+
+    pthread_mutex_lock(&p->lock);
+
+    if (p->playing && p->paused) {
+        if (p->streaming && ao->driver->resume)
+            ao->driver->resume(ao);
+        p->paused = false;
+        p->expected_end_time = 0;
+        wakeup_playthread(ao);
+    }
+
+    pthread_mutex_unlock(&p->lock);
+}
+
+// True if nothing is playing, or the queued audio has been consumed.
+bool ao_eof_reached(struct ao *ao)
+{
+    struct buffer_state *state = ao->buffer_state;
+    bool drained;
+    pthread_mutex_lock(&state->lock);
+    if (ao->driver->play) {
+        drained = !state->still_playing;
+    } else {
+        // Device latency is deliberately ignored here; accounting for it
+        // would require an extra thread just for timing.
+        drained = mp_ring_buffered(state->buffers[0]) == 0;
+    }
+    bool eof = !state->playing || drained;
+    pthread_mutex_unlock(&state->lock);
+    return eof;
+}
+
+// Block until the current audio buffer has played completely.
+void ao_drain(struct ao *ao)
+{
+    struct buffer_state *p = ao->buffer_state;
+
+    pthread_mutex_lock(&p->lock);
+    p->final_chunk = true;
+    wakeup_playthread(ao);
+    double left = 0;
+    if (p->playing && !p->paused)
+        left = mp_ring_buffered(p->buffers[0]) / (double)ao->bps * 1e6;
+    pthread_mutex_unlock(&p->lock);
+
+    // Wait for lower bound.
+    mp_sleep_us(left);
+    // And then poll for actual end. (Unfortunately, this code considers
+    // audio APIs which do not want you to use mutexes in the audio
+    // callback, and an extra semaphore would require slightly more effort.)
+    // Limit to arbitrary ~250ms max. waiting for robustness.
+    int64_t max = mp_time_us() + 250000;
+    while (mp_time_us() < max && !ao_eof_reached(ao))
+        mp_sleep_us(1);
+
+    ao_reset(ao);
+}
+
+// Uninitialize and destroy the AO. Remaining audio must be dropped.
+// ao may be NULL (as with the previous ao.c version); then this is a no-op.
+void ao_uninit(struct ao *ao)
+{
+    if (!ao)
+        return;
+    struct buffer_state *p = ao->buffer_state;
+
+    if (p->thread_valid) {
+        pthread_mutex_lock(&p->lock);
+        p->terminate = true;
+        wakeup_playthread(ao);
+        pthread_mutex_unlock(&p->lock);
+        // The feeder thread must be gone before the driver is torn down.
+        pthread_join(p->thread, NULL);
+        p->thread_valid = false;
+    }
+    if (ao->driver_initialized)
+        ao->driver->uninit(ao);
+
+    talloc_free(p->convert_buffer);
+    talloc_free(p->temp_buf);
+
+    for (int n = 0; n < 2; n++) {
+        int h = p->wakeup_pipe[n];
+        if (h >= 0)
+            close(h);
+    }
+    pthread_cond_destroy(&p->wakeup);
+    pthread_mutex_destroy(&p->lock);
+    talloc_free(ao);
+}
+
+// Allocate the buffer state; everything ao_uninit() tears down must be valid.
+void init_buffer_pre(struct ao *ao)
+{
+    struct buffer_state *p = talloc_zero(ao, struct buffer_state);
+    ao->buffer_state = p;
+    // ao_uninit() closes every fd >= 0; zero-filled fds would hit stdin.
+    p->wakeup_pipe[0] = p->wakeup_pipe[1] = -1;
+    mpthread_mutex_init_recursive(&p->lock);
+    pthread_cond_init(&p->wakeup, NULL);
+}
+
+// Create ring buffers and, for push AOs, the feeder thread. False on failure.
+bool init_buffer_post(struct ao *ao)
+{
+    struct buffer_state *p = ao->buffer_state;
+
+    assert(ao->driver->play || ao->driver->resume); // pull AOs need resume()
+    for (int n = 0; n < ao->num_planes; n++)
+        p->buffers[n] = mp_ring_new(ao, ao->buffer * ao->sstride);
+
+    mp_make_wakeup_pipe(p->wakeup_pipe);
+    if (ao->driver->play) {
+        if (ao->device_buffer <= 0) {
+            MP_FATAL(ao, "Couldn't probe device buffer size.\n");
+            return false;
+        }
+
+        p->thread_valid = true;
+        if (pthread_create(&p->thread, NULL, playthread, ao)) {
+            p->thread_valid = false;
+            return false;
+        }
+    } else if (ao->stream_silence) {
+        ao->driver->resume(ao);
+        p->streaming = true;
+    }
+
+    return true;
+}
+
+static bool realloc_buf(struct ao *ao, int samples)
+{
+    struct buffer_state *p = ao->buffer_state;
+
+    samples = MPMAX(1, samples);
+
+    if (!p->temp_buf || samples > mp_aframe_get_size(p->temp_buf)) {
+        TA_FREEP(&p->temp_buf);
+        p->temp_buf = mp_aframe_create();
+        if (!mp_aframe_set_format(p->temp_buf, ao->format) ||
+            !mp_aframe_set_chmap(p->temp_buf, &ao->channels) ||
+            !mp_aframe_set_rate(p->temp_buf, ao->samplerate) ||
+            !mp_aframe_alloc_data(p->temp_buf, samples))
+        {
+            TA_FREEP(&p->temp_buf);
+            return false;
+        }
+    }
+
+    return true;
+}
+
+// Push AOs only: move buffered audio into the driver. Called locked.
+static void ao_play_data(struct ao *ao)
+{
+    struct buffer_state *p = ao->buffer_state;
+    int space = ao->driver->get_space(ao);
+    bool play_silence = p->paused || (ao->stream_silence && !p->still_playing);
+    space = MPMAX(space, 0);
+    // Most AOs want period-size aligned audio, and preferably as much as
+    // possible in one go, so the audio data is "linearized" into this buffer.
+    if (space % ao->period_size)
+        MP_ERR(ao, "Audio device reports unaligned available buffer size.\n");
+    if (!realloc_buf(ao, space)) {
+        MP_ERR(ao, "Failed to allocate buffer.\n");
+        return;
+    }
+    void **planes = (void **)mp_aframe_get_data_rw(p->temp_buf);
+    assert(planes);
+    int samples = mp_ring_buffered(p->buffers[0]) / ao->sstride;
+    if (samples > space)
+        samples = space;
+    if (play_silence)
+        samples = space;
+    samples = ao_read_data(ao, planes, samples, 0);
+    if (play_silence)
+        samples = space; // ao_read_data() fills the remainder with silence
+    int max = samples;
+    int flags = 0;
+    if (p->final_chunk && samples < space) {
+        flags |= AOPLAY_FINAL_CHUNK;
+    } else {
+        samples = samples / ao->period_size * ao->period_size;
+    }
+    MP_STATS(ao, "start ao fill");
+    // No ao_post_process_data() here: ao_read_data() above already applied it.
+    int r = 0;
+    if (samples)
+        r = ao->driver->play(ao, planes, samples, flags);
+    MP_STATS(ao, "end ao fill");
+    if (r > samples) {
+        MP_ERR(ao, "Audio device returned nonsense value.\n");
+        r = samples;
+    } else if (r < 0) {
+        MP_ERR(ao, "Error writing audio to device.\n");
+    } else if (r != samples) {
+        MP_ERR(ao, "Audio device returned broken buffer state (sent %d samples, "
+               "got %d samples, %d period%s)! Discarding audio.\n", samples, r,
+               ao->period_size, flags & AOPLAY_FINAL_CHUNK ? " final" : "");
+    }
+    r = MPMAX(r, 0);
+    // Probably can't copy the rest of the buffer due to period alignment.
+    bool stuck_eof = r <= 0 && space >= max && samples > 0;
+    if ((flags & AOPLAY_FINAL_CHUNK) && stuck_eof) {
+        MP_ERR(ao, "Audio output driver seems to ignore AOPLAY_FINAL_CHUNK.\n");
+        r = max;
+    }
+    if (r > 0) {
+        p->expected_end_time = 0;
+        p->streaming = true;
+    }
+    // Nothing written, but more input data than space - this must mean the
+    // AO's get_space() doesn't do period alignment correctly.
+    bool stuck = r == 0 && max >= space && space > 0;
+    if (stuck)
+        MP_ERR(ao, "Audio output is reporting incorrect buffer status.\n");
+    // Wait until space becomes available. Also wait if we actually wrote data,
+    // so the AO wakes us up properly if it needs more data.
+    p->wait_on_ao = space == 0 || r > 0 || stuck;
+    p->still_playing |= r > 0 && !play_silence;
+    // If we just filled the AO completely (r == space), don't refill for a
+    // while. Prevents wakeup feedback with byte-granular AOs.
+    int needed = unlocked_get_space(ao);
+    bool more = needed >= (r == space ? ao->device_buffer / 4 : 1) && !stuck &&
+                !(flags & AOPLAY_FINAL_CHUNK);
+    if (more)
+        ao->wakeup_cb(ao->wakeup_ctx); // request more data
+    if (!samples && space && !ao->driver->reports_underruns && p->still_playing)
+        ao_underrun_event(ao);
+    MP_TRACE(ao, "in=%d flags=%d space=%d r=%d wa/pl=%d/%d needed=%d more=%d\n",
+             max, flags, space, r, p->wait_on_ao, p->still_playing, needed, more);
+}
+
+static void *playthread(void *arg)
+{
+    struct ao *ao = arg;
+    struct buffer_state *p = ao->buffer_state;
+    mpthread_set_name("ao");
+    pthread_mutex_lock(&p->lock);
+    while (!p->terminate) {
+        bool blocked = ao->driver->initially_blocked && !p->initial_unblocked;
+        bool playing = (!p->paused || ao->stream_silence) && !blocked;
+        if (playing)
+            ao_play_data(ao);
+
+        if (!p->need_wakeup) {
+            MP_STATS(ao, "start audio wait");
+            if (!p->wait_on_ao || !playing) {
+                // Avoid busy waiting, because the audio API will still report
+                // that it needs new data, even if we're not ready yet, or if
+                // get_space() decides that the amount of audio buffered in the
+                // device is enough, and p->buffer can be empty.
+                // The most important part is that the decoder is woken up, so
+                // that the decoder will wake up us in turn.
+                MP_TRACE(ao, "buffer inactive.\n");
+
+                bool was_playing = p->still_playing;
+                double timeout = -1;
+                if (p->still_playing && !p->paused && p->final_chunk &&
+                    !mp_ring_buffered(p->buffers[0]))
+                {
+                    double now = mp_time_sec();
+                    if (!p->expected_end_time)
+                        p->expected_end_time = now + unlocked_get_delay(ao);
+                    if (p->expected_end_time < now) {
+                        p->still_playing = false;
+                    } else {
+                        timeout = p->expected_end_time - now;
+                    }
+                }
+
+                if (was_playing && !p->still_playing)
+                    ao->wakeup_cb(ao->wakeup_ctx);
+                pthread_cond_signal(&p->wakeup); // for draining
+
+                if (p->still_playing && timeout > 0) {
+                    struct timespec ts = mp_rel_time_to_timespec(timeout);
+                    pthread_cond_timedwait(&p->wakeup, &p->lock, &ts);
+                } else {
+                    pthread_cond_wait(&p->wakeup, &p->lock);
+                }
+            } else {
+                // Wait until the device wants us to write more data to it.
+                if (!ao->driver->wait || ao->driver->wait(ao, &p->lock) < 0) {
+                    // Fallback to guessing.
+                    double timeout = 0;
+                    if (ao->driver->get_delay)
+                        timeout = ao->driver->get_delay(ao);
+                    timeout *= 0.25; // wake up if 25% played
+                    if (!p->need_wakeup) {
+                        struct timespec ts = mp_rel_time_to_timespec(timeout);
+                        pthread_cond_timedwait(&p->wakeup, &p->lock, &ts);
+                    }
+                }
+            }
+            MP_STATS(ao, "end audio wait");
+        }
+        p->need_wakeup = false;
+    }
+    pthread_mutex_unlock(&p->lock);
+    return NULL;
+}
+
+// Mark the AO as unblocked (see driver->initially_blocked in playthread())
+// and wake the feeder thread. Does nothing for AOs without driver->play.
+void ao_unblock(struct ao *ao)
+{
+    if (ao->driver->play) {
+        struct buffer_state *p = ao->buffer_state;
+        pthread_mutex_lock(&p->lock);
+        p->initial_unblocked = true;
+        wakeup_playthread(ao); // sets need_wakeup and signals p->wakeup
+        pthread_mutex_unlock(&p->lock);
+    }
+}
+
+// Must be called locked.
+int ao_play_silence(struct ao *ao, int samples)
+{
+    assert(ao->driver->play);
+
+    struct buffer_state *p = ao->buffer_state;
+
+    if (!realloc_buf(ao, samples) || !ao->driver->play)
+        return 0;
+
+    void **planes = (void **)mp_aframe_get_data_rw(p->temp_buf);
+    assert(planes);
+
+    for (int n = 0; n < ao->num_planes; n++)
+        af_fill_silence(planes[n], ao->sstride * samples, ao->format);
+
+    return ao->driver->play(ao, planes, samples, 0);
+}
+
+#ifndef __MINGW32__
+
+#include <poll.h>
+
+#define MAX_POLL_FDS 20
+
+// Call poll() for the given fds. This will extend the given fds with the
+// wakeup pipe, so ao_wakeup_poll() will basically interrupt this function.
+// Unlocks the lock temporarily.
+// Returns <0 on error, 0 on success, 1 if the caller should return immediately.
+int ao_wait_poll(struct ao *ao, struct pollfd *fds, int num_fds,
+                 pthread_mutex_t *lock)
+{
+    struct buffer_state *p = ao->buffer_state;
+    assert(ao->driver->play);
+    assert(&p->lock == lock);
+
+    if (num_fds >= MAX_POLL_FDS || p->wakeup_pipe[0] < 0)
+        return -1;
+
+    struct pollfd p_fds[MAX_POLL_FDS];
+    memcpy(p_fds, fds, num_fds * sizeof(p_fds[0]));
+    p_fds[num_fds] = (struct pollfd){
+        .fd = p->wakeup_pipe[0],
+        .events = POLLIN,
+    };
+
+    pthread_mutex_unlock(&p->lock);
+    int r = poll(p_fds, num_fds + 1, -1);
+    r = r < 0 ? -errno : 0;
+    pthread_mutex_lock(&p->lock);
+
+    memcpy(fds, p_fds, num_fds * sizeof(fds[0]));
+    bool wakeup = false;
+    if (p_fds[num_fds].revents & POLLIN) {
+        wakeup = true;
+        // might "drown" some wakeups, but that's ok for our use-case
+        mp_flush_wakeup_pipe(p->wakeup_pipe[0]);
+    }
+    return (r >= 0 || r == -EINTR) ? wakeup : -1;
+}
+
+void ao_wakeup_poll(struct ao *ao)
+{
+    assert(ao->driver->play);
+    struct buffer_state *p = ao->buffer_state;
+
+    (void)write(p->wakeup_pipe[1], &(char){0}, 1);
+}
+
+#endif
diff --git a/audio/out/internal.h b/audio/out/internal.h
index 7bdd17ad8a..c036ebad16 100644
--- a/audio/out/internal.h
+++ b/audio/out/internal.h
@@ -37,8 +37,8 @@ struct ao {
     bool untimed;               // don't assume realtime playback
     int device_buffer;          // device buffer in samples (guessed by
                                 // common init code if not set by driver)
-    const struct ao_driver *api; // entrypoints to the wrapper (push.c/pull.c)
     const struct ao_driver *driver;
+    bool driver_initialized;
     void *priv;
     struct mpv_global *global;
     struct encode_lavc_context *encode_lavc_ctx;
@@ -75,12 +75,12 @@ struct ao {
 
     int buffer;
     double def_buffer;
+    struct buffer_state *buffer_state;
     void *api_priv;
 };
 
-extern const struct ao_driver ao_api_push;
-extern const struct ao_driver ao_api_pull;
-
+void init_buffer_pre(struct ao *ao);
+bool init_buffer_post(struct ao *ao);
 
 /* Note:
  *
diff --git a/audio/out/pull.c b/audio/out/pull.c
deleted file mode 100644
index fe8204e01b..0000000000
--- a/audio/out/pull.c
+++ /dev/null
@@ -1,340 +0,0 @@
-/*
- * This file is part of mpv.
- *
- * mpv is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * mpv is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <stddef.h>
-#include <inttypes.h>
-#include <assert.h>
-
-#include "ao.h"
-#include "internal.h"
-#include "audio/format.h"
-
-#include "common/msg.h"
-#include "common/common.h"
-
-#include "input/input.h"
-
-#include "osdep/timer.h"
-#include "osdep/threads.h"
-#include "osdep/atomic.h"
-#include "misc/ring.h"
-
-/*
- * Note: there is some stupid stuff in this file in order to avoid mutexes.
- * This requirement is dictated by several audio APIs, at least jackaudio.
- */
-
-enum {
-    AO_STATE_NONE,  // idle (e.g. before playback started, or after playback
-                    // finished, but device is open)
-    AO_STATE_WAIT,  // wait for callback to go into AO_STATE_NONE state
-    AO_STATE_PLAY,  // play the buffer
-    AO_STATE_BUSY,  // like AO_STATE_PLAY, but ao_read_data() is being called
-};
-
-#define IS_PLAYING(st) ((st) == AO_STATE_PLAY || (st) == AO_STATE_BUSY)
-
-struct ao_pull_state {
-    // Be very careful with the order when accessing planes.
-    struct mp_ring *buffers[MP_NUM_CHANNELS];
-
-    // AO_STATE_*
-    atomic_int state;
-
-    // Set when the buffer is intentionally not fed anymore in PLAY state.
-    atomic_bool draining;
-
-    // Set by the audio thread when an underflow was detected.
-    // It adds the number of samples.
-    atomic_int underflow;
-
-    // Device delay of the last written sample, in realtime.
-    atomic_llong end_time_us;
-
-    char *convert_buffer;
-};
-
-static void set_state(struct ao *ao, int new_state)
-{
-    struct ao_pull_state *p = ao->api_priv;
-    while (1) {
-        int old = atomic_load(&p->state);
-        if (old == AO_STATE_BUSY) {
-            // A spinlock, because some audio APIs don't want us to use mutexes.
-            mp_sleep_us(1);
-            continue;
-        }
-        if (atomic_compare_exchange_strong(&p->state, &old, new_state))
-            break;
-    }
-}
-
-static int get_space(struct ao *ao)
-{
-    struct ao_pull_state *p = ao->api_priv;
-
-    // Since the reader will read the last plane last, its free space is the
-    // minimum free space across all planes.
-    return mp_ring_available(p->buffers[ao->num_planes - 1]) / ao->sstride;
-}
-
-static int play(struct ao *ao, void **data, int samples, int flags)
-{
-    struct ao_pull_state *p = ao->api_priv;
-
-    int write_samples = get_space(ao);
-    write_samples = MPMIN(write_samples, samples);
-
-    // Write starting from the last plane - this way, the first plane will
-    // always contain the minimum amount of data readable across all planes
-    // (assumes the reader starts with the first plane).
-    int write_bytes = write_samples * ao->sstride;
-    for (int n = ao->num_planes - 1; n >= 0; n--) {
-        int r = mp_ring_write(p->buffers[n], data[n], write_bytes);
-        assert(r == write_bytes);
-    }
-
-    int state = atomic_load(&p->state);
-    if (!IS_PLAYING(state)) {
-        atomic_store(&p->draining, false);
-        atomic_store(&p->underflow, 0);
-        set_state(ao, AO_STATE_PLAY);
-        if (!ao->stream_silence)
-            ao->driver->resume(ao);
-    }
-
-    bool draining = write_samples == samples && (flags & AOPLAY_FINAL_CHUNK);
-    atomic_store(&p->draining, draining);
-
-    int underflow = atomic_fetch_and(&p->underflow, 0);
-    if (underflow)
-        MP_DBG(ao, "Audio underrun by %d samples.\n", underflow);
-
-    return write_samples;
-}
-
-// Read the given amount of samples in the user-provided data buffer. Returns
-// the number of samples copied. If there is not enough data (buffer underrun
-// or EOF), return the number of samples that could be copied, and fill the
-// rest of the user-provided buffer with silence.
-// This basically assumes that the audio device doesn't care about underruns.
-// If this is called in paused mode, it will always return 0.
-// The caller should set out_time_us to the expected delay until the last sample
-// reaches the speakers, in microseconds, using mp_time_us() as reference.
-int ao_read_data(struct ao *ao, void **data, int samples, int64_t out_time_us)
-{
-    assert(ao->api == &ao_api_pull);
-
-    struct ao_pull_state *p = ao->api_priv;
-    int full_bytes = samples * ao->sstride;
-    bool need_wakeup = false;
-    int bytes = 0;
-
-    // Play silence in states other than AO_STATE_PLAY.
-    if (!atomic_compare_exchange_strong(&p->state, &(int){AO_STATE_PLAY},
-                                        AO_STATE_BUSY))
-        goto end;
-
-    // Since the writer will write the first plane last, its buffered amount
-    // of data is the minimum amount across all planes.
-    int buffered_bytes = mp_ring_buffered(p->buffers[0]);
-    bytes = MPMIN(buffered_bytes, full_bytes);
-
-    if (full_bytes > bytes && !atomic_load(&p->draining)) {
-        atomic_fetch_add(&p->underflow, (full_bytes - bytes) / ao->sstride);
-        ao_underrun_event(ao);
-    }
-
-    if (bytes > 0)
-        atomic_store(&p->end_time_us, out_time_us);
-
-    for (int n = 0; n < ao->num_planes; n++) {
-        int r = mp_ring_read(p->buffers[n], data[n], bytes);
-        bytes = MPMIN(bytes, r);
-    }
-
-    // Half of the buffer played -> request more.
-    need_wakeup = buffered_bytes - bytes <= mp_ring_size(p->buffers[0]) / 2;
-
-    // Should never fail.
-    atomic_compare_exchange_strong(&p->state, &(int){AO_STATE_BUSY}, AO_STATE_PLAY);
-
-end:
-
-    if (need_wakeup)
-        ao->wakeup_cb(ao->wakeup_ctx);
-
-    // pad with silence (underflow/paused/eof)
-    for (int n = 0; n < ao->num_planes; n++)
-        af_fill_silence((char *)data[n] + bytes, full_bytes - bytes, ao->format);
-
-    ao_post_process_data(ao, data, samples);
-
-    return bytes / ao->sstride;
-}
-
-// Same as ao_read_data(), but convert data according to *fmt.
-// fmt->src_fmt and fmt->channels must be the same as the AO parameters.
-int ao_read_data_converted(struct ao *ao, struct ao_convert_fmt *fmt,
-                           void **data, int samples, int64_t out_time_us)
-{
-    assert(ao->api == &ao_api_pull);
-
-    struct ao_pull_state *p = ao->api_priv;
-    void *ndata[MP_NUM_CHANNELS] = {0};
-
-    if (!ao_need_conversion(fmt))
-        return ao_read_data(ao, data, samples, out_time_us);
-
-    assert(ao->format == fmt->src_fmt);
-    assert(ao->channels.num == fmt->channels);
-
-    bool planar = af_fmt_is_planar(fmt->src_fmt);
-    int planes = planar ? fmt->channels : 1;
-    int plane_samples = samples * (planar ? 1: fmt->channels);
-    int src_plane_size = plane_samples * af_fmt_to_bytes(fmt->src_fmt);
-    int dst_plane_size = plane_samples * fmt->dst_bits / 8;
-
-    int needed = src_plane_size * planes;
-    if (needed > talloc_get_size(p->convert_buffer) || !p->convert_buffer) {
-        talloc_free(p->convert_buffer);
-        p->convert_buffer = talloc_size(NULL, needed);
-    }
-
-    for (int n = 0; n < planes; n++)
-        ndata[n] = p->convert_buffer + n * src_plane_size;
-
-    int res = ao_read_data(ao, ndata, samples, out_time_us);
-
-    ao_convert_inplace(fmt, ndata, samples);
-    for (int n = 0; n < planes; n++)
-        memcpy(data[n], ndata[n], dst_plane_size);
-
-    return res;
-}
-
-static int control(struct ao *ao, enum aocontrol cmd, void *arg)
-{
-    if (ao->driver->control)
-        return ao->driver->control(ao, cmd, arg);
-    return CONTROL_UNKNOWN;
-}
-
-// Return size of the buffered data in seconds. Can include the device latency.
-// Basically, this returns how much data there is still to play, and how long
-// it takes until the last sample in the buffer reaches the speakers. This is
-// used for audio/video synchronization, so it's very important to implement
-// this correctly.
-static double get_delay(struct ao *ao)
-{
-    struct ao_pull_state *p = ao->api_priv;
-
-    int64_t end = atomic_load(&p->end_time_us);
-    int64_t now = mp_time_us();
-    double driver_delay = MPMAX(0, (end - now) / (1000.0 * 1000.0));
-    return mp_ring_buffered(p->buffers[0]) / (double)ao->bps + driver_delay;
-}
-
-static void reset(struct ao *ao)
-{
-    struct ao_pull_state *p = ao->api_priv;
-    if (!ao->stream_silence && ao->driver->reset)
-        ao->driver->reset(ao); // assumes the audio callback thread is stopped
-    set_state(ao, AO_STATE_NONE);
-    for (int n = 0; n < ao->num_planes; n++)
-        mp_ring_reset(p->buffers[n]);
-    atomic_store(&p->end_time_us, 0);
-}
-
-static void pause(struct ao *ao)
-{
-    if (!ao->stream_silence && ao->driver->reset)
-        ao->driver->reset(ao);
-    set_state(ao, AO_STATE_NONE);
-}
-
-static void resume(struct ao *ao)
-{
-    set_state(ao, AO_STATE_PLAY);
-    if (!ao->stream_silence)
-        ao->driver->resume(ao);
-}
-
-static bool get_eof(struct ao *ao)
-{
-    struct ao_pull_state *p = ao->api_priv;
-    // For simplicity, ignore the latency. Otherwise, we would have to run an
-    // extra thread to time it.
-    return mp_ring_buffered(p->buffers[0]) == 0;
-}
-
-static void drain(struct ao *ao)
-{
-    struct ao_pull_state *p = ao->api_priv;
-    int state = atomic_load(&p->state);
-    if (IS_PLAYING(state)) {
-        atomic_store(&p->draining, true);
-        // Wait for lower bound.
-        mp_sleep_us(mp_ring_buffered(p->buffers[0]) / (double)ao->bps * 1e6);
-        // And then poll for actual end. (Unfortunately, this code considers
-        // audio APIs which do not want you to use mutexes in the audio
-        // callback, and an extra semaphore would require slightly more effort.)
-        // Limit to arbitrary ~250ms max. waiting for robustness.
-        int64_t max = mp_time_us() + 250000;
-        while (mp_time_us() < max && !get_eof(ao))
-            mp_sleep_us(1);
-    }
-    reset(ao);
-}
-
-static void uninit(struct ao *ao)
-{
-    struct ao_pull_state *p = ao->api_priv;
-
-    ao->driver->uninit(ao);
-
-    talloc_free(p->convert_buffer);
-}
-
-static int init(struct ao *ao)
-{
-    struct ao_pull_state *p = ao->api_priv;
-    for (int n = 0; n < ao->num_planes; n++)
-        p->buffers[n] = mp_ring_new(ao, ao->buffer * ao->sstride);
-    atomic_store(&p->state, AO_STATE_NONE);
-    assert(ao->driver->resume);
-
-    if (ao->stream_silence)
-        ao->driver->resume(ao);
-
-    return 0;
-}
-
-const struct ao_driver ao_api_pull = {
-    .init = init,
-    .control = control,
-    .uninit = uninit,
-    .drain = drain,
-    .reset = reset,
-    .get_space = get_space,
-    .play = play,
-    .get_delay = get_delay,
-    .get_eof = get_eof,
-    .pause = pause,
-    .resume = resume,
-    .priv_size = sizeof(struct ao_pull_state),
-};
diff --git a/audio/out/push.c b/audio/out/push.c
deleted file mode 100644
index 92fd53631b..0000000000
--- a/audio/out/push.c
+++ /dev/null
@@ -1,572 +0,0 @@
-/*
- * This file is part of mpv.
- *
- * mpv is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * mpv is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <stddef.h>
-#include <pthread.h>
-#include <inttypes.h>
-#include <unistd.h>
-#include <errno.h>
-#include <assert.h>
-
-#include "osdep/io.h"
-
-#include "ao.h"
-#include "internal.h"
-#include "audio/format.h"
-
-#include "common/msg.h"
-#include "common/common.h"
-
-#include "input/input.h"
-
-#include "osdep/threads.h"
-#include "osdep/timer.h"
-#include "osdep/atomic.h"
-
-#include "audio/audio_buffer.h"
-
-struct ao_push_state {
-    pthread_t thread;
-    pthread_mutex_t lock;
-    pthread_cond_t wakeup;
-
-    // --- protected by lock
-
-    struct mp_audio_buffer *buffer;
-
-    uint8_t *silence[MP_NUM_CHANNELS];
-    int silence_samples;
-
-    bool terminate;
-    bool wait_on_ao;
-    bool still_playing;
-    bool need_wakeup;
-    bool paused;
-    bool initial_unblocked;
-
-    // Whether the current buffer contains the complete audio.
-    bool final_chunk;
-    double expected_end_time;
-
-    int wakeup_pipe[2];
-};
-
-// lock must be held
-static void wakeup_playthread(struct ao *ao)
-{
-    struct ao_push_state *p = ao->api_priv;
-    if (ao->driver->wakeup)
-        ao->driver->wakeup(ao);
-    p->need_wakeup = true;
-    pthread_cond_signal(&p->wakeup);
-}
-
-static int control(struct ao *ao, enum aocontrol cmd, void *arg)
-{
-    int r = CONTROL_UNKNOWN;
-    if (ao->driver->control) {
-        struct ao_push_state *p = ao->api_priv;
-        pthread_mutex_lock(&p->lock);
-        r = ao->driver->control(ao, cmd, arg);
-        pthread_mutex_unlock(&p->lock);
-    }
-    return r;
-}
-
-static double unlocked_get_delay(struct ao *ao)
-{
-    struct ao_push_state *p = ao->api_priv;
-    double driver_delay = 0;
-    if (ao->driver->get_delay)
-        driver_delay = ao->driver->get_delay(ao);
-    return driver_delay + mp_audio_buffer_seconds(p->buffer);
-}
-
-static double get_delay(struct ao *ao)
-{
-    struct ao_push_state *p = ao->api_priv;
-    pthread_mutex_lock(&p->lock);
-    double delay = unlocked_get_delay(ao);
-    pthread_mutex_unlock(&p->lock);
-    return delay;
-}
-
-static void reset(struct ao *ao)
-{
-    struct ao_push_state *p = ao->api_priv;
-    pthread_mutex_lock(&p->lock);
-    if (ao->driver->reset)
-        ao->driver->reset(ao);
-    mp_audio_buffer_clear(p->buffer);
-    p->paused = false;
-    if (p->still_playing)
-        wakeup_playthread(ao);
-    p->still_playing = false;
-    pthread_mutex_unlock(&p->lock);
-}
-
-static void audio_pause(struct ao *ao)
-{
-    struct ao_push_state *p = ao->api_priv;
-    pthread_mutex_lock(&p->lock);
-    if (ao->driver->pause)
-        ao->driver->pause(ao);
-    p->paused = true;
-    wakeup_playthread(ao);
-    pthread_mutex_unlock(&p->lock);
-}
-
-static void resume(struct ao *ao)
-{
-    struct ao_push_state *p = ao->api_priv;
-    pthread_mutex_lock(&p->lock);
-    if (ao->driver->resume)
-        ao->driver->resume(ao);
-    p->paused = false;
-    p->expected_end_time = 0;
-    wakeup_playthread(ao);
-    pthread_mutex_unlock(&p->lock);
-}
-
-static void drain(struct ao *ao)
-{
-    struct ao_push_state *p = ao->api_priv;
-    double maxbuffer = ao->buffer / (double)ao->samplerate + 1;
-
-    MP_VERBOSE(ao, "draining...\n");
-
-    pthread_mutex_lock(&p->lock);
-    if (p->paused)
-        goto done;
-
-    p->final_chunk = true;
-    wakeup_playthread(ao);
-
-    // Wait until everything is done. Since the audio API (especially ALSA)
-    // can't be trusted to do this right, and we're hard-blocking here, apply
-    // an upper bound timeout.
-    struct timespec until = mp_rel_time_to_timespec(maxbuffer);
-    while (p->still_playing && mp_audio_buffer_samples(p->buffer) > 0) {
-        if (pthread_cond_timedwait(&p->wakeup, &p->lock, &until)) {
-            MP_WARN(ao, "Draining is taking too long, aborting.\n");
-            goto done;
-        }
-    }
-
-    if (ao->driver->drain) {
-        ao->driver->drain(ao);
-    } else {
-        double time = unlocked_get_delay(ao);
-        mp_sleep_us(MPMIN(time, maxbuffer) * 1e6);
-    }
-
-done:
-    pthread_mutex_unlock(&p->lock);
-
-    reset(ao);
-}
-
-static int unlocked_get_space(struct ao *ao)
-{
-    struct ao_push_state *p = ao->api_priv;
-    int space = mp_audio_buffer_get_write_available(p->buffer);
-    if (ao->driver->get_space) {
-        int align = af_format_sample_alignment(ao->format);
-        // The following code attempts to keep the total buffered audio to
-        // ao->buffer in order to improve latency.
-        int device_space = ao->driver->get_space(ao);
-        int device_buffered = ao->device_buffer - device_space;
-        int soft_buffered = mp_audio_buffer_samples(p->buffer);
-        // The extra margin helps avoiding too many wakeups if the AO is fully
-        // byte based and doesn't do proper chunked processing.
-        int min_buffer = ao->buffer + 64;
-        int missing = min_buffer - device_buffered - soft_buffered;
-        missing = (missing + align - 1) / align * align;
-        // But always keep the device's buffer filled as much as we can.
-        int device_missing = device_space - soft_buffered;
-        missing = MPMAX(missing, device_missing);
-        space = MPMIN(space, missing);
-        space = MPMAX(0, space);
-    }
-    return space;
-}
-
-static int get_space(struct ao *ao)
-{
-    struct ao_push_state *p = ao->api_priv;
-    pthread_mutex_lock(&p->lock);
-    int space = unlocked_get_space(ao);
-    pthread_mutex_unlock(&p->lock);
-    return space;
-}
-
-static bool get_eof(struct ao *ao)
-{
-    struct ao_push_state *p = ao->api_priv;
-    pthread_mutex_lock(&p->lock);
-    bool eof = !p->still_playing;
-    pthread_mutex_unlock(&p->lock);
-    return eof;
-}
-
-static int play(struct ao *ao, void **data, int samples, int flags)
-{
-    struct ao_push_state *p = ao->api_priv;
-
-    pthread_mutex_lock(&p->lock);
-
-    int write_samples = mp_audio_buffer_get_write_available(p->buffer);
-    write_samples = MPMIN(write_samples, samples);
-
-    MP_TRACE(ao, "samples=%d flags=%d r=%d\n", samples, flags, write_samples);
-
-    if (write_samples < samples)
-        flags = flags & ~AOPLAY_FINAL_CHUNK;
-    bool is_final = flags & AOPLAY_FINAL_CHUNK;
-
-    mp_audio_buffer_append(p->buffer, data, samples);
-
-    bool got_data = write_samples > 0 || p->paused || p->final_chunk != is_final;
-
-    p->final_chunk = is_final;
-    p->paused = false;
-    if (got_data) {
-        p->still_playing = true;
-        p->expected_end_time = 0;
-
-        // If we don't have new data, the decoder thread basically promises it
-        // will send new data as soon as it's available.
-        wakeup_playthread(ao);
-    }
-    pthread_mutex_unlock(&p->lock);
-    return write_samples;
-}
-
-static bool realloc_silence(struct ao *ao, int samples)
-{
-    struct ao_push_state *p = ao->api_priv;
-
-    if (samples <= 0 || !af_fmt_is_pcm(ao->format))
-        return false;
-
-    if (samples > p->silence_samples) {
-        talloc_free(p->silence[0]);
-
-        int bytes = af_fmt_to_bytes(ao->format) * samples * ao->channels.num;
-        p->silence[0] = talloc_size(p, bytes);
-        for (int n = 1; n < MP_NUM_CHANNELS; n++)
-            p->silence[n] = p->silence[0];
-        p->silence_samples = samples;
-
-        af_fill_silence(p->silence[0], bytes, ao->format);
-    }
-
-    return true;
-}
-
-// called locked
-static void ao_play_data(struct ao *ao)
-{
-    struct ao_push_state *p = ao->api_priv;
-    int space = ao->driver->get_space(ao);
-    bool play_silence = p->paused || (ao->stream_silence && !p->still_playing);
-    space = MPMAX(space, 0);
-    if (space % ao->period_size)
-        MP_ERR(ao, "Audio device reports unaligned available buffer size.\n");
-    uint8_t **planes;
-    int samples;
-    if (play_silence) {
-        planes = p->silence;
-        samples = realloc_silence(ao, space) ? space : 0;
-    } else {
-        mp_audio_buffer_peek(p->buffer, &planes, &samples);
-    }
-    int max = samples;
-    if (samples > space)
-        samples = space;
-    int flags = 0;
-    if (p->final_chunk && samples == max) {
-        flags |= AOPLAY_FINAL_CHUNK;
-    } else {
-        samples = samples / ao->period_size * ao->period_size;
-    }
-    MP_STATS(ao, "start ao fill");
-    ao_post_process_data(ao, (void **)planes, samples);
-    int r = 0;
-    if (samples)
-        r = ao->driver->play(ao, (void **)planes, samples, flags);
-    MP_STATS(ao, "end ao fill");
-    if (r > samples) {
-        MP_ERR(ao, "Audio device returned nonsense value.\n");
-        r = samples;
-    } else if (r < 0) {
-        MP_ERR(ao, "Error writing audio to device.\n");
-    } else if (r != samples) {
-        MP_ERR(ao, "Audio device returned broken buffer state (sent %d samples, "
-               "got %d samples, %d period%s)!\n", samples, r,
-               ao->period_size, flags & AOPLAY_FINAL_CHUNK ? " final" : "");
-    }
-    r = MPMAX(r, 0);
-    // Probably can't copy the rest of the buffer due to period alignment.
-    bool stuck_eof = r <= 0 && space >= max && samples > 0;
-    if ((flags & AOPLAY_FINAL_CHUNK) && stuck_eof) {
-        MP_ERR(ao, "Audio output driver seems to ignore AOPLAY_FINAL_CHUNK.\n");
-        r = max;
-    }
-    if (!play_silence)
-        mp_audio_buffer_skip(p->buffer, r);
-    if (r > 0)
-        p->expected_end_time = 0;
-    // Nothing written, but more input data than space - this must mean the
-    // AO's get_space() doesn't do period alignment correctly.
-    bool stuck = r == 0 && max >= space && space > 0;
-    if (stuck)
-        MP_ERR(ao, "Audio output is reporting incorrect buffer status.\n");
-    // Wait until space becomes available. Also wait if we actually wrote data,
-    // so the AO wakes us up properly if it needs more data.
-    p->wait_on_ao = space == 0 || r > 0 || stuck;
-    p->still_playing |= r > 0 && !play_silence;
-    // If we just filled the AO completely (r == space), don't refill for a
-    // while. Prevents wakeup feedback with byte-granular AOs.
-    int needed = unlocked_get_space(ao);
-    bool more = needed >= (r == space ? ao->device_buffer / 4 : 1) && !stuck &&
-                !(flags & AOPLAY_FINAL_CHUNK);
-    if (more)
-        ao->wakeup_cb(ao->wakeup_ctx); // request more data
-    if (!samples && space && !ao->driver->reports_underruns && p->still_playing)
-        ao_underrun_event(ao);
-    MP_TRACE(ao, "in=%d flags=%d space=%d r=%d wa/pl=%d/%d needed=%d more=%d\n",
-             max, flags, space, r, p->wait_on_ao, p->still_playing, needed, more);
-}
-
-static void *playthread(void *arg)
-{
-    struct ao *ao = arg;
-    struct ao_push_state *p = ao->api_priv;
-    mpthread_set_name("ao");
-    pthread_mutex_lock(&p->lock);
-    while (!p->terminate) {
-        bool blocked = ao->driver->initially_blocked && !p->initial_unblocked;
-        bool playing = (!p->paused || ao->stream_silence) && !blocked;
-        if (playing)
-            ao_play_data(ao);
-
-        if (!p->need_wakeup) {
-            MP_STATS(ao, "start audio wait");
-            if (!p->wait_on_ao || !playing) {
-                // Avoid busy waiting, because the audio API will still report
-                // that it needs new data, even if we're not ready yet, or if
-                // get_space() decides that the amount of audio buffered in the
-                // device is enough, and p->buffer can be empty.
-                // The most important part is that the decoder is woken up, so
-                // that the decoder will wake up us in turn.
-                MP_TRACE(ao, "buffer inactive.\n");
-
-                bool was_playing = p->still_playing;
-                double timeout = -1;
-                if (p->still_playing && !p->paused && p->final_chunk &&
-                    !mp_audio_buffer_samples(p->buffer))
-                {
-                    double now = mp_time_sec();
-                    if (!p->expected_end_time)
-                        p->expected_end_time = now + unlocked_get_delay(ao);
-                    if (p->expected_end_time < now) {
-                        p->still_playing = false;
-                    } else {
-                        timeout = p->expected_end_time - now;
-                    }
-                }
-
-                if (was_playing && !p->still_playing)
-                    ao->wakeup_cb(ao->wakeup_ctx);
-                pthread_cond_signal(&p->wakeup); // for draining
-
-                if (p->still_playing && timeout > 0) {
-                    struct timespec ts = mp_rel_time_to_timespec(timeout);
-                    pthread_cond_timedwait(&p->wakeup, &p->lock, &ts);
-                } else {
-                    pthread_cond_wait(&p->wakeup, &p->lock);
-                }
-            } else {
-                // Wait until the device wants us to write more data to it.
-                if (!ao->driver->wait || ao->driver->wait(ao, &p->lock) < 0) {
-                    // Fallback to guessing.
-                    double timeout = 0;
-                    if (ao->driver->get_delay)
-                        timeout = ao->driver->get_delay(ao);
-                    timeout *= 0.25; // wake up if 25% played
-                    if (!p->need_wakeup) {
-                        struct timespec ts = mp_rel_time_to_timespec(timeout);
-                        pthread_cond_timedwait(&p->wakeup, &p->lock, &ts);
-                    }
-                }
-            }
-            MP_STATS(ao, "end audio wait");
-        }
-        p->need_wakeup = false;
-    }
-    pthread_mutex_unlock(&p->lock);
-    return NULL;
-}
-
-static void destroy_no_thread(struct ao *ao)
-{
-    struct ao_push_state *p = ao->api_priv;
-
-    ao->driver->uninit(ao);
-
-    for (int n = 0; n < 2; n++) {
-        int h = p->wakeup_pipe[n];
-        if (h >= 0)
-            close(h);
-    }
-
-    pthread_cond_destroy(&p->wakeup);
-    pthread_mutex_destroy(&p->lock);
-}
-
-static void uninit(struct ao *ao)
-{
-    struct ao_push_state *p = ao->api_priv;
-
-    pthread_mutex_lock(&p->lock);
-    p->terminate = true;
-    wakeup_playthread(ao);
-    pthread_mutex_unlock(&p->lock);
-
-    pthread_join(p->thread, NULL);
-
-    destroy_no_thread(ao);
-}
-
-static int init(struct ao *ao)
-{
-    struct ao_push_state *p = ao->api_priv;
-
-    pthread_mutex_init(&p->lock, NULL);
-    pthread_cond_init(&p->wakeup, NULL);
-    mp_make_wakeup_pipe(p->wakeup_pipe);
-
-    if (ao->device_buffer <= 0) {
-        MP_FATAL(ao, "Couldn't probe device buffer size.\n");
-        goto err;
-    }
-
-    p->buffer = mp_audio_buffer_create(ao);
-    mp_audio_buffer_reinit_fmt(p->buffer, ao->format,
-                               &ao->channels, ao->samplerate);
-    mp_audio_buffer_preallocate_min(p->buffer, ao->buffer);
-    if (pthread_create(&p->thread, NULL, playthread, ao))
-        goto err;
-    return 0;
-err:
-    destroy_no_thread(ao);
-    return -1;
-}
-
-const struct ao_driver ao_api_push = {
-    .init = init,
-    .control = control,
-    .uninit = uninit,
-    .reset = reset,
-    .get_space = get_space,
-    .play = play,
-    .get_delay = get_delay,
-    .pause = audio_pause,
-    .resume = resume,
-    .drain = drain,
-    .get_eof = get_eof,
-    .priv_size = sizeof(struct ao_push_state),
-};
-
-// Must be called locked.
-int ao_play_silence(struct ao *ao, int samples)
-{
-    assert(ao->api == &ao_api_push);
-
-    struct ao_push_state *p = ao->api_priv;
-
-    if (!realloc_silence(ao, samples) || !ao->driver->play)
-        return 0;
-
-    return ao->driver->play(ao, (void **)p->silence, samples, 0);
-}
-
-void ao_unblock(struct ao *ao)
-{
-    if (ao->api == &ao_api_push) {
-        struct ao_push_state *p = ao->api_priv;
-        pthread_mutex_lock(&p->lock);
-        p->need_wakeup = true;
-        p->initial_unblocked = true;
-        wakeup_playthread(ao);
-        pthread_cond_signal(&p->wakeup);
-        pthread_mutex_unlock(&p->lock);
-    }
-}
-
-#ifndef __MINGW32__
-
-#include <poll.h>
-
-#define MAX_POLL_FDS 20
-
-// Call poll() for the given fds. This will extend the given fds with the
-// wakeup pipe, so ao_wakeup_poll() will basically interrupt this function.
-// Unlocks the lock temporarily.
-// Returns <0 on error, 0 on success, 1 if the caller should return immediately.
-int ao_wait_poll(struct ao *ao, struct pollfd *fds, int num_fds,
-                 pthread_mutex_t *lock)
-{
-    struct ao_push_state *p = ao->api_priv;
-    assert(ao->api == &ao_api_push);
-    assert(&p->lock == lock);
-
-    if (num_fds >= MAX_POLL_FDS || p->wakeup_pipe[0] < 0)
-        return -1;
-
-    struct pollfd p_fds[MAX_POLL_FDS];
-    memcpy(p_fds, fds, num_fds * sizeof(p_fds[0]));
-    p_fds[num_fds] = (struct pollfd){
-        .fd = p->wakeup_pipe[0],
-        .events = POLLIN,
-    };
-
-    pthread_mutex_unlock(&p->lock);
-    int r = poll(p_fds, num_fds + 1, -1);
-    r = r < 0 ? -errno : 0;
-    pthread_mutex_lock(&p->lock);
-
-    memcpy(fds, p_fds, num_fds * sizeof(fds[0]));
-    bool wakeup = false;
-    if (p_fds[num_fds].revents & POLLIN) {
-        wakeup = true;
-        // might "drown" some wakeups, but that's ok for our use-case
-        mp_flush_wakeup_pipe(p->wakeup_pipe[0]);
-    }
-    return (r >= 0 || r == -EINTR) ? wakeup : -1;
-}
-
-void ao_wakeup_poll(struct ao *ao)
-{
-    assert(ao->api == &ao_api_push);
-    struct ao_push_state *p = ao->api_priv;
-
-    (void)write(p->wakeup_pipe[1], &(char){0}, 1);
-}
-
-#endif
diff --git a/wscript_build.py b/wscript_build.py
index 34398c10cd..1c7a681a77 100644
--- a/wscript_build.py
+++ b/wscript_build.py
@@ -262,8 +262,7 @@ def build(ctx):
         ( "audio/out/ao_wasapi.c",               "wasapi" ),
         ( "audio/out/ao_wasapi_changenotify.c",  "wasapi" ),
         ( "audio/out/ao_wasapi_utils.c",         "wasapi" ),
-        ( "audio/out/pull.c" ),
-        ( "audio/out/push.c" ),
+        ( "audio/out/buffer.c" ),
 
         ## Core
         ( "common/av_common.c" ),