atomics: switch to C11 stdatomic.h

In my opinion, we shouldn't use atomics at all, but ok.

This switches the mpv code to use C11 stdatomic.h, and for compilers
that don't support stdatomic.h yet, we emulate the subset used by mpv
using the builtins commonly provided by gcc and clang.

This supersedes an earlier, similar attempt by Kovensky. That attempt
unfortunately relied on a big copy-pasted FreeBSD header (which also
depended on far more compiler-specific functionality, defined
reserved symbols, etc.), so it had to be rewritten from scratch (NIH'ed).

Some issues:
- C11 says default initialization of atomics "produces a valid state",
  but it's not clear whether the stored value is really 0. We rely on
  it being 0 anyway.
- I'm pretty sure our use of the __atomic... builtins is/was incorrect.
  We don't use atomic load/store intrinsics, and instead access the
  variables directly.
- Our wrapper actually does stricter typechecking than the stdatomic.h
  implementation by gcc 4.9. We make the atomic types incompatible with
  normal types by wrapping them into structs. (The FreeBSD wrapper does
  the same.)
- I couldn't test on MinGW.
This commit is contained in:
wm4 2014-05-21 01:04:47 +02:00
parent 2f65f0e254
commit 8e7cf4bc99
9 changed files with 104 additions and 79 deletions

View File

@ -33,7 +33,6 @@
#include "audio/out/ao_wasapi_utils.h"
#include "audio/format.h"
#include "compat/atomics.h"
#include "osdep/timer.h"
#include "osdep/io.h"
@ -43,7 +42,7 @@
do { if ((unk) != NULL) { release; (unk) = NULL; } } while(0)
static double get_device_delay(struct wasapi_state *state) {
UINT64 sample_count = state->sample_count;
UINT64 sample_count = atomic_load(&state->sample_count);
UINT64 position, qpc_position;
HRESULT hr;
@ -100,8 +99,7 @@ static void thread_feed(struct ao *ao)
frame_count, 0);
EXIT_ON_ERROR(hr);
mp_atomic_add_and_fetch(&state->sample_count, frame_count);
mp_memory_barrier();
atomic_fetch_add(&state->sample_count, frame_count);
return;
exit_label:
@ -318,8 +316,7 @@ static void audio_pause(struct ao *ao)
IAudioClient_Stop(state->pAudioClientProxy);
IAudioClient_Reset(state->pAudioClientProxy);
state->sample_count = 0;
mp_memory_barrier();
atomic_store(&state->sample_count, 0);
}
static void audio_resume(struct ao *ao)

View File

@ -28,6 +28,8 @@
#include <mmdeviceapi.h>
#include <avrt.h>
#include "compat/atomics.h"
typedef struct wasapi_state {
struct mp_log *log;
HANDLE threadLoop;
@ -85,7 +87,7 @@ typedef struct wasapi_state {
/* WASAPI internal clock information, for estimating delay */
IAudioClock *pAudioClock;
UINT64 clock_frequency; /* scale for the "samples" returned by the clock */
UINT64 sample_count; /* the amount of samples per channel written to a GetBuffer buffer */
atomic_ullong sample_count; /* the amount of samples per channel written to a GetBuffer buffer */
LARGE_INTEGER qpc_frequency; /* frequency of windows' high resolution timer */
int opt_exclusive;

View File

@ -426,7 +426,7 @@ static int init_clock(struct wasapi_state *state) {
QueryPerformanceFrequency(&state->qpc_frequency);
state->sample_count = 0;
atomic_store(&state->sample_count, 0);
MP_VERBOSE(state, "IAudioClock::GetFrequency gave a frequency of %"PRIu64".\n", (uint64_t) state->clock_frequency);

View File

@ -45,13 +45,13 @@ struct ao_pull_state {
struct mp_ring *buffers[MP_NUM_CHANNELS];
// AO_STATE_*
int state;
atomic_int state;
// Whether buffers[] can be accessed.
int ready;
atomic_bool ready;
// Device delay of the last written sample, in realtime.
int64_t end_time_us;
atomic_llong end_time_us;
};
static int get_space(struct ao *ao)
@ -77,10 +77,9 @@ static int play(struct ao *ao, void **data, int samples, int flags)
int r = mp_ring_write(p->buffers[n], data[n], write_bytes);
assert(r == write_bytes);
}
if (p->state != AO_STATE_PLAY) {
p->end_time_us = 0;
p->state = AO_STATE_PLAY;
mp_memory_barrier();
if (atomic_load(&p->state) != AO_STATE_PLAY) {
atomic_store(&p->end_time_us, 0);
atomic_store(&p->state, AO_STATE_PLAY);
if (ao->driver->resume)
ao->driver->resume(ao);
}
@ -103,8 +102,7 @@ int ao_read_data(struct ao *ao, void **data, int samples, int64_t out_time_us)
struct ao_pull_state *p = ao->api_priv;
int full_bytes = samples * ao->sstride;
mp_memory_barrier();
if (!p->ready) {
if (!atomic_load(&p->ready)) {
for (int n = 0; n < ao->num_planes; n++)
af_fill_silence(data[n], full_bytes, ao->format);
return 0;
@ -116,10 +114,9 @@ int ao_read_data(struct ao *ao, void **data, int samples, int64_t out_time_us)
int bytes = MPMIN(buffered_bytes, full_bytes);
if (bytes > 0)
p->end_time_us = out_time_us;
atomic_store(&p->end_time_us, out_time_us);
mp_memory_barrier();
if (p->state == AO_STATE_PAUSE)
if (atomic_load(&p->state) == AO_STATE_PAUSE)
bytes = 0;
for (int n = 0; n < ao->num_planes; n++) {
@ -154,8 +151,7 @@ static float get_delay(struct ao *ao)
{
struct ao_pull_state *p = ao->api_priv;
mp_memory_barrier();
int64_t end = p->end_time_us;
int64_t end = atomic_load(&p->end_time_us);
int64_t now = mp_time_us();
double driver_delay = MPMAX(0, (end - now) / (1000.0 * 1000.0));
return mp_ring_buffered(p->buffers[0]) / (double)ao->bps + driver_delay;
@ -168,15 +164,12 @@ static void reset(struct ao *ao)
ao->driver->reset(ao);
// Not like this is race-condition free. It will work if ->reset
// stops the audio callback, though.
p->ready = 0;
p->state = AO_STATE_NONE;
mp_memory_barrier();
atomic_store(&p->ready, false);
atomic_store(&p->state, AO_STATE_NONE);
for (int n = 0; n < ao->num_planes; n++)
mp_ring_reset(p->buffers[n]);
p->end_time_us = 0;
mp_memory_barrier();
p->ready = 1;
mp_memory_barrier();
atomic_store(&p->end_time_us, 0);
atomic_store(&p->ready, true);
}
static void pause(struct ao *ao)
@ -184,15 +177,13 @@ static void pause(struct ao *ao)
struct ao_pull_state *p = ao->api_priv;
if (ao->driver->pause)
ao->driver->pause(ao);
p->state = AO_STATE_PAUSE;
mp_memory_barrier();
atomic_store(&p->state, AO_STATE_PAUSE);
}
static void resume(struct ao *ao)
{
struct ao_pull_state *p = ao->api_priv;
p->state = AO_STATE_PLAY;
mp_memory_barrier();
atomic_store(&p->state, AO_STATE_PLAY);
if (ao->driver->resume)
ao->driver->resume(ao);
}
@ -207,8 +198,7 @@ static int init(struct ao *ao)
struct ao_pull_state *p = ao->api_priv;
for (int n = 0; n < ao->num_planes; n++)
p->buffers[n] = mp_ring_new(ao, ao->buffer * ao->sstride);
p->ready = 1;
mp_memory_barrier();
atomic_store(&p->ready, true);
return 0;
}

View File

@ -66,7 +66,7 @@ struct mp_log_root {
/* This is incremented every time the msglevels must be reloaded.
* (This is perhaps better than maintaining a globally accessible and
* synchronized mp_log tree.) */
int64_t reload_counter;
atomic_ulong reload_counter;
};
struct mp_log {
@ -75,7 +75,7 @@ struct mp_log {
const char *verbose_prefix;
int level; // minimum log level for any outputs
int terminal_level; // minimum log level for terminal output
int64_t reload_counter;
atomic_ulong reload_counter;
};
struct mp_log_buffer {
@ -119,7 +119,7 @@ static void update_loglevel(struct mp_log *log)
log->level = MPMAX(log->level, log->root->buffers[n]->level);
if (log->root->stats_file)
log->level = MPMAX(log->level, MSGL_STATS);
log->reload_counter = log->root->reload_counter;
atomic_store(&log->reload_counter, atomic_load(&log->root->reload_counter));
pthread_mutex_unlock(&mp_msg_lock);
}
@ -127,10 +127,10 @@ static void update_loglevel(struct mp_log *log)
// Thread-safety: see mp_msg().
bool mp_msg_test(struct mp_log *log, int lev)
{
mp_memory_barrier();
if (!log->root || log->root->mute)
struct mp_log_root *root = log->root;
if (!root || root->mute)
return false;
if (log->reload_counter != log->root->reload_counter)
if (atomic_load(&log->reload_counter) != atomic_load(&root->reload_counter))
update_loglevel(log);
return lev <= log->level;
}
@ -395,9 +395,11 @@ void mp_msg_init(struct mpv_global *global)
assert(!global->log);
struct mp_log_root *root = talloc_zero(NULL, struct mp_log_root);
root->global = global;
root->header = true;
root->reload_counter = 1;
*root = (struct mp_log_root){
.global = global,
.header = true,
.reload_counter = ATOMIC_VAR_INIT(1),
};
struct mp_log dummy = { .root = root };
struct mp_log *log = mp_log_new(root, &dummy, "");
@ -429,8 +431,7 @@ void mp_msg_update_msglevels(struct mpv_global *global)
talloc_free(root->msglevels);
root->msglevels = talloc_strdup(root, global->opts->msglevels);
mp_atomic_add_and_fetch(&root->reload_counter, 1);
mp_memory_barrier();
atomic_fetch_add(&root->reload_counter, 1);
pthread_mutex_unlock(&mp_msg_lock);
}
@ -475,9 +476,7 @@ struct mp_log_buffer *mp_msg_log_buffer_new(struct mpv_global *global,
MP_TARRAY_APPEND(root, root->buffers, root->num_buffers, buffer);
mp_atomic_add_and_fetch(&root->reload_counter, 1);
mp_memory_barrier();
atomic_fetch_add(&root->reload_counter, 1);
pthread_mutex_unlock(&mp_msg_lock);
return buffer;
@ -510,9 +509,7 @@ found:
}
talloc_free(buffer);
mp_atomic_add_and_fetch(&root->reload_counter, 1);
mp_memory_barrier();
atomic_fetch_add(&root->reload_counter, 1);
pthread_mutex_unlock(&mp_msg_lock);
}

View File

@ -16,17 +16,47 @@
* with mpv. If not, see <http://www.gnu.org/licenses/>.
*/
// At this point both gcc and clang had __sync_synchronize support for some
// time. We only support a full memory barrier.
#ifndef MP_ATOMICS_H
#define MP_ATOMICS_H
#include <inttypes.h>
#include "config.h"
#if HAVE_STDATOMIC
#include <stdatomic.h>
#else
// Emulate the parts of C11 stdatomic.h needed by mpv.
// Still relies on gcc/clang atomic builtins.
typedef struct { volatile unsigned long v; } atomic_ulong;
typedef struct { volatile int v; } atomic_int;
typedef struct { volatile _Bool v; } atomic_bool;
typedef struct { volatile long long v; } atomic_llong;
typedef struct { volatile uint_least32_t v; } atomic_uint_least32_t;
typedef struct { volatile unsigned long long v; } atomic_ullong;
#define ATOMIC_VAR_INIT(x) \
{.v = (x)}
#define atomic_load(p) \
(mp_memory_barrier(), (p)->v)
#define atomic_store(p, val) \
((p)->v = (val), mp_memory_barrier())
#if HAVE_ATOMIC_BUILTINS
# define mp_memory_barrier() __atomic_thread_fence(__ATOMIC_SEQ_CST)
# define mp_atomic_add_and_fetch(a, b) __atomic_add_fetch(a, b,__ATOMIC_SEQ_CST)
# define mp_memory_barrier() \
__atomic_thread_fence(__ATOMIC_SEQ_CST)
# define atomic_fetch_add(a, b) \
__atomic_add_fetch(&(a)->v, b, __ATOMIC_SEQ_CST)
#elif HAVE_SYNC_BUILTINS
# define mp_memory_barrier() __sync_synchronize()
# define mp_atomic_add_and_fetch(a, b) __sync_add_and_fetch(a, b)
# define mp_memory_barrier() \
__sync_synchronize()
# define atomic_fetch_add(a, b) \
(__sync_add_and_fetch(&(a)->v, b), mp_memory_barrier())
#else
# error "this should have been a configuration error, report a bug please"
#endif
#endif /* else HAVE_STDATOMIC */
#endif

View File

@ -30,19 +30,17 @@ struct mp_ring {
/* Positions of the first readable/writeable chunks. Do not read this
* fields but use the atomic private accessors `mp_ring_get_wpos`
* and `mp_ring_get_rpos`. */
uint32_t rpos, wpos;
atomic_uint_least32_t rpos, wpos;
};
static uint32_t mp_ring_get_wpos(struct mp_ring *buffer)
{
mp_memory_barrier();
return buffer->wpos;
return atomic_load(&buffer->wpos);
}
static uint32_t mp_ring_get_rpos(struct mp_ring *buffer)
{
mp_memory_barrier();
return buffer->rpos;
return atomic_load(&buffer->rpos);
}
struct mp_ring *mp_ring_new(void *talloc_ctx, int size)
@ -61,8 +59,7 @@ int mp_ring_drain(struct mp_ring *buffer, int len)
{
int buffered = mp_ring_buffered(buffer);
int drain_len = FFMIN(len, buffered);
mp_atomic_add_and_fetch(&buffer->rpos, drain_len);
mp_memory_barrier();
atomic_fetch_add(&buffer->rpos, drain_len);
return drain_len;
}
@ -81,8 +78,7 @@ int mp_ring_read(struct mp_ring *buffer, unsigned char *dest, int len)
memcpy(dest, buffer->buffer + read_ptr, len1);
memcpy(dest + len1, buffer->buffer, len2);
mp_atomic_add_and_fetch(&buffer->rpos, read_len);
mp_memory_barrier();
atomic_fetch_add(&buffer->rpos, read_len);
return read_len;
}
@ -100,16 +96,15 @@ int mp_ring_write(struct mp_ring *buffer, unsigned char *src, int len)
memcpy(buffer->buffer + write_ptr, src, len1);
memcpy(buffer->buffer, src + len1, len2);
mp_atomic_add_and_fetch(&buffer->wpos, write_len);
mp_memory_barrier();
atomic_fetch_add(&buffer->wpos, write_len);
return write_len;
}
void mp_ring_reset(struct mp_ring *buffer)
{
buffer->wpos = buffer->rpos = 0;
mp_memory_barrier();
atomic_store(&buffer->wpos, 0);
atomic_store(&buffer->rpos, 0);
}
int mp_ring_available(struct mp_ring *buffer)

View File

@ -268,7 +268,7 @@ exit 0
# If autodetection is available then the third state is: auto
_pkg_config=auto
_cc=auto
test "$CC" && _cc="$CC"
test -n "$CC" && _cc="$CC"
_opt=-O2
_prefix="/usr/local"
options_state_machine init
@ -485,19 +485,25 @@ extra_cflags="$extra_cflags -D_REENTRANT -D_THREAD_SAFE"
compile_check waftools/fragments/pthreads.c "$_ld_pthread" || die "Unable to find pthreads support."
echores "yes"
check_statement_libs "compiler support for __atomic built-ins" auto ATOMIC_BUILTINS \
check_statement_libs "support for stdatomic.h" auto STDATOMIC \
stdatomic.h '_Atomic int test = ATOMIC_VAR_INIT(123); int test2 = atomic_load(&test)'
_stdatomic=$(defretval)
_atomic=auto
test "$_stdatomic" = yes && _atomic=no
check_statement_libs "compiler support for __atomic built-ins" $_atomic ATOMIC_BUILTINS \
stdint.h 'int64_t test = 0; test = __atomic_add_fetch(&test, 1, __ATOMIC_SEQ_CST)' \
" " "-latomic"
_atomic=$(defretval)
_sync=auto
test "$_atomic" = yes && _sync=no
(test "$_atomic" = yes || test "$_stdatomic" = yes ) && _sync=no
check_statement_libs "compiler support for __sync built-ins" $_sync SYNC_BUILTINS \
stdint.h 'int64_t test = 0; test = __sync_add_and_fetch(&test, 1)'
_sync=$(defretval)
if test "$_atomic" = no && test "$_sync" = no ; then
die "your compiler must support either __atomic or __sync built-ins."
if test "$_atomic" = no && test "$_sync" = no && test "$_stdatomic" = no ; then
die "your compiler must support either stdatomic.h, or __atomic, or __sync built-ins."
fi
check_compile "iconv" $_iconv ICONV waftools/fragments/iconv.c " " "-liconv" "-liconv $_ld_dl"

14
wscript
View File

@ -104,25 +104,33 @@ main_dependencies = [
'func': check_pthreads,
'req': True,
'fmsg': 'Unable to find pthreads support.'
}, {
'name': 'stdatomic',
'desc': 'stdatomic.h',
'func':
check_statement('stdatomic.h',
'_Atomic int test = ATOMIC_VAR_INIT(123);'
'int test2 = atomic_load(&test)')
}, {
'name': 'atomic-builtins',
'desc': 'compiler support for __atomic built-ins',
'func': check_libs(['atomic'],
check_statement('stdint.h',
'int64_t test = 0;'
'test = __atomic_add_fetch(&test, 1, __ATOMIC_SEQ_CST)'))
'test = __atomic_add_fetch(&test, 1, __ATOMIC_SEQ_CST)')),
'deps_neg': [ 'stdatomic' ],
}, {
'name': 'sync-builtins',
'desc': 'compiler support for __sync built-ins',
'func': check_statement('stdint.h',
'int64_t test = 0;'
'test = __sync_add_and_fetch(&test, 1)'),
'deps_neg': [ 'atomic-builtins' ],
'deps_neg': [ 'stdatomic', 'atomic-builtins' ],
}, {
'name': 'thread-synchronization-builtins',
'desc': 'compiler support for usable thread synchronization built-ins',
'func': check_true,
'deps_any': ['atomic-builtins', 'sync-builtins'],
'deps_any': ['stdatomic', 'atomic-builtins', 'sync-builtins'],
'req': True,
'fmsg': 'your compiler must support either __atomic or __sync built-ins',
}, {