mirror of git://git.musl-libc.org/musl
at the very least, a compiler barrier is required no matter what, and that was missing. current or1k implementations have strong ordering, but this is not guaranteed as part of the ISA, so some sort of synchronizing operation is necessary.

in principle we should use l.msync, but due to misinterpretation of the spec, it was wrongly treated as an optional instruction and is not supported by some implementations. if future kernels trap it and treat it as a nop (rather than illegal instruction) when the hardware/emulator does not support it, we could consider using it.

in the absence of l.msync support, the l.lwa/l.swa instructions, which are specified to have a built-in l.msync, need to be used. the easiest way to use them to implement atomic store is to perform an atomic swap and throw away the result.

using compare-and-swap would be lighter, and would probably be sufficient for all actual usage cases, but checking this is difficult and error-prone: with store implemented in terms of swap, it's guaranteed that, when another atomic operation is performed at the same time as the store, either the result of the store followed by the other operation, or just the store (clobbering the other operation's result) is seen. if store were implemented in terms of cas, there are cases where this invariant would fail to hold, and we would need detailed rules for the situations in which the store operation is well-defined.
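a sketch of the tradeoff described above, assuming the a_swap/a_cas primitives from the header below; the names a_store_swap and a_store_cas are hypothetical, used here only for comparison:

static inline void a_store_swap(volatile int *p, int x)
{
	/* what the commit adopts: the swap's l.swa write always lands,
	 * so a racing atomic op either runs after the store or has its
	 * result clobbered by it */
	a_swap(p, x);
}

static inline void a_store_cas(volatile int *p, int x)
{
	/* hypothetical lighter variant the commit declines: the cas can
	 * fail and retry while other atomics complete in between, so the
	 * simple invariant above is no longer guaranteed in every
	 * interleaving */
	int old;
	do old = *p;
	while (a_cas(p, old, x) != old);
}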
#ifndef _INTERNAL_ATOMIC_H
#define _INTERNAL_ATOMIC_H

#include <stdint.h>

static inline int a_ctz_l(unsigned long x)
{
	/* x&-x isolates the lowest set bit; multiplying by the de Bruijn
	 * constant maps each power of two to a distinct 5-bit index */
	static const char debruijn32[32] = {
		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
	};
	return debruijn32[(x&-x)*0x076be629 >> 27];
}

static inline int a_ctz_64(uint64_t x)
{
	uint32_t y = x;
	if (!y) {
		y = x>>32;
		return 32 + a_ctz_l(y);
	}
	return a_ctz_l(y);
}

static inline int a_cas(volatile int *p, int t, int s)
{
	/* ll/sc loop: l.lwa loads *p with a reservation, l.sfeq sets the
	 * flag if the loaded value equals t, and l.bnf 1f exits if not.
	 * l.swa stores s and sets the flag only if the reservation still
	 * holds, so l.bnf 1b retries on contention. per the commit message,
	 * l.lwa/l.swa have a built-in l.msync. */
	__asm__("1:	l.lwa %0, %1\n"
		"	l.sfeq %0, %2\n"
		"	l.bnf 1f\n"
		"	 l.nop\n"
		"	l.swa %1, %3\n"
		"	l.bnf 1b\n"
		"	 l.nop\n"
		"1:	\n"
		: "=&r"(t), "+m"(*p) : "r"(t), "r"(s) : "cc", "memory" );
	return t;
}

static inline void *a_cas_p(volatile void *p, void *t, void *s)
{
	/* pointers fit in int: or1k is a 32-bit ISA */
	return (void *)a_cas(p, (int)t, (int)s);
}

static inline long a_cas_l(volatile void *p, long t, long s)
{
	return a_cas(p, t, s);
}

static inline int a_swap(volatile int *x, int v)
{
	int old;
	do old = *x;
	while (a_cas(x, old, v) != old);
	return old;
}

static inline int a_fetch_add(volatile int *x, int v)
{
	int old;
	do old = *x;
	while (a_cas(x, old, old+v) != old);
	return old;
}

static inline void a_inc(volatile int *x)
{
	a_fetch_add(x, 1);
}

static inline void a_dec(volatile int *x)
{
	a_fetch_add(x, -1);
}

static inline void a_store(volatile int *p, int x)
{
	/* atomic store as swap-and-discard; see the commit message above
	 * for why this is preferred over a cas-based store */
	a_swap(p, x);
}

static inline void a_spin()
{
}

static inline void a_crash()
{
	*(volatile char *)0=0;
}

static inline void a_and(volatile int *p, int v)
{
	int old;
	do old = *p;
	while (a_cas(p, old, old&v) != old);
}

static inline void a_or(volatile int *p, int v)
{
	int old;
	do old = *p;
	while (a_cas(p, old, old|v) != old);
}

static inline void a_or_l(volatile void *p, long v)
{
	a_or(p, v);
}

static inline void a_and_64(volatile uint64_t *p, uint64_t v)
{
	/* not atomic as a 64-bit unit: each 32-bit half is updated
	 * atomically, in memory order */
	union { uint64_t v; uint32_t r[2]; } u = { v };
	a_and((int *)p, u.r[0]);
	a_and((int *)p+1, u.r[1]);
}

static inline void a_or_64(volatile uint64_t *p, uint64_t v)
{
	union { uint64_t v; uint32_t r[2]; } u = { v };
	a_or((int *)p, u.r[0]);
	a_or((int *)p+1, u.r[1]);
}

#endif
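a minimal usage sketch, not part of the header: a hypothetical test-and-set lock built on these primitives, showing why a_store needs to be a full barrier:

static inline void lock(volatile int *l)
{
	/* spin until the swap observes the lock free (old value 0) */
	while (a_swap(l, 1)) a_spin();
}

static inline void unlock(volatile int *l)
{
	/* without a barrier in a_store, writes from inside the critical
	 * section could become visible after the lock appears released */
	a_store(l, 0);
}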