#ifndef _INTERNAL_ATOMIC_H
#define _INTERNAL_ATOMIC_H
#include <stdint.h>
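/* Count trailing zeros of a nonzero word: x&-x isolates the lowest
 * set bit, and multiplying by the de Bruijn constant 0x076be629 maps
 * each power of two to a distinct 5-bit index into the table. */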
static inline int a_ctz_l(unsigned long x)
{
	static const char debruijn32[32] = {
		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
	};
	return debruijn32[(x&-x)*0x076be629 >> 27];
}
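/* Count trailing zeros of a nonzero 64-bit value by applying the
 * 32-bit helper to whichever half holds the lowest set bit. */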
static inline int a_ctz_64(uint64_t x)
{
	uint32_t y = x;
	if (!y) {
		y = x>>32;
		return 32 + a_ctz_l(y);
	}
	return a_ctz_l(y);
}
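/* Atomic compare-and-swap via the l.lwa/l.swa load-linked/store-
 * conditional pair: load *p, compare with t, and if equal attempt the
 * conditional store of s, retrying from the load if the store fails.
 * Returns the value read, so an illustrative (not part of this file)
 * lock-acquire idiom would be: while (a_cas(&lock, 0, 1)) a_spin(); */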
static inline int a_cas(volatile int *p, int t, int s)
|
|
|
|
{
|
|
|
|
__asm__("1: l.lwa %0, %1\n"
|
|
|
|
" l.sfeq %0, %2\n"
|
|
|
|
" l.bnf 1f\n"
|
|
|
|
" l.nop\n"
|
|
|
|
" l.swa %1, %3\n"
|
|
|
|
" l.bnf 1b\n"
|
|
|
|
" l.nop\n"
|
|
|
|
"1: \n"
|
|
|
|
: "=&r"(t), "+m"(*p) : "r"(t), "r"(s) : "cc", "memory" );
|
|
|
|
return t;
|
|
|
|
}
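/* Pointer-sized CAS; or1k is a 32-bit target, so this reduces to the
 * integer CAS above. */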
static inline void *a_cas_p(volatile void *p, void *t, void *s)
{
	return (void *)a_cas(p, (int)t, (int)s);
}
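/* Atomic exchange as a CAS retry loop: reread the old value until the
 * CAS installs v without interference. */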
static inline int a_swap(volatile int *x, int v)
{
	int old;
	do old = *x;
	while (a_cas(x, old, v) != old);
	return old;
}
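/* Atomic fetch-and-add via the same CAS retry pattern; returns the
 * value *x held before the addition. */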
static inline int a_fetch_add(volatile int *x, int v)
{
	int old;
	do old = *x;
	while (a_cas(x, old, old+v) != old);
	return old;
}
static inline void a_inc(volatile int *x)
{
	a_fetch_add(x, 1);
}
static inline void a_dec(volatile int *x)
{
	a_fetch_add(x, -1);
}
/* At the very least a compiler barrier is required here, but more is
 * needed: current or1k implementations have strong ordering, but the
 * ISA does not guarantee it, so a synchronizing operation is
 * necessary. In principle that operation is l.msync, but due to a
 * misinterpretation of the spec it was wrongly treated as optional
 * and some implementations do not support it. If future kernels trap
 * it and treat it as a nop (rather than an illegal instruction) when
 * the hardware/emulator lacks it, using it could be reconsidered. In
 * its absence, the l.lwa/l.swa instructions, which are specified to
 * have a built-in l.msync, must be used. The easiest way to build an
 * atomic store from them is to perform an atomic swap and throw away
 * the result. Compare-and-swap would be lighter, and would probably
 * suffice for all actual usage cases, but checking this is difficult
 * and error-prone: with store implemented in terms of swap, it is
 * guaranteed that when another atomic operation races the store,
 * either the result of the store followed by the other operation, or
 * just the store (clobbering the other operation's result), is seen.
 * If store were implemented in terms of cas, there are cases where
 * this invariant would fail to hold, and detailed rules would be
 * needed for the situations in which the store is well-defined. */
static inline void a_store(volatile int *p, int x)
{
	a_swap(p, x);
}
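/* Full memory barrier: a dummy CAS on a stack temporary, relying on
 * the l.msync built into l.lwa/l.swa as described above. Spinning
 * uses the same operation. */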
#define a_spin a_barrier
static inline void a_barrier()
{
	a_cas(&(int){0}, 0, 0);
}
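/* Force an immediate crash by writing through a null pointer. */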
static inline void a_crash()
{
	*(volatile char *)0=0;
}
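/* Atomic bitwise AND/OR, again as CAS retry loops like a_swap. */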
static inline void a_and(volatile int *p, int v)
{
	int old;
	do old = *p;
	while (a_cas(p, old, old&v) != old);
}
static inline void a_or(volatile int *p, int v)
{
	int old;
	do old = *p;
	while (a_cas(p, old, old|v) != old);
}
static inline void a_or_l(volatile void *p, long v)
{
	a_or(p, v);
}
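/* 64-bit AND/OR operate on the two 32-bit halves in memory order;
 * each half is updated atomically, but the pair is not one atomic
 * 64-bit operation. */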
static inline void a_and_64(volatile uint64_t *p, uint64_t v)
{
	union { uint64_t v; uint32_t r[2]; } u = { v };
	a_and((int *)p, u.r[0]);
	a_and((int *)p+1, u.r[1]);
}
static inline void a_or_64(volatile uint64_t *p, uint64_t v)
{
	union { uint64_t v; uint32_t r[2]; } u = { v };
	a_or((int *)p, u.r[0]);
	a_or((int *)p+1, u.r[1]);
}
#endif