musl/arch/arm/atomic.h

#ifndef _INTERNAL_ATOMIC_H
#define _INTERNAL_ATOMIC_H

#include <stdint.h>

/*
 * Count trailing zero bits of x with a de Bruijn multiply-and-lookup.
 *
 * x & -x isolates the lowest set bit; multiplying it by the de Bruijn
 * constant and taking the top five bits of the 32-bit product gives a
 * unique index into the table for each of the 32 possible bit positions.
 *
 * The isolated bit and the product are explicitly truncated to 32 bits so
 * the index always stays within 0..31, even if this is compiled where
 * unsigned long is wider than 32 bits. Only the low 32 bits of x are
 * examined.
 *
 * Returns the index of the lowest set bit. For x == 0 the lookup yields 0.
 */
static inline int a_ctz_l(unsigned long x)
{
	static const char debruijn32[32] = {
		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
	};
	uint32_t lowest = (uint32_t)(x & -x);
	return debruijn32[(uint32_t)(lowest * 0x076be629u) >> 27];
}

/*
 * Count trailing zero bits of a 64-bit value by delegating to a_ctz_l
 * on one 32-bit half.
 *
 * If the low half is non-zero its result is returned directly; otherwise
 * the high half is examined and 32 is added to its result.
 */
static inline int a_ctz_64(uint64_t x)
{
	uint32_t low = x;
	uint32_t high;

	if (low)
		return a_ctz_l(low);

	/* Low word is all zeroes: the answer lies in the upper word. */
	high = x>>32;
	return a_ctz_l(high) + 32;
}

/*
 * Compare-and-swap helper invoked through the fixed address 0xffff0fc0
 * (presumably the kernel-provided user helper; confirm against the
 * target kernel's documentation). Call sites in this file pass
 * (expected value, new value, pointer) and treat a zero return as
 * "the swap was performed".
 */
#define __k_cas ((int (*)(int, int, volatile int *))0xffff0fc0)

/*
 * Compare-and-swap: if *p equals t, replace it with s.
 *
 * Returns t when the swap was performed, otherwise the differing value
 * that was observed in *p. When the helper reports failure but *p is
 * read back as t again, the attempt is simply repeated.
 */
static inline int a_cas(volatile int *p, int t, int s)
{
	while (__k_cas(t, s, p) != 0) {
		/* Helper failed: report the conflicting value, or retry
		 * if the location reads as t once more. */
		int seen = *p;
		if (seen != t)
			return seen;
	}
	return t;
}

/*
 * Pointer-typed wrapper around a_cas: the expected and replacement
 * pointers are converted to int, and the int result is converted back
 * to a pointer.
 */
static inline void *a_cas_p(volatile void *p, void *t, void *s)
{
	int expected = (int)t;
	int desired = (int)s;
	int seen = a_cas(p, expected, desired);

	return (void *)seen;
}

/*
 * long-typed wrapper around a_cas; arguments and result pass through
 * a_cas's int interface.
 */
static inline long a_cas_l(volatile void *p, long t, long s)
{
	int seen = a_cas(p, t, s);

	return seen;
}

/*
 * Exchange: store v into *x and return the value it replaced.
 *
 * The current value is re-read and the compare-and-swap retried until
 * the helper reports success (zero).
 */
static inline int a_swap(volatile int *x, int v)
{
	for (;;) {
		int prev = *x;
		if (!__k_cas(prev, v, x))
			return prev;
	}
}

/*
 * Add v to *x and return the value *x held before the addition.
 *
 * Implemented as a read / compare-and-swap retry loop: each iteration
 * re-reads *x and tries to install the sum computed from that reading.
 */
static inline int a_fetch_add(volatile int *x, int v)
{
	for (;;) {
		int prev = *x;
		if (!__k_cas(prev, prev+v, x))
			return prev;
	}
}

/* Increment *x by one via a_fetch_add; the previous value is discarded. */
static inline void a_inc(volatile int *x)
{
	(void)a_fetch_add(x, +1);
}

/* Decrement *x by one via a_fetch_add; the previous value is discarded. */
static inline void a_dec(volatile int *x)
{
	(void)a_fetch_add(x, -1);
}

/*
 * Store x into *p.
 *
 * The store is done by compare-and-swapping against the value currently
 * read from *p, repeating until the helper reports success (zero).
 * NOTE(review): presumably routed through the cas helper rather than a
 * plain assignment in order to pick up the helper's memory barrier;
 * confirm against the kernel helper documentation.
 */
static inline void a_store(volatile int *p, int x)
{
	for (;;) {
		int current = *p;
		if (!__k_cas(current, x, p))
			break;
	}
}

/*
 * Spin-wait hint. This implementation does nothing.
 *
 * Declared with a (void) prototype so that a call passing arguments is
 * diagnosed at compile time instead of being silently accepted.
 */
static inline void a_spin(void)
{
}

/*
 * Deliberately write a zero byte to address 0 through a volatile
 * pointer (volatile keeps the compiler from discarding the store).
 * Presumably this faults and terminates the process on the target.
 *
 * Declared with a (void) prototype so that a call passing arguments is
 * diagnosed at compile time instead of being silently accepted.
 */
static inline void a_crash(void)
{
	*(volatile char *)0=0;
}

/*
 * Bitwise-AND v into *p.
 *
 * Read / compare-and-swap retry loop: each iteration re-reads *p and
 * tries to install the masked value derived from that reading.
 */
static inline void a_and(volatile int *p, int v)
{
	for (;;) {
		int prev = *p;
		if (!__k_cas(prev, prev&v, p))
			return;
	}
}

/*
 * Bitwise-OR v into *p.
 *
 * Read / compare-and-swap retry loop: each iteration re-reads *p and
 * tries to install the combined value derived from that reading.
 */
static inline void a_or(volatile int *p, int v)
{
	for (;;) {
		int prev = *p;
		if (!__k_cas(prev, prev|v, p))
			return;
	}
}

/*
 * long-typed wrapper around a_or: p is used as a pointer to int and v
 * is passed through a_or's int parameter.
 */
static inline void a_or_l(volatile void *p, long v)
{
	volatile int *word = p;

	a_or(word, v);
}

/*
 * Bitwise-AND a 64-bit mask into *p, one 32-bit word at a time.
 *
 * The union splits v into two words in memory order, so r[0] pairs with
 * the first word at p and r[1] with the second, whatever the byte order.
 * The two words are updated by separate a_and calls, so the 64-bit
 * update as a whole is not a single atomic step.
 */
static inline void a_and_64(volatile uint64_t *p, uint64_t v)
{
	union { uint64_t v; uint32_t r[2]; } halves;
	int *word = (int *)p;

	halves.v = v;
	a_and(word, halves.r[0]);
	a_and(word+1, halves.r[1]);
}

/*
 * Bitwise-OR a 64-bit mask into *p, one 32-bit word at a time.
 *
 * The union splits v into two words in memory order, so r[0] pairs with
 * the first word at p and r[1] with the second, whatever the byte order.
 * The two words are updated by separate a_or calls, so the 64-bit
 * update as a whole is not a single atomic step.
 */
static inline void a_or_64(volatile uint64_t *p, uint64_t v)
{
	union { uint64_t v; uint32_t r[2]; } halves;
	int *word = (int *)p;

	halves.v = v;
	a_or(word, halves.r[0]);
	a_or(word+1, halves.r[1]);
}

#endif
initial commit of the arm port this port assumes eabi calling conventions, eabi linux syscall convention, and presence of the kernel helpers at 0xffff0f?0 needed for threads support. otherwise it makes very few assumptions, and the code should work even on armv4 without thumb support, as well as on systems with thumb interworking. the bits headers declare this a little endian system, but as far as i can tell the code should work equally well on big endian. some small details are probably broken; so far, testing has been limited to qemu/aboriginal linux. 2011-09-18 20:44:54 +00:00			`#ifndef _INTERNAL_ATOMIC_H`
			`#define _INTERNAL_ATOMIC_H`

			`#include <stdint.h>`

			`static inline int a_ctz_l(unsigned long x)`
			`{`
			`static const char debruijn32[32] = {`
			`0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,`
			`31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14`
			`};`
			`return debruijn32[(x&-x)*0x076be629 >> 27];`
			`}`

			`static inline int a_ctz_64(uint64_t x)`
			`{`
			`uint32_t y = x;`
			`if (!y) {`
			`y = x>>32;`
			`return 32 + a_ctz_l(y);`
			`}`
			`return a_ctz_l(y);`
			`}`

fix arm atomic store and generate simpler/less-bloated/faster code atomic store was lacking a barrier, which was fine for legacy arm with no real smp and kernel-emulated cas, but unsuitable for more modern systems. the kernel provides another "kuser" function, at 0xffff0fa0, which could be used for the barrier, but using that would drop support for kernels 2.6.12 through 2.6.14 unless an extra conditional were added to check for barrier availability. just using the barrier in the kernel cas is easier, and, based on my reading of the assembly code in the kernel, does not appear to be significantly slower. at the same time, other atomic operations are adapted to call the kernel cas function directly rather than using a_cas; due to small differences in their interface contracts, this makes the generated code much simpler. 2013-09-22 07:06:17 +00:00			`#define __k_cas ((int (*)(int, int, volatile int *))0xffff0fc0)`

initial commit of the arm port this port assumes eabi calling conventions, eabi linux syscall convention, and presence of the kernel helpers at 0xffff0f?0 needed for threads support. otherwise it makes very few assumptions, and the code should work even on armv4 without thumb support, as well as on systems with thumb interworking. the bits headers declare this a little endian system, but as far as i can tell the code should work equally well on big endian. some small details are probably broken; so far, testing has been limited to qemu/aboriginal linux. 2011-09-18 20:44:54 +00:00			`static inline int a_cas(volatile int *p, int t, int s)`
			`{`
			`int old;`
			`for (;;) {`
fix arm atomic store and generate simpler/less-bloated/faster code atomic store was lacking a barrier, which was fine for legacy arm with no real smp and kernel-emulated cas, but unsuitable for more modern systems. the kernel provides another "kuser" function, at 0xffff0fa0, which could be used for the barrier, but using that would drop support for kernels 2.6.12 through 2.6.14 unless an extra conditional were added to check for barrier availability. just using the barrier in the kernel cas is easier, and, based on my reading of the assembly code in the kernel, does not appear to be significantly slower. at the same time, other atomic operations are adapted to call the kernel cas function directly rather than using a_cas; due to small differences in their interface contracts, this makes the generated code much simpler. 2013-09-22 07:06:17 +00:00			`if (!__k_cas(t, s, p))`
initial commit of the arm port this port assumes eabi calling conventions, eabi linux syscall convention, and presence of the kernel helpers at 0xffff0f?0 needed for threads support. otherwise it makes very few assumptions, and the code should work even on armv4 without thumb support, as well as on systems with thumb interworking. the bits headers declare this a little endian system, but as far as i can tell the code should work equally well on big endian. some small details are probably broken; so far, testing has been limited to qemu/aboriginal linux. 2011-09-18 20:44:54 +00:00			`return t;`
			`if ((old=*p) != t)`
			`return old;`
			`}`
			`}`

			`static inline void *a_cas_p(volatile void *p, void *t, void *s)`
			`{`
			`return (void *)a_cas(p, (int)t, (int)s);`
			`}`

			`static inline long a_cas_l(volatile void *p, long t, long s)`
			`{`
			`return a_cas(p, t, s);`
			`}`

			`static inline int a_swap(volatile int *x, int v)`
			`{`
			`int old;`
			`do old = *x;`
fix arm atomic store and generate simpler/less-bloated/faster code atomic store was lacking a barrier, which was fine for legacy arm with no real smp and kernel-emulated cas, but unsuitable for more modern systems. the kernel provides another "kuser" function, at 0xffff0fa0, which could be used for the barrier, but using that would drop support for kernels 2.6.12 through 2.6.14 unless an extra conditional were added to check for barrier availability. just using the barrier in the kernel cas is easier, and, based on my reading of the assembly code in the kernel, does not appear to be significantly slower. at the same time, other atomic operations are adapted to call the kernel cas function directly rather than using a_cas; due to small differences in their interface contracts, this makes the generated code much simpler. 2013-09-22 07:06:17 +00:00			`while (__k_cas(old, v, x));`
initial commit of the arm port this port assumes eabi calling conventions, eabi linux syscall convention, and presence of the kernel helpers at 0xffff0f?0 needed for threads support. otherwise it makes very few assumptions, and the code should work even on armv4 without thumb support, as well as on systems with thumb interworking. the bits headers declare this a little endian system, but as far as i can tell the code should work equally well on big endian. some small details are probably broken; so far, testing has been limited to qemu/aboriginal linux. 2011-09-18 20:44:54 +00:00			`return old;`
			`}`

			`static inline int a_fetch_add(volatile int *x, int v)`
			`{`
			`int old;`
			`do old = *x;`
fix arm atomic store and generate simpler/less-bloated/faster code atomic store was lacking a barrier, which was fine for legacy arm with no real smp and kernel-emulated cas, but unsuitable for more modern systems. the kernel provides another "kuser" function, at 0xffff0fa0, which could be used for the barrier, but using that would drop support for kernels 2.6.12 through 2.6.14 unless an extra conditional were added to check for barrier availability. just using the barrier in the kernel cas is easier, and, based on my reading of the assembly code in the kernel, does not appear to be significantly slower. at the same time, other atomic operations are adapted to call the kernel cas function directly rather than using a_cas; due to small differences in their interface contracts, this makes the generated code much simpler. 2013-09-22 07:06:17 +00:00			`while (__k_cas(old, old+v, x));`
initial commit of the arm port this port assumes eabi calling conventions, eabi linux syscall convention, and presence of the kernel helpers at 0xffff0f?0 needed for threads support. otherwise it makes very few assumptions, and the code should work even on armv4 without thumb support, as well as on systems with thumb interworking. the bits headers declare this a little endian system, but as far as i can tell the code should work equally well on big endian. some small details are probably broken; so far, testing has been limited to qemu/aboriginal linux. 2011-09-18 20:44:54 +00:00			`return old;`
			`}`

			`static inline void a_inc(volatile int *x)`
			`{`
			`a_fetch_add(x, 1);`
			`}`

			`static inline void a_dec(volatile int *x)`
			`{`
			`a_fetch_add(x, -1);`
			`}`

			`static inline void a_store(volatile int *p, int x)`
			`{`
fix arm atomic store and generate simpler/less-bloated/faster code atomic store was lacking a barrier, which was fine for legacy arm with no real smp and kernel-emulated cas, but unsuitable for more modern systems. the kernel provides another "kuser" function, at 0xffff0fa0, which could be used for the barrier, but using that would drop support for kernels 2.6.12 through 2.6.14 unless an extra conditional were added to check for barrier availability. just using the barrier in the kernel cas is easier, and, based on my reading of the assembly code in the kernel, does not appear to be significantly slower. at the same time, other atomic operations are adapted to call the kernel cas function directly rather than using a_cas; due to small differences in their interface contracts, this makes the generated code much simpler. 2013-09-22 07:06:17 +00:00			`while (__k_cas(*p, x, p));`
initial commit of the arm port this port assumes eabi calling conventions, eabi linux syscall convention, and presence of the kernel helpers at 0xffff0f?0 needed for threads support. otherwise it makes very few assumptions, and the code should work even on armv4 without thumb support, as well as on systems with thumb interworking. the bits headers declare this a little endian system, but as far as i can tell the code should work equally well on big endian. some small details are probably broken; so far, testing has been limited to qemu/aboriginal linux. 2011-09-18 20:44:54 +00:00			`}`

			`static inline void a_spin()`
			`{`
			`}`

			`static inline void a_crash()`
			`{`
			`*(volatile char *)0=0;`
			`}`

			`static inline void a_and(volatile int *p, int v)`
			`{`
			`int old;`
			`do old = *p;`
fix arm atomic store and generate simpler/less-bloated/faster code atomic store was lacking a barrier, which was fine for legacy arm with no real smp and kernel-emulated cas, but unsuitable for more modern systems. the kernel provides another "kuser" function, at 0xffff0fa0, which could be used for the barrier, but using that would drop support for kernels 2.6.12 through 2.6.14 unless an extra conditional were added to check for barrier availability. just using the barrier in the kernel cas is easier, and, based on my reading of the assembly code in the kernel, does not appear to be significantly slower. at the same time, other atomic operations are adapted to call the kernel cas function directly rather than using a_cas; due to small differences in their interface contracts, this makes the generated code much simpler. 2013-09-22 07:06:17 +00:00			`while (__k_cas(old, old&v, p));`
initial commit of the arm port this port assumes eabi calling conventions, eabi linux syscall convention, and presence of the kernel helpers at 0xffff0f?0 needed for threads support. otherwise it makes very few assumptions, and the code should work even on armv4 without thumb support, as well as on systems with thumb interworking. the bits headers declare this a little endian system, but as far as i can tell the code should work equally well on big endian. some small details are probably broken; so far, testing has been limited to qemu/aboriginal linux. 2011-09-18 20:44:54 +00:00			`}`

			`static inline void a_or(volatile int *p, int v)`
			`{`
			`int old;`
			`do old = *p;`
fix arm atomic store and generate simpler/less-bloated/faster code atomic store was lacking a barrier, which was fine for legacy arm with no real smp and kernel-emulated cas, but unsuitable for more modern systems. the kernel provides another "kuser" function, at 0xffff0fa0, which could be used for the barrier, but using that would drop support for kernels 2.6.12 through 2.6.14 unless an extra conditional were added to check for barrier availability. just using the barrier in the kernel cas is easier, and, based on my reading of the assembly code in the kernel, does not appear to be significantly slower. at the same time, other atomic operations are adapted to call the kernel cas function directly rather than using a_cas; due to small differences in their interface contracts, this makes the generated code much simpler. 2013-09-22 07:06:17 +00:00			`while (__k_cas(old, old|v, p));`
initial commit of the arm port this port assumes eabi calling conventions, eabi linux syscall convention, and presence of the kernel helpers at 0xffff0f?0 needed for threads support. otherwise it makes very few assumptions, and the code should work even on armv4 without thumb support, as well as on systems with thumb interworking. the bits headers declare this a little endian system, but as far as i can tell the code should work equally well on big endian. some small details are probably broken; so far, testing has been limited to qemu/aboriginal linux. 2011-09-18 20:44:54 +00:00			`}`

add missing a_or_l to atomic.h for non-x86 archs this is needed for recently committed sigaction code 2013-08-11 07:43:25 +00:00			`static inline void a_or_l(volatile void *p, long v)`
			`{`
			`a_or(p, v);`
			`}`

initial commit of the arm port this port assumes eabi calling conventions, eabi linux syscall convention, and presence of the kernel helpers at 0xffff0f?0 needed for threads support. otherwise it makes very few assumptions, and the code should work even on armv4 without thumb support, as well as on systems with thumb interworking. the bits headers declare this a little endian system, but as far as i can tell the code should work equally well on big endian. some small details are probably broken; so far, testing has been limited to qemu/aboriginal linux. 2011-09-18 20:44:54 +00:00			`static inline void a_and_64(volatile uint64_t *p, uint64_t v)`
			`{`
remove little-endian assumption from arm atomic.h this hidden endian dependency had left big endian arm badly broken. 2012-07-08 04:05:08 +00:00			`union { uint64_t v; uint32_t r[2]; } u = { v };`
			`a_and((int *)p, u.r[0]);`
			`a_and((int *)p+1, u.r[1]);`
initial commit of the arm port this port assumes eabi calling conventions, eabi linux syscall convention, and presence of the kernel helpers at 0xffff0f?0 needed for threads support. otherwise it makes very few assumptions, and the code should work even on armv4 without thumb support, as well as on systems with thumb interworking. the bits headers declare this a little endian system, but as far as i can tell the code should work equally well on big endian. some small details are probably broken; so far, testing has been limited to qemu/aboriginal linux. 2011-09-18 20:44:54 +00:00			`}`

			`static inline void a_or_64(volatile uint64_t *p, uint64_t v)`
			`{`
remove little-endian assumption from arm atomic.h this hidden endian dependency had left big endian arm badly broken. 2012-07-08 04:05:08 +00:00			`union { uint64_t v; uint32_t r[2]; } u = { v };`
			`a_or((int *)p, u.r[0]);`
			`a_or((int *)p+1, u.r[1]);`
initial commit of the arm port this port assumes eabi calling conventions, eabi linux syscall convention, and presence of the kernel helpers at 0xffff0f?0 needed for threads support. otherwise it makes very few assumptions, and the code should work even on armv4 without thumb support, as well as on systems with thumb interworking. the bits headers declare this a little endian system, but as far as i can tell the code should work equally well on big endian. some small details are probably broken; so far, testing has been limited to qemu/aboriginal linux. 2011-09-18 20:44:54 +00:00			`}`

			`#endif`