use inline atomics and thread pointer on arm models supporting them

this is perhaps not the optimal implementation; a_cas still compiles
to nested loops due to the different interface contracts of the kuser
helper cas function (whose contract this patch implements) and the
a_cas function (whose contract mimics the x86 cmpxchg). fixing this
may be possible, but it's more complicated and thus deferred until a
later time.

aside from improving performance and code size, this patch also
provides a means of producing binaries which can run on hardened
kernels where the kuser helpers have been disabled. however, at
present this requires producing binaries for armv6k or later, which
will not run on older cpus. a real solution to the problem of kernels
that omit the kuser helpers would be runtime detection, so that
universal binaries which run on all arm cpu models can also be
compatible with all kernel hardening profiles. robust detection
however is a much harder problem, and will be addressed at a later
time.
This commit is contained in:
Rich Felker 2014-04-07 04:03:18 -04:00
parent 21ada94c4b
commit 1974bffa2d
2 changed files with 36 additions and 1 deletions

View File

@ -22,7 +22,28 @@ static inline int a_ctz_64(uint64_t x)
return a_ctz_l(y);
}
#if __ARM_ARCH_6__ || __ARM_ARCH_6K__ || __ARM_ARCH_6ZK__ \
|| __ARM_ARCH_7A__ || __ARM_ARCH_7R__ \
|| __ARM_ARCH >= 7
static inline int __k_cas(int t, int s, volatile int *p)
{
int ret;
__asm__(
" mcr p15,0,r0,c7,c10,5\n"
"1: ldrex %0,%3\n"
" subs %0,%0,%1\n"
" strexeq %0,%2,%3\n"
" teqeq %0,#1\n"
" beq 1b\n"
" mcr p15,0,r0,c7,c10,5\n"
: "=&r"(ret)
: "r"(t), "r"(s), "m"(*p)
: "memory", "cc" );
return ret;
}
#else
/* Fallback when the inline version is not selected: call the kuser helper
 * cas function through its fixed address, using the same
 * (expected, new, pointer) argument order as the inline __k_cas. */
#define __k_cas ((int (*)(int, int, volatile int *))0xffff0fc0)
#endif
static inline int a_cas(volatile int *p, int t, int s)
{

View File

@ -1,8 +1,22 @@
/* Type of a no-argument function returning char *, declared const; used to
 * call the fixed-address routine that yields the thread pointer. */
typedef char *(*__ptr_func_t)(void) __attribute__((const));
#if __ARM_ARCH_6K__ || __ARM_ARCH_6ZK__ \
|| __ARM_ARCH_7A__ || __ARM_ARCH_7R__ \
|| __ARM_ARCH >= 7
/*
 * Return the calling thread's pthread_t, derived from the thread pointer
 * read directly from coprocessor 15 instead of through a helper call.
 * The result lies sizeof(struct pthread)-8 bytes below the thread pointer,
 * the inverse of TP_ADJ().
 */
static inline __attribute__((const)) pthread_t __pthread_self()
{
	char *tp;
	char *upper;

	/* Fetch the thread pointer from p15 (c13, c0, opcode 3). */
	__asm__( "mrc p15,0,%0,c13,c0,3" : "=r"(tp) );

	/* Step 8 bytes up, then back over the whole thread structure. */
	upper = tp + 8;
	return (void *)(upper - sizeof(struct pthread));
}
#else
/* Type of a no-argument function returning char *, declared const. */
typedef char *(*__ptr_func_t)(void) __attribute__((const));
/* Fallback when the inline version is not selected: call the helper at the
 * fixed address 0xffff0fe0 to obtain the thread pointer, then apply the
 * +8 - sizeof(struct pthread) adjustment to reach the thread structure. */
#define __pthread_self() \
((pthread_t)(((__ptr_func_t)0xffff0fe0)()+8-sizeof(struct pthread)))
#endif
/* NOTE(review): presumably signals that TLS data is laid out above the
 * thread pointer on this architecture; confirm against the code testing it. */
#define TLS_ABOVE_TP
/* Convert a pointer to a thread structure into the matching thread-pointer
 * value: sizeof(struct pthread)-8 bytes past p (the inverse of the
 * adjustment made in __pthread_self()). */
#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) - 8)