mirror of git://git.musl-libc.org/musl
overhaul sh atomics for new atomics framework, add j-core cas.l backend
sh needs runtime-selected atomic backends since there are a number of supported models that use non-forwards-compatible (non-smp-compatible) atomic mechanisms. previously, the code paths for this were highly inefficient since they involved C function calls with multiple branches in the callee and heavy spills in the caller. the new code calls the runtime-selected asm fragment from inline asm with extremely minimal clobbers, rather than using a function call. for the sh4a case where the atomic mechanism is known and there is no forward-compatibility issue, the movli.l and movco.l instructions are provided as a_ll and a_sc, allowing the new shared atomic.h to generate efficient inline versions of all the basic atomic operations without needing a cas loop.
This commit is contained in:
parent
1315596b51
commit
61b1e75f7d
|
@ -1,96 +1,46 @@
|
||||||
#define LLSC_CLOBBERS "r0", "t", "memory"
|
#if defined(__SH4A__)
|
||||||
#define LLSC_START(mem) "synco\n" \
|
|
||||||
"0: movli.l @" mem ", r0\n"
|
|
||||||
#define LLSC_END(mem) \
|
|
||||||
"1: movco.l r0, @" mem "\n" \
|
|
||||||
" bf 0b\n" \
|
|
||||||
" synco\n"
|
|
||||||
|
|
||||||
static inline int __sh_cas_llsc(volatile int *p, int t, int s)
|
#define a_ll a_ll
|
||||||
|
static inline int a_ll(volatile int *p)
|
||||||
{
|
{
|
||||||
int old;
|
int v;
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__ ("movli.l @%1, %0" : "=z"(v) : "r"(p), "m"(*p));
|
||||||
LLSC_START("%1")
|
return v;
|
||||||
" mov r0, %0\n"
|
|
||||||
" cmp/eq %0, %2\n"
|
|
||||||
" bf 1f\n"
|
|
||||||
" mov %3, r0\n"
|
|
||||||
LLSC_END("%1")
|
|
||||||
: "=&r"(old) : "r"(p), "r"(t), "r"(s) : LLSC_CLOBBERS);
|
|
||||||
return old;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int __sh_swap_llsc(volatile int *x, int v)
|
#define a_sc a_sc
|
||||||
|
static inline int a_sc(volatile int *p, int v)
|
||||||
{
|
{
|
||||||
int old;
|
int r;
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
LLSC_START("%1")
|
"movco.l %2, @%3 ; movt %0"
|
||||||
" mov r0, %0\n"
|
: "=r"(r), "=m"(*p) : "z"(v), "r"(p) : "memory", "cc");
|
||||||
" mov %2, r0\n"
|
return r;
|
||||||
LLSC_END("%1")
|
|
||||||
: "=&r"(old) : "r"(x), "r"(v) : LLSC_CLOBBERS);
|
|
||||||
return old;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int __sh_fetch_add_llsc(volatile int *x, int v)
|
#define a_barrier a_barrier
|
||||||
|
static inline void a_barrier()
|
||||||
{
|
{
|
||||||
int old;
|
__asm__ __volatile__ ("synco" : : "memory");
|
||||||
__asm__ __volatile__(
|
|
||||||
LLSC_START("%1")
|
|
||||||
" mov r0, %0\n"
|
|
||||||
" add %2, r0\n"
|
|
||||||
LLSC_END("%1")
|
|
||||||
: "=&r"(old) : "r"(x), "r"(v) : LLSC_CLOBBERS);
|
|
||||||
return old;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void __sh_store_llsc(volatile int *p, int x)
|
#define a_pre_llsc a_barrier
|
||||||
{
|
#define a_post_llsc a_barrier
|
||||||
__asm__ __volatile__(
|
|
||||||
" synco\n"
|
|
||||||
" mov.l %1, @%0\n"
|
|
||||||
" synco\n"
|
|
||||||
: : "r"(p), "r"(x) : "memory");
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void __sh_and_llsc(volatile int *x, int v)
|
|
||||||
{
|
|
||||||
__asm__ __volatile__(
|
|
||||||
LLSC_START("%0")
|
|
||||||
" and %1, r0\n"
|
|
||||||
LLSC_END("%0")
|
|
||||||
: : "r"(x), "r"(v) : LLSC_CLOBBERS);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void __sh_or_llsc(volatile int *x, int v)
|
|
||||||
{
|
|
||||||
__asm__ __volatile__(
|
|
||||||
LLSC_START("%0")
|
|
||||||
" or %1, r0\n"
|
|
||||||
LLSC_END("%0")
|
|
||||||
: : "r"(x), "r"(v) : LLSC_CLOBBERS);
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef __SH4A__
|
|
||||||
#define a_cas(p,t,s) __sh_cas_llsc(p,t,s)
|
|
||||||
#define a_swap(x,v) __sh_swap_llsc(x,v)
|
|
||||||
#define a_fetch_add(x,v) __sh_fetch_add_llsc(x, v)
|
|
||||||
#define a_store(x,v) __sh_store_llsc(x, v)
|
|
||||||
#define a_and(x,v) __sh_and_llsc(x, v)
|
|
||||||
#define a_or(x,v) __sh_or_llsc(x, v)
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
int __sh_cas(volatile int *, int, int);
|
#define a_cas a_cas
|
||||||
int __sh_swap(volatile int *, int);
|
__attribute__((__visibility__("hidden"))) extern const void *__sh_cas_ptr;
|
||||||
int __sh_fetch_add(volatile int *, int);
|
static inline int a_cas(volatile int *p, int t, int s)
|
||||||
void __sh_store(volatile int *, int);
|
{
|
||||||
void __sh_and(volatile int *, int);
|
register int r1 __asm__("r1");
|
||||||
void __sh_or(volatile int *, int);
|
register int r2 __asm__("r2") = t;
|
||||||
|
register int r3 __asm__("r3") = s;
|
||||||
|
__asm__ __volatile__ (
|
||||||
|
"jsr @%4 ; nop"
|
||||||
|
: "=r"(r1), "+r"(r3) : "z"(p), "r"(r2), "r"(__sh_cas_ptr)
|
||||||
|
: "memory", "pr", "cc");
|
||||||
|
return r3;
|
||||||
|
}
|
||||||
|
|
||||||
#define a_cas(p,t,s) __sh_cas(p,t,s)
|
|
||||||
#define a_swap(x,v) __sh_swap(x,v)
|
|
||||||
#define a_fetch_add(x,v) __sh_fetch_add(x, v)
|
|
||||||
#define a_store(x,v) __sh_store(x, v)
|
|
||||||
#define a_and(x,v) __sh_and(x, v)
|
|
||||||
#define a_or(x,v) __sh_or(x, v)
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,158 +0,0 @@
|
||||||
#ifndef __SH4A__
|
|
||||||
|
|
||||||
#include "sh_atomic.h"
|
|
||||||
#include "atomic.h"
|
|
||||||
#include "libc.h"
|
|
||||||
|
|
||||||
static inline unsigned mask()
|
|
||||||
{
|
|
||||||
unsigned sr;
|
|
||||||
__asm__ __volatile__ ( "\n"
|
|
||||||
" stc sr,r0 \n"
|
|
||||||
" mov r0,%0 \n"
|
|
||||||
" or #0xf0,r0 \n"
|
|
||||||
" ldc r0,sr \n"
|
|
||||||
: "=&r"(sr) : : "memory", "r0" );
|
|
||||||
return sr;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void unmask(unsigned sr)
|
|
||||||
{
|
|
||||||
__asm__ __volatile__ ( "ldc %0,sr" : : "r"(sr) : "memory" );
|
|
||||||
}
|
|
||||||
|
|
||||||
/* gusa is a hack in the kernel which lets you create a sequence of instructions
|
|
||||||
* which will be restarted if the process is preempted in the middle of the
|
|
||||||
* sequence. It will do for implementing atomics on non-smp systems. ABI is:
|
|
||||||
* r0 = address of first instruction after the atomic sequence
|
|
||||||
* r1 = original stack pointer
|
|
||||||
* r15 = -1 * length of atomic sequence in bytes
|
|
||||||
*/
|
|
||||||
#define GUSA_CLOBBERS "r0", "r1", "memory"
|
|
||||||
#define GUSA_START(mem,old,nop) \
|
|
||||||
" .align 2\n" \
|
|
||||||
" mova 1f, r0\n" \
|
|
||||||
nop \
|
|
||||||
" mov r15, r1\n" \
|
|
||||||
" mov #(0f-1f), r15\n" \
|
|
||||||
"0: mov.l @" mem ", " old "\n"
|
|
||||||
/* the target of mova must be 4 byte aligned, so we may need a nop */
|
|
||||||
#define GUSA_START_ODD(mem,old) GUSA_START(mem,old,"")
|
|
||||||
#define GUSA_START_EVEN(mem,old) GUSA_START(mem,old,"\tnop\n")
|
|
||||||
#define GUSA_END(mem,new) \
|
|
||||||
" mov.l " new ", @" mem "\n" \
|
|
||||||
"1: mov r1, r15\n"
|
|
||||||
|
|
||||||
int __sh_cas(volatile int *p, int t, int s)
|
|
||||||
{
|
|
||||||
if (__sh_atomic_model == SH_A_LLSC) return __sh_cas_llsc(p, t, s);
|
|
||||||
|
|
||||||
if (__sh_atomic_model == SH_A_IMASK) {
|
|
||||||
unsigned sr = mask();
|
|
||||||
int old = *p;
|
|
||||||
if (old==t) *p = s;
|
|
||||||
unmask(sr);
|
|
||||||
return old;
|
|
||||||
}
|
|
||||||
|
|
||||||
int old;
|
|
||||||
__asm__ __volatile__(
|
|
||||||
GUSA_START_EVEN("%1", "%0")
|
|
||||||
" cmp/eq %0, %2\n"
|
|
||||||
" bf 1f\n"
|
|
||||||
GUSA_END("%1", "%3")
|
|
||||||
: "=&r"(old) : "r"(p), "r"(t), "r"(s) : GUSA_CLOBBERS, "t");
|
|
||||||
return old;
|
|
||||||
}
|
|
||||||
|
|
||||||
int __sh_swap(volatile int *x, int v)
|
|
||||||
{
|
|
||||||
if (__sh_atomic_model == SH_A_LLSC) return __sh_swap_llsc(x, v);
|
|
||||||
|
|
||||||
if (__sh_atomic_model == SH_A_IMASK) {
|
|
||||||
unsigned sr = mask();
|
|
||||||
int old = *x;
|
|
||||||
*x = v;
|
|
||||||
unmask(sr);
|
|
||||||
return old;
|
|
||||||
}
|
|
||||||
|
|
||||||
int old;
|
|
||||||
__asm__ __volatile__(
|
|
||||||
GUSA_START_EVEN("%1", "%0")
|
|
||||||
GUSA_END("%1", "%2")
|
|
||||||
: "=&r"(old) : "r"(x), "r"(v) : GUSA_CLOBBERS);
|
|
||||||
return old;
|
|
||||||
}
|
|
||||||
|
|
||||||
int __sh_fetch_add(volatile int *x, int v)
|
|
||||||
{
|
|
||||||
if (__sh_atomic_model == SH_A_LLSC) return __sh_fetch_add_llsc(x, v);
|
|
||||||
|
|
||||||
if (__sh_atomic_model == SH_A_IMASK) {
|
|
||||||
unsigned sr = mask();
|
|
||||||
int old = *x;
|
|
||||||
*x = old + v;
|
|
||||||
unmask(sr);
|
|
||||||
return old;
|
|
||||||
}
|
|
||||||
|
|
||||||
int old, dummy;
|
|
||||||
__asm__ __volatile__(
|
|
||||||
GUSA_START_EVEN("%2", "%0")
|
|
||||||
" mov %0, %1\n"
|
|
||||||
" add %3, %1\n"
|
|
||||||
GUSA_END("%2", "%1")
|
|
||||||
: "=&r"(old), "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS);
|
|
||||||
return old;
|
|
||||||
}
|
|
||||||
|
|
||||||
void __sh_store(volatile int *p, int x)
|
|
||||||
{
|
|
||||||
if (__sh_atomic_model == SH_A_LLSC) return __sh_store_llsc(p, x);
|
|
||||||
__asm__ __volatile__(
|
|
||||||
" mov.l %1, @%0\n"
|
|
||||||
: : "r"(p), "r"(x) : "memory");
|
|
||||||
}
|
|
||||||
|
|
||||||
void __sh_and(volatile int *x, int v)
|
|
||||||
{
|
|
||||||
if (__sh_atomic_model == SH_A_LLSC) return __sh_and_llsc(x, v);
|
|
||||||
|
|
||||||
if (__sh_atomic_model == SH_A_IMASK) {
|
|
||||||
unsigned sr = mask();
|
|
||||||
int old = *x;
|
|
||||||
*x = old & v;
|
|
||||||
unmask(sr);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
int dummy;
|
|
||||||
__asm__ __volatile__(
|
|
||||||
GUSA_START_ODD("%1", "%0")
|
|
||||||
" and %2, %0\n"
|
|
||||||
GUSA_END("%1", "%0")
|
|
||||||
: "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS);
|
|
||||||
}
|
|
||||||
|
|
||||||
void __sh_or(volatile int *x, int v)
|
|
||||||
{
|
|
||||||
if (__sh_atomic_model == SH_A_LLSC) return __sh_or_llsc(x, v);
|
|
||||||
|
|
||||||
if (__sh_atomic_model == SH_A_IMASK) {
|
|
||||||
unsigned sr = mask();
|
|
||||||
int old = *x;
|
|
||||||
*x = old | v;
|
|
||||||
unmask(sr);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
int dummy;
|
|
||||||
__asm__ __volatile__(
|
|
||||||
GUSA_START_ODD("%1", "%0")
|
|
||||||
" or %2, %0\n"
|
|
||||||
GUSA_END("%1", "%0")
|
|
||||||
: "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS);
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,15 +0,0 @@
|
||||||
#ifndef _SH_ATOMIC_H
|
|
||||||
#define _SH_ATOMIC_H
|
|
||||||
|
|
||||||
#define SH_A_GUSA 0
|
|
||||||
#define SH_A_LLSC 1
|
|
||||||
#define SH_A_CAS 2
|
|
||||||
#if !defined(__SH3__) && !defined(__SH4__)
|
|
||||||
#define SH_A_IMASK 3
|
|
||||||
#else
|
|
||||||
#define SH_A_IMASK -1LL /* unmatchable by unsigned int */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
extern __attribute__((__visibility__("hidden"))) unsigned __sh_atomic_model;
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,34 +1,40 @@
|
||||||
#include "pthread_impl.h"
|
#include "pthread_impl.h"
|
||||||
#include "libc.h"
|
#include "libc.h"
|
||||||
#include "sh_atomic.h"
|
|
||||||
#include <elf.h>
|
#include <elf.h>
|
||||||
|
|
||||||
/* Also perform sh-specific init */
|
/* Also perform sh-specific init */
|
||||||
|
|
||||||
#define CPU_HAS_LLSC 0x0040
|
#define CPU_HAS_LLSC 0x0040
|
||||||
|
#define CPU_HAS_CAS_L 0x0400
|
||||||
|
|
||||||
__attribute__((__visibility__("hidden"))) unsigned __sh_atomic_model, __sh_nommu;
|
__attribute__((__visibility__("hidden")))
|
||||||
|
extern const char __sh_cas_gusa[], __sh_cas_llsc[], __sh_cas_imask[], __sh_cas_cas_l[];
|
||||||
|
|
||||||
|
__attribute__((__visibility__("hidden")))
|
||||||
|
const void *__sh_cas_ptr;
|
||||||
|
|
||||||
|
__attribute__((__visibility__("hidden")))
|
||||||
|
unsigned __sh_nommu;
|
||||||
|
|
||||||
int __set_thread_area(void *p)
|
int __set_thread_area(void *p)
|
||||||
{
|
{
|
||||||
size_t *aux;
|
size_t *aux;
|
||||||
__asm__ __volatile__ ( "ldc %0, gbr" : : "r"(p) : "memory" );
|
__asm__ __volatile__ ( "ldc %0, gbr" : : "r"(p) : "memory" );
|
||||||
#ifndef __SH4A__
|
#ifndef __SH4A__
|
||||||
if (__hwcap & CPU_HAS_LLSC) {
|
__sh_cas_ptr = __sh_cas_gusa;
|
||||||
__sh_atomic_model = SH_A_LLSC;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
#if !defined(__SH3__) && !defined(__SH4__)
|
#if !defined(__SH3__) && !defined(__SH4__)
|
||||||
for (aux=libc.auxv; *aux; aux+=2) {
|
for (aux=libc.auxv; *aux; aux+=2) {
|
||||||
if (*aux != AT_PLATFORM) continue;
|
if (*aux != AT_PLATFORM) continue;
|
||||||
const char *s = (void *)aux[1];
|
const char *s = (void *)aux[1];
|
||||||
if (s[0]!='s' || s[1]!='h' || s[2]!='2' || s[3]-'0'<10u) break;
|
if (s[0]!='s' || s[1]!='h' || s[2]!='2' || s[3]-'0'<10u) break;
|
||||||
__sh_atomic_model = SH_A_IMASK;
|
__sh_cas_ptr = __sh_cas_imask;
|
||||||
__sh_nommu = 1;
|
__sh_nommu = 1;
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
/* __sh_atomic_model = SH_A_GUSA; */ /* 0, default */
|
if (__hwcap & CPU_HAS_CAS_L)
|
||||||
|
__sh_cas_ptr = __sh_cas_cas_l;
|
||||||
|
else if (__hwcap & CPU_HAS_LLSC)
|
||||||
|
__sh_cas_ptr = __sh_cas_llsc;
|
||||||
#endif
|
#endif
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
|
@ -0,0 +1,65 @@
|
||||||
|
/* Contract for all versions is same as cas.l r2,r3,@r0
|
||||||
|
* pr and r1 are also clobbered (by jsr & r1 as temp).
|
||||||
|
* r0,r2,r4-r15 must be preserved.
|
||||||
|
* r3 contains result (==r2 iff cas succeeded). */
|
||||||
|
|
||||||
|
.align 2
|
||||||
|
.global __sh_cas_gusa
|
||||||
|
.hidden __sh_cas_gusa
|
||||||
|
__sh_cas_gusa:
|
||||||
|
mov.l r5,@-r15
|
||||||
|
mov.l r4,@-r15
|
||||||
|
mov r0,r4
|
||||||
|
mova 1f,r0
|
||||||
|
mov r15,r1
|
||||||
|
mov #(0f-1f),r15
|
||||||
|
0: mov.l @r4,r5
|
||||||
|
cmp/eq r5,r2
|
||||||
|
bf 1f
|
||||||
|
mov.l r3,@r4
|
||||||
|
1: mov r1,r15
|
||||||
|
mov r5,r3
|
||||||
|
mov r4,r0
|
||||||
|
mov.l @r15+,r4
|
||||||
|
rts
|
||||||
|
mov.l @r15+,r5
|
||||||
|
|
||||||
|
.global __sh_cas_llsc
|
||||||
|
.hidden __sh_cas_llsc
|
||||||
|
__sh_cas_llsc:
|
||||||
|
mov r0,r1
|
||||||
|
synco
|
||||||
|
0: movli.l @r1,r0
|
||||||
|
cmp/eq r0,r2
|
||||||
|
bf 1f
|
||||||
|
mov r3,r0
|
||||||
|
movco.l r0,@r1
|
||||||
|
bf 0b
|
||||||
|
mov r2,r0
|
||||||
|
1: synco
|
||||||
|
mov r0,r3
|
||||||
|
rts
|
||||||
|
mov r1,r0
|
||||||
|
|
||||||
|
.global __sh_cas_imask
|
||||||
|
.hidden __sh_cas_imask
|
||||||
|
__sh_cas_imask:
|
||||||
|
mov r0,r1
|
||||||
|
stc sr,r0
|
||||||
|
mov.l r0,@-r15
|
||||||
|
or #0xf0,r0
|
||||||
|
ldc r0,sr
|
||||||
|
mov.l @r1,r0
|
||||||
|
cmp/eq r0,r2
|
||||||
|
bf 1f
|
||||||
|
mov.l r3,@r1
|
||||||
|
1: ldc.l @r15+,sr
|
||||||
|
mov r0,r3
|
||||||
|
rts
|
||||||
|
mov r1,r0
|
||||||
|
|
||||||
|
.global __sh_cas_cas_l
|
||||||
|
.hidden __sh_cas_cas_l
|
||||||
|
__sh_cas_cas_l:
|
||||||
|
rts
|
||||||
|
.word 0x2323 /* cas.l r2,r3,@r0 */
|
Loading…
Reference in New Issue