add support for sh2 interrupt-masking-based atomics to sh port

the sh2 target is being considered an ISA subset of sh3/sh4, in the sense that binaries built for sh2 are intended to be usable on later cpu models/kernels with mmu support. so rather than hard-coding sh2-specific atomics, the runtime atomic selection mechanism that was already in place has been extended to add sh2 atomics. at this time, the sh2 atomics are not SMP-compatible; since the ISA lacks actual atomic operations, the new code instead masks interrupts for the duration of the atomic operation, producing an atomic result on single-core. this is only possible because the kernel/hardware does not impose protections against userspace doing so. additional changes will be needed to support future SMP systems. care has been taken to avoid producing significant additional code size in the case where it's known at compile-time that the target is not sh2 and does not need sh2-specific code.
2015-06-16 14:28:30 +00:00 · 2015-06-16 14:28:30 +00:00 · f9d84554ba
parent 1b0cdc8700
commit f9d84554ba
4 changed files with 113 additions and 14 deletions
--- a/arch/sh/src/__set_thread_area.c
+++ b/arch/sh/src/__set_thread_area.c
@ -0,0 +1,34 @@
+#include "pthread_impl.h"
+#include "libc.h"
+#include "sh_atomic.h"
+#include <elf.h>
+
+/* Loads p into gbr and also performs sh-specific init: picks the value of __sh_atomic_model that the __sh_* atomics test at runtime. */
+
+#define CPU_HAS_LLSC 0x0040 /* __hwcap bit tested below to select SH_A_LLSC */
+
+__attribute__((__visibility__("hidden"))) unsigned __sh_atomic_model, __sh_nommu; /* no initializer, so both start at 0; 0 is SH_A_GUSA */
+
+int __set_thread_area(void *p) /* always returns 0 */
+{
+	size_t *aux;
+	__asm__ __volatile__ ( "ldc %0, gbr" : : "r"(p) : "memory" ); /* same gbr load the removed assembly version performed */
+#ifndef __SH4A__
+	if (__hwcap & CPU_HAS_LLSC) { /* checked first, so LLSC wins over the AT_PLATFORM scan */
+		__sh_atomic_model = SH_A_LLSC;
+		return 0;
+	}
+#if !defined(__SH3__) && !defined(__SH4__)
+	for (aux=libc.auxv; *aux; aux+=2) { /* walk (key, value) pairs until a zero key */
+		if (*aux != AT_PLATFORM) continue;
+		const char *s = (void *)aux[1];
+		if (s[0]!='s' || s[1]!='h' || s[2]!='2' || s[3]-'0'<10u) break; /* give up unless the string starts with "sh2" and the next char is not a decimal digit */
+		__sh_atomic_model = SH_A_IMASK;
+		__sh_nommu = 1; /* NOTE(review): set together with IMASK; no reader of this flag is visible here */
+		return 0;
+	}
+#endif
+	/* __sh_atomic_model = SH_A_GUSA; */ /* 0, default: nothing to store because the variable already holds 0 */
+#endif
+	return 0;
+}
--- a/arch/sh/src/atomic.c
+++ b/arch/sh/src/atomic.c
@ -1,8 +1,26 @@
 #ifndef __SH4A__

+#include "sh_atomic.h"
 #include "atomic.h"
 #include "libc.h"

+static inline unsigned mask() /* ORs 0xf0 into sr and returns the sr value from before the change; per the commit description this masks interrupts */
+{
+	unsigned sr;
+	__asm__ __volatile__ ( "\n"
+	"	stc sr,r0 \n" /* r0 = current sr */
+	"	mov r0,%0 \n" /* save the unmodified value for the caller */
+	"	or #0xf0,r0 \n" /* set bits 0xf0 in the scratch copy */
+	"	ldc r0,sr \n" /* write the modified value back to sr */
+	: "=&r"(sr) : : "memory", "r0" ); /* sr is an early-clobber output; r0 and memory are declared clobbered */
+	return sr;
+}
+
+static inline void unmask(unsigned sr) /* loads the given value into sr; the callers visible in this diff pass the value returned by mask() */
+{
+	__asm__ __volatile__ ( "ldc %0,sr" : : "r"(sr) : "memory" ); /* "memory" clobber: compiler must not cache or reorder memory accesses across this statement */
+}
+
 /* gusa is a hack in the kernel which lets you create a sequence of instructions
 * which will be restarted if the process is preempted in the middle of the
 * sequence. It will do for implementing atomics on non-smp systems. ABI is:
@ -25,11 +43,17 @@
 	"	mov.l " new ", @" mem "\n" \
 	"1:	mov r1, r15\n"

-#define CPU_HAS_LLSC 0x0040
-
 int __sh_cas(volatile int *p, int t, int s)
 {
-	if (__hwcap & CPU_HAS_LLSC) return __sh_cas_llsc(p, t, s);
+	if (__sh_atomic_model == SH_A_LLSC) return __sh_cas_llsc(p, t, s);
+
+	if (__sh_atomic_model == SH_A_IMASK) {
+		unsigned sr = mask();
+		int old = *p;
+		if (old==t) *p = s;
+		unmask(sr);
+		return old;
+	}

 	int old;
 	__asm__ __volatile__(
@ -43,7 +67,15 @@ int __sh_cas(volatile int *p, int t, int s)

 int __sh_swap(volatile int *x, int v)
 {
-	if (__hwcap & CPU_HAS_LLSC) return __sh_swap_llsc(x, v);
+	if (__sh_atomic_model == SH_A_LLSC) return __sh_swap_llsc(x, v);
+
+	if (__sh_atomic_model == SH_A_IMASK) {
+		unsigned sr = mask();
+		int old = *x;
+		*x = v;
+		unmask(sr);
+		return old;
+	}

 	int old;
 	__asm__ __volatile__(
@ -55,7 +87,15 @@ int __sh_swap(volatile int *x, int v)

 int __sh_fetch_add(volatile int *x, int v)
 {
-	if (__hwcap & CPU_HAS_LLSC) return __sh_fetch_add_llsc(x, v);
+	if (__sh_atomic_model == SH_A_LLSC) return __sh_fetch_add_llsc(x, v);
+
+	if (__sh_atomic_model == SH_A_IMASK) {
+		unsigned sr = mask();
+		int old = *x;
+		*x = old + v;
+		unmask(sr);
+		return old;
+	}

 	int old, dummy;
 	__asm__ __volatile__(
@ -69,7 +109,7 @@ int __sh_fetch_add(volatile int *x, int v)

 void __sh_store(volatile int *p, int x)
 {
-	if (__hwcap & CPU_HAS_LLSC) return __sh_store_llsc(p, x);
+	if (__sh_atomic_model == SH_A_LLSC) return __sh_store_llsc(p, x);
 	__asm__ __volatile__(
 		"	mov.l %1, @%0\n"
 		: : "r"(p), "r"(x) : "memory");
@ -77,7 +117,15 @@ void __sh_store(volatile int *p, int x)

 void __sh_and(volatile int *x, int v)
 {
-	if (__hwcap & CPU_HAS_LLSC) return __sh_and_llsc(x, v);
+	if (__sh_atomic_model == SH_A_LLSC) return __sh_and_llsc(x, v);
+
+	if (__sh_atomic_model == SH_A_IMASK) {
+		unsigned sr = mask();
+		int old = *x;
+		*x = old & v;
+		unmask(sr);
+		return;
+	}

 	int dummy;
 	__asm__ __volatile__(
@ -89,7 +137,15 @@ void __sh_and(volatile int *x, int v)

 void __sh_or(volatile int *x, int v)
 {
-	if (__hwcap & CPU_HAS_LLSC) return __sh_or_llsc(x, v);
+	if (__sh_atomic_model == SH_A_LLSC) return __sh_or_llsc(x, v);
+
+	if (__sh_atomic_model == SH_A_IMASK) {
+		unsigned sr = mask();
+		int old = *x;
+		*x = old | v;
+		unmask(sr);
+		return;
+	}

 	int dummy;
 	__asm__ __volatile__(
--- a/arch/sh/src/sh_atomic.h
+++ b/arch/sh/src/sh_atomic.h
@ -0,0 +1,15 @@
+#ifndef _SH_ATOMIC_H
+#define _SH_ATOMIC_H
+
+#define SH_A_GUSA 0 /* default model; equals the zero-initialized value of __sh_atomic_model */
+#define SH_A_LLSC 1 /* selected when __hwcap has the CPU_HAS_LLSC bit set */
+#define SH_A_CAS 2 /* NOTE(review): not referenced by the code visible in this diff */
+#if !defined(__SH3__) && !defined(__SH4__)
+#define SH_A_IMASK 3 /* interrupt-masking model, selected when AT_PLATFORM starts with "sh2" and no digit follows */
+#else
+#define SH_A_IMASK -1LL /* unmatchable by unsigned int, so every == SH_A_IMASK test on __sh_atomic_model is false in sh3/sh4 builds */
+#endif
+
+extern __attribute__((__visibility__("hidden"))) unsigned __sh_atomic_model; /* defined in arch/sh/src/__set_thread_area.c */
+
+#endif
--- a/src/thread/sh/__set_thread_area.s
+++ b/src/thread/sh/__set_thread_area.s
@ -1,6 +0,0 @@
-.global __set_thread_area
-.type   __set_thread_area, @function
-__set_thread_area:
-	ldc r4, gbr
-	rts
-	 mov #0, r0