diff --git a/include/import/atomic-ops.h b/include/import/atomic-ops.h index 1d9c98ba5..29674db46 100644 --- a/include/import/atomic-ops.h +++ b/include/import/atomic-ops.h @@ -1,78 +1,149 @@ +/* generic atomic operations used by progressive locks + * + * Copyright (C) 2012-2022 Willy Tarreau + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + #ifndef PL_ATOMIC_OPS_H #define PL_ATOMIC_OPS_H +/* The definitions below exist in two forms: + * - fallback form (_pl_*) + * - preferred form (pl_*) + * + * As a general rule, given that C11 atomics tend to offer more flexibility to + * the compiler, these should set the preferred form, and the arch-specific + * code should set the fallback code. But it's possible for arch-specific code + * to set a preferred form, in which case it will simply be used over the other + * ones. + */ -/* compiler-only memory barrier, for use around locks */ -#define pl_barrier() do { \ - asm volatile("" ::: "memory"); \ - } while (0) +/* + * Architecture-specific versions of the various operations + */ +/* + * ###### ix86 / x86_64 below ###### + */ #if defined(__i386__) || defined (__i486__) || defined (__i586__) || defined (__i686__) || defined (__x86_64__) +/* for compilers supporting condition flags on output, let's directly return them */ +#if defined(__GCC_ASM_FLAG_OUTPUTS__) +#define X86_COND_C_TO_REG(reg) "" +#define X86_COND_Z_TO_REG(reg) "" +#define X86_COND_NZ_TO_REG(reg) "" +#define X86_COND_C_RESULT(var) "=@ccc"(var) +#define X86_COND_Z_RESULT(var) "=@ccz"(var) +#define X86_COND_NZ_RESULT(var) "=@ccnz"(var) +#else +#define X86_COND_C_TO_REG(reg) "sbb %" #reg ", %" #reg "\n\t" +#define X86_COND_Z_TO_REG(reg) "sete %" #reg "\n\t" +#define X86_COND_NZ_TO_REG(reg) "setne %" #reg "\n\t" +#define X86_COND_C_RESULT(var) "=r"(var) +#define X86_COND_Z_RESULT(var) "=qm"(var) +#define X86_COND_NZ_RESULT(var) "=qm"(var) +#endif + +/* CPU relaxation while waiting (PAUSE instruction on x86) */ +#define pl_cpu_relax() do { \ + asm volatile("rep;nop\n"); \ + } while (0) + /* full memory barrier using mfence when SSE2 is supported, falling back to * "lock add %esp" (gcc uses "lock add" or "lock or"). 
*/ #if defined(__SSE2__) -#define pl_mb() do { \ +#define _pl_mb() do { \ asm volatile("mfence" ::: "memory"); \ } while (0) #elif defined(__x86_64__) -#define pl_mb() do { \ +#define _pl_mb() do { \ asm volatile("lock addl $0,0 (%%rsp)" ::: "memory", "cc"); \ } while (0) #else /* ix86 */ -#define pl_mb() do { \ +#define _pl_mb() do { \ asm volatile("lock addl $0,0 (%%esp)" ::: "memory", "cc"); \ } while (0) #endif /* end of pl_mb() case for sse2/x86_64/x86 */ -/* - * Generic functions common to the x86 family - */ +/* load/store barriers are nops on x86 */ +#define _pl_mb_load() do { asm volatile("" ::: "memory"); } while (0) +#define _pl_mb_store() do { asm volatile("" ::: "memory"); } while (0) -#define pl_cpu_relax() do { \ - asm volatile("rep;nop\n"); \ - } while (0) +/* atomic full/load/store are also nops on x86 */ +#define _pl_mb_ato() do { asm volatile("" ::: "memory"); } while (0) +#define _pl_mb_ato_load() do { asm volatile("" ::: "memory"); } while (0) +#define _pl_mb_ato_store() do { asm volatile("" ::: "memory"); } while (0) + +/* atomic load: on x86 it's just a volatile read */ +#define _pl_load_lax(ptr) _pl_load(ptr) +#define _pl_load(ptr) ({ typeof(*(ptr)) __ptr = *(volatile typeof(ptr))ptr; __ptr; }) + +/* atomic store: on x86 it's just a volatile write */ +#define _pl_store_lax(ptr) _pl_store(ptr) +#define _pl_store(ptr, x) do { *((volatile typeof(ptr))(ptr)) = (typeof(*ptr))(x); } while (0) /* increment integer value pointed to by pointer , and return non-zero if * result is non-null. */ -#define pl_inc(ptr) ( \ +#define _pl_inc_lax(ptr) _pl_inc(ptr) +#define _pl_inc_acq(ptr) _pl_inc(ptr) +#define _pl_inc_rel(ptr) _pl_inc(ptr) +#define _pl_inc(ptr) ( \ (sizeof(long) == 8 && sizeof(*(ptr)) == 8) ? ({ \ unsigned char ret; \ asm volatile("lock incq %0\n" \ - "setne %1\n" \ - : "+m" (*(ptr)), "=qm" (ret) \ + X86_COND_NZ_TO_REG(1) \ + : "+m" (*(ptr)), X86_COND_NZ_RESULT(ret) \ : \ : "cc"); \ ret; /* return value */ \ }) : (sizeof(*(ptr)) == 4) ? ({ \ unsigned char ret; \ asm volatile("lock incl %0\n" \ - "setne %1\n" \ - : "+m" (*(ptr)), "=qm" (ret) \ + X86_COND_NZ_TO_REG(1) \ + : "+m" (*(ptr)), X86_COND_NZ_RESULT(ret) \ : \ : "cc"); \ ret; /* return value */ \ }) : (sizeof(*(ptr)) == 2) ? ({ \ unsigned char ret; \ asm volatile("lock incw %0\n" \ - "setne %1\n" \ - : "+m" (*(ptr)), "=qm" (ret) \ + X86_COND_NZ_TO_REG(1) \ + : "+m" (*(ptr)), X86_COND_NZ_RESULT(ret) \ : \ : "cc"); \ ret; /* return value */ \ }) : (sizeof(*(ptr)) == 1) ? ({ \ unsigned char ret; \ asm volatile("lock incb %0\n" \ - "setne %1\n" \ - : "+m" (*(ptr)), "=qm" (ret) \ + X86_COND_NZ_TO_REG(1) \ + : "+m" (*(ptr)), X86_COND_NZ_RESULT(ret) \ : \ : "cc"); \ ret; /* return value */ \ @@ -88,36 +159,39 @@ /* decrement integer value pointed to by pointer , and return non-zero if * result is non-null. */ -#define pl_dec(ptr) ( \ +#define _pl_dec_lax(ptr) _pl_dec(ptr) +#define _pl_dec_acq(ptr) _pl_dec(ptr) +#define _pl_dec_rel(ptr) _pl_dec(ptr) +#define _pl_dec(ptr) ( \ (sizeof(long) == 8 && sizeof(*(ptr)) == 8) ? ({ \ unsigned char ret; \ asm volatile("lock decq %0\n" \ - "setne %1\n" \ - : "+m" (*(ptr)), "=qm" (ret) \ + X86_COND_NZ_TO_REG(1) \ + : "+m" (*(ptr)), X86_COND_NZ_RESULT(ret) \ : \ : "cc"); \ ret; /* return value */ \ }) : (sizeof(*(ptr)) == 4) ? ({ \ unsigned char ret; \ asm volatile("lock decl %0\n" \ - "setne %1\n" \ - : "+m" (*(ptr)), "=qm" (ret) \ + X86_COND_NZ_TO_REG(1) \ + : "+m" (*(ptr)), X86_COND_NZ_RESULT(ret) \ : \ : "cc"); \ ret; /* return value */ \ }) : (sizeof(*(ptr)) == 2) ? 
({ \ unsigned char ret; \ asm volatile("lock decw %0\n" \ - "setne %1\n" \ - : "+m" (*(ptr)), "=qm" (ret) \ + X86_COND_NZ_TO_REG(1) \ + : "+m" (*(ptr)), X86_COND_NZ_RESULT(ret) \ : \ : "cc"); \ ret; /* return value */ \ }) : (sizeof(*(ptr)) == 1) ? ({ \ unsigned char ret; \ asm volatile("lock decb %0\n" \ - "setne %1\n" \ - : "+m" (*(ptr)), "=qm" (ret) \ + X86_COND_NZ_TO_REG(1) \ + : "+m" (*(ptr)), X86_COND_NZ_RESULT(ret) \ : \ : "cc"); \ ret; /* return value */ \ @@ -131,7 +205,10 @@ ) /* increment integer value pointed to by pointer , no return */ -#define pl_inc_noret(ptr) ({ \ +#define pl_inc_noret_lax(ptr) pl_inc_noret(ptr) +#define pl_inc_noret_acq(ptr) pl_inc_noret(ptr) +#define pl_inc_noret_rel(ptr) pl_inc_noret(ptr) +#define pl_inc_noret(ptr) do { \ if (sizeof(long) == 8 && sizeof(*(ptr)) == 8) { \ asm volatile("lock incq %0\n" \ : "+m" (*(ptr)) \ @@ -158,10 +235,13 @@ sizeof(*(ptr)) != 4 && (sizeof(long) != 8 || sizeof(*(ptr)) != 8)) \ __unsupported_argument_size_for_pl_inc_noret__(__FILE__,__LINE__); \ } \ -}) +} while (0) /* decrement integer value pointed to by pointer , no return */ -#define pl_dec_noret(ptr) ({ \ +#define pl_dec_noret_lax(ptr) pl_dec_noret(ptr) +#define pl_dec_noret_acq(ptr) pl_dec_noret(ptr) +#define pl_dec_noret_rel(ptr) pl_dec_noret(ptr) +#define pl_dec_noret(ptr) do { \ if (sizeof(long) == 8 && sizeof(*(ptr)) == 8) { \ asm volatile("lock decq %0\n" \ : "+m" (*(ptr)) \ @@ -188,12 +268,15 @@ sizeof(*(ptr)) != 4 && (sizeof(long) != 8 || sizeof(*(ptr)) != 8)) \ __unsupported_argument_size_for_pl_dec_noret__(__FILE__,__LINE__); \ } \ -}) +} while (0) /* add integer constant to integer value pointed to by pointer , * no return. Size of is not checked. */ -#define pl_add(ptr, x) ({ \ +#define _pl_add_noret_lax(ptr, x) _pl_add_noret(ptr, x) +#define _pl_add_noret_acq(ptr, x) _pl_add_noret(ptr, x) +#define _pl_add_noret_rel(ptr, x) _pl_add_noret(ptr, x) +#define _pl_add_noret(ptr, x) do { \ if (sizeof(long) == 8 && sizeof(*(ptr)) == 8) { \ asm volatile("lock addq %1, %0\n" \ : "+m" (*(ptr)) \ @@ -220,12 +303,15 @@ sizeof(*(ptr)) != 4 && (sizeof(long) != 8 || sizeof(*(ptr)) != 8)) \ __unsupported_argument_size_for_pl_add__(__FILE__,__LINE__); \ } \ -}) +} while (0) /* subtract integer constant from integer value pointed to by pointer * , no return. Size of is not checked. */ -#define pl_sub(ptr, x) ({ \ +#define _pl_sub_noret_lax(ptr, x) _pl_sub_noret(ptr, x) +#define _pl_sub_noret_acq(ptr, x) _pl_sub_noret(ptr, x) +#define _pl_sub_noret_rel(ptr, x) _pl_sub_noret(ptr, x) +#define _pl_sub_noret(ptr, x) do { \ if (sizeof(long) == 8 && sizeof(*(ptr)) == 8) { \ asm volatile("lock subq %1, %0\n" \ : "+m" (*(ptr)) \ @@ -252,12 +338,15 @@ sizeof(*(ptr)) != 4 && (sizeof(long) != 8 || sizeof(*(ptr)) != 8)) \ __unsupported_argument_size_for_pl_sub__(__FILE__,__LINE__); \ } \ -}) +} while (0) /* binary and integer value pointed to by pointer with constant , no * return. Size of is not checked. 
*/ -#define pl_and(ptr, x) ({ \ +#define _pl_and_noret_lax(ptr, x) _pl_and_noret(ptr, x) +#define _pl_and_noret_acq(ptr, x) _pl_and_noret(ptr, x) +#define _pl_and_noret_rel(ptr, x) _pl_and_noret(ptr, x) +#define _pl_and_noret(ptr, x) do { \ if (sizeof(long) == 8 && sizeof(*(ptr)) == 8) { \ asm volatile("lock andq %1, %0\n" \ : "+m" (*(ptr)) \ @@ -284,12 +373,15 @@ sizeof(*(ptr)) != 4 && (sizeof(long) != 8 || sizeof(*(ptr)) != 8)) \ __unsupported_argument_size_for_pl_and__(__FILE__,__LINE__); \ } \ -}) +} while (0) /* binary or integer value pointed to by pointer with constant , no * return. Size of is not checked. */ -#define pl_or(ptr, x) ({ \ +#define _pl_or_noret_lax(ptr, x) _pl_or_noret(ptr, x) +#define _pl_or_noret_acq(ptr, x) _pl_or_noret(ptr, x) +#define _pl_or_noret_rel(ptr, x) _pl_or_noret(ptr, x) +#define _pl_or_noret(ptr, x) do { \ if (sizeof(long) == 8 && sizeof(*(ptr)) == 8) { \ asm volatile("lock orq %1, %0\n" \ : "+m" (*(ptr)) \ @@ -316,12 +408,15 @@ sizeof(*(ptr)) != 4 && (sizeof(long) != 8 || sizeof(*(ptr)) != 8)) \ __unsupported_argument_size_for_pl_or__(__FILE__,__LINE__); \ } \ -}) +} while (0) /* binary xor integer value pointed to by pointer with constant , no * return. Size of is not checked. */ -#define pl_xor(ptr, x) ({ \ +#define _pl_xor_noret_lax(ptr, x) _pl_xor_noret(ptr, x) +#define _pl_xor_noret_acq(ptr, x) _pl_xor_noret(ptr, x) +#define _pl_xor_noret_rel(ptr, x) _pl_xor_noret(ptr, x) +#define _pl_xor_noret(ptr, x) do { \ if (sizeof(long) == 8 && sizeof(*(ptr)) == 8) { \ asm volatile("lock xorq %1, %0\n" \ : "+m" (*(ptr)) \ @@ -348,34 +443,78 @@ sizeof(*(ptr)) != 4 && (sizeof(long) != 8 || sizeof(*(ptr)) != 8)) \ __unsupported_argument_size_for_pl_xor__(__FILE__,__LINE__); \ } \ -}) +} while (0) + +/* test and reset bit in integer value pointed to by pointer . Returns + * 0 if the bit was not set, or ~0 of the same type as *ptr if it was set. Note + * that there is no 8-bit equivalent operation. + */ +#define pl_btr_lax(ptr, bit) pl_btr(ptr, bit) +#define pl_btr_acq(ptr, bit) pl_btr(ptr, bit) +#define pl_btr_rel(ptr, bit) pl_btr(ptr, bit) +#define pl_btr(ptr, bit) ( \ + (sizeof(long) == 8 && sizeof(*(ptr)) == 8) ? ({ \ + unsigned long ret; \ + asm volatile("lock btrq %2, %0\n\t" \ + X86_COND_C_TO_REG(1) \ + : "+m" (*(ptr)), X86_COND_C_RESULT(ret) \ + : "Ir" ((unsigned long)(bit)) \ + : "cc"); \ + ret; /* return value */ \ + }) : (sizeof(*(ptr)) == 4) ? ({ \ + unsigned int ret; \ + asm volatile("lock btrl %2, %0\n\t" \ + X86_COND_C_TO_REG(1) \ + : "+m" (*(ptr)), X86_COND_C_RESULT(ret) \ + : "Ir" ((unsigned int)(bit)) \ + : "cc"); \ + ret; /* return value */ \ + }) : (sizeof(*(ptr)) == 2) ? ({ \ + unsigned short ret; \ + asm volatile("lock btrw %2, %0\n\t" \ + X86_COND_C_TO_REG(1) \ + : "+m" (*(ptr)), X86_COND_C_RESULT(ret) \ + : "Ir" ((unsigned short)(bit)) \ + : "cc"); \ + ret; /* return value */ \ + }) : ({ \ + void __unsupported_argument_size_for_pl_btr__(char *,int); \ + if (sizeof(*(ptr)) != 1 && sizeof(*(ptr)) != 2 && \ + sizeof(*(ptr)) != 4 && (sizeof(long) != 8 || sizeof(*(ptr)) != 8)) \ + __unsupported_argument_size_for_pl_btr__(__FILE__,__LINE__); \ + 0; \ + }) \ +) /* test and set bit in integer value pointed to by pointer . Returns * 0 if the bit was not set, or ~0 of the same type as *ptr if it was set. Note * that there is no 8-bit equivalent operation. 
*/ -#define pl_bts(ptr, bit) ( \ +#define pl_bts_lax(ptr, bit) pl_bts(ptr, bit) +#define pl_bts_acq(ptr, bit) pl_bts(ptr, bit) +#define pl_bts_rel(ptr, bit) pl_bts(ptr, bit) +#define pl_bts(ptr, bit) ( \ (sizeof(long) == 8 && sizeof(*(ptr)) == 8) ? ({ \ unsigned long ret; \ asm volatile("lock btsq %2, %0\n\t" \ - "sbb %1, %1\n\t" \ - : "+m" (*(ptr)), "=r" (ret) \ + X86_COND_C_TO_REG(1) \ + : "+m" (*(ptr)), X86_COND_C_RESULT(ret) \ : "Ir" ((unsigned long)(bit)) \ : "cc"); \ ret; /* return value */ \ }) : (sizeof(*(ptr)) == 4) ? ({ \ unsigned int ret; \ asm volatile("lock btsl %2, %0\n\t" \ - "sbb %1, %1\n\t" \ - : "+m" (*(ptr)), "=r" (ret) \ + X86_COND_C_TO_REG(1) \ + : "+m" (*(ptr)), X86_COND_C_RESULT(ret) \ : "Ir" ((unsigned int)(bit)) \ : "cc"); \ ret; /* return value */ \ }) : (sizeof(*(ptr)) == 2) ? ({ \ unsigned short ret; \ asm volatile("lock btsw %2, %0\n\t" \ - "sbb %1, %1\n\t" \ - : "+m" (*(ptr)), "=r" (ret) \ + X86_COND_C_TO_REG(1) \ + : "+m" (*(ptr)), X86_COND_C_RESULT(ret) \ : "Ir" ((unsigned short)(bit)) \ : "cc"); \ ret; /* return value */ \ @@ -395,8 +534,9 @@ /* fetch-and-add: fetch integer value pointed to by pointer , add to * to <*ptr> and return the previous value. + * => THIS IS LEGACY, USE _pl_ldadd() INSTEAD. */ -#define pl_xadd(ptr, x) ( \ +#define _pl_xadd(ptr, x) ( \ (sizeof(long) == 8 && sizeof(*(ptr)) == 8) ? ({ \ unsigned long ret = (unsigned long)(x); \ asm volatile("lock xaddq %0, %1\n" \ @@ -434,10 +574,98 @@ }) \ ) +/* fetch-and-add: fetch integer value pointed to by pointer , add to + * to <*ptr> and return the previous value. + */ +#define _pl_ldadd_lax(ptr, x) _pl_ldadd(ptr, x) +#define _pl_ldadd_acq(ptr, x) _pl_ldadd(ptr, x) +#define _pl_ldadd_rel(ptr, x) _pl_ldadd(ptr, x) +#define _pl_ldadd(ptr, x) ( \ + (sizeof(long) == 8 && sizeof(*(ptr)) == 8) ? ({ \ + unsigned long ret = (unsigned long)(x); \ + asm volatile("lock xaddq %0, %1\n" \ + : "=r" (ret), "+m" (*(ptr)) \ + : "0" (ret) \ + : "cc"); \ + ret; /* return value */ \ + }) : (sizeof(*(ptr)) == 4) ? ({ \ + unsigned int ret = (unsigned int)(x); \ + asm volatile("lock xaddl %0, %1\n" \ + : "=r" (ret), "+m" (*(ptr)) \ + : "0" (ret) \ + : "cc"); \ + ret; /* return value */ \ + }) : (sizeof(*(ptr)) == 2) ? ({ \ + unsigned short ret = (unsigned short)(x); \ + asm volatile("lock xaddw %0, %1\n" \ + : "=r" (ret), "+m" (*(ptr)) \ + : "0" (ret) \ + : "cc"); \ + ret; /* return value */ \ + }) : (sizeof(*(ptr)) == 1) ? ({ \ + unsigned char ret = (unsigned char)(x); \ + asm volatile("lock xaddb %0, %1\n" \ + : "=r" (ret), "+m" (*(ptr)) \ + : "0" (ret) \ + : "cc"); \ + ret; /* return value */ \ + }) : ({ \ + void __unsupported_argument_size_for_pl_ldadd__(char *,int); \ + if (sizeof(*(ptr)) != 1 && sizeof(*(ptr)) != 2 && \ + sizeof(*(ptr)) != 4 && (sizeof(long) != 8 || sizeof(*(ptr)) != 8)) \ + __unsupported_argument_size_for_pl_ldadd__(__FILE__,__LINE__); \ + 0; \ + }) \ +) + +/* fetch-and-sub: fetch integer value pointed to by pointer , add - to + * to <*ptr> and return the previous value. + */ +#define _pl_ldsub_lax(ptr, x) _pl_ldsub(ptr, x) +#define _pl_ldsub_acq(ptr, x) _pl_ldsub(ptr, x) +#define _pl_ldsub_rel(ptr, x) _pl_ldsub(ptr, x) +#define _pl_ldsub(ptr, x) ( \ + (sizeof(long) == 8 && sizeof(*(ptr)) == 8) ? ({ \ + unsigned long ret = (unsigned long)(-x); \ + asm volatile("lock xaddq %0, %1\n" \ + : "=r" (ret), "+m" (*(ptr)) \ + : "0" (ret) \ + : "cc"); \ + ret; /* return value */ \ + }) : (sizeof(*(ptr)) == 4) ? 
({ \ + unsigned int ret = (unsigned int)(-x); \ + asm volatile("lock xaddl %0, %1\n" \ + : "=r" (ret), "+m" (*(ptr)) \ + : "0" (ret) \ + : "cc"); \ + ret; /* return value */ \ + }) : (sizeof(*(ptr)) == 2) ? ({ \ + unsigned short ret = (unsigned short)(-x); \ + asm volatile("lock xaddw %0, %1\n" \ + : "=r" (ret), "+m" (*(ptr)) \ + : "0" (ret) \ + : "cc"); \ + ret; /* return value */ \ + }) : (sizeof(*(ptr)) == 1) ? ({ \ + unsigned char ret = (unsigned char)(-x); \ + asm volatile("lock xaddb %0, %1\n" \ + : "=r" (ret), "+m" (*(ptr)) \ + : "0" (ret) \ + : "cc"); \ + ret; /* return value */ \ + }) : ({ \ + void __unsupported_argument_size_for_pl_ldsub__(char *,int); \ + if (sizeof(*(ptr)) != 1 && sizeof(*(ptr)) != 2 && \ + sizeof(*(ptr)) != 4 && (sizeof(long) != 8 || sizeof(*(ptr)) != 8)) \ + __unsupported_argument_size_for_pl_ldsub__(__FILE__,__LINE__); \ + 0; \ + }) \ +) + /* exchange value with integer value pointed to by pointer , and return * previous <*ptr> value. must be of the same size as <*ptr>. */ -#define pl_xchg(ptr, x) ( \ +#define _pl_xchg(ptr, x) ( \ (sizeof(long) == 8 && sizeof(*(ptr)) == 8) ? ({ \ unsigned long ret = (unsigned long)(x); \ asm volatile("xchgq %0, %1\n" \ @@ -479,7 +707,7 @@ * it matches, and return . and must be of the same size as * <*ptr>. */ -#define pl_cmpxchg(ptr, old, new) ( \ +#define _pl_cmpxchg(ptr, old, new) ( \ (sizeof(long) == 8 && sizeof(*(ptr)) == 8) ? ({ \ unsigned long ret; \ asm volatile("lock cmpxchgq %2,%1" \ @@ -521,49 +749,1243 @@ }) \ ) -#else -/* generic implementations */ - -#if defined(__aarch64__) +/* + * ##### ARM64 (aarch64) below ##### + */ +#elif defined(__aarch64__) /* This was shown to improve fairness on modern ARMv8 such as Neoverse N1 */ #define pl_cpu_relax() do { \ asm volatile("isb" ::: "memory"); \ } while (0) -#else +/* full/load/store barriers */ +#define _pl_mb() do { asm volatile("dmb ish" ::: "memory"); } while (0) +#define _pl_mb_load() do { asm volatile("dmb ishld" ::: "memory"); } while (0) +#define _pl_mb_store() do { asm volatile("dmb ishst" ::: "memory"); } while (0) -#define pl_cpu_relax() do { \ - asm volatile(""); \ - } while (0) +/* atomic full/load/store */ +#define _pl_mb_ato() do { asm volatile("dmb ish" ::: "memory"); } while (0) +#define _pl_mb_ato_load() do { asm volatile("dmb ishld" ::: "memory"); } while (0) +#define _pl_mb_ato_store() do { asm volatile("dmb ishst" ::: "memory"); } while (0) +#endif // end of arch-specific code + + +/* + * Generic code using the C11 __atomic API for functions not defined above. + * These are usable from gcc-4.7 and clang. We'll simply rely on the macros + * defining the memory orders to detect them. All operations are not + * necessarily defined, so some fallbacks to the default methods might still + * be necessary. 
+ */
+
+
+#if defined(__ATOMIC_RELAXED) && defined(__ATOMIC_CONSUME) && defined(__ATOMIC_ACQUIRE) && \
+    defined(__ATOMIC_RELEASE) && defined(__ATOMIC_ACQ_REL) && defined(__ATOMIC_SEQ_CST)
+
+/* compiler-only memory barrier, for use around locks */
+#ifndef pl_barrier
+#define pl_barrier() __atomic_signal_fence(__ATOMIC_SEQ_CST)
 #endif
 /* full memory barrier */
+#ifndef pl_mb
+#define pl_mb() __atomic_thread_fence(__ATOMIC_SEQ_CST)
+#endif
+
+/* atomic load */
+#ifndef pl_load_lax
+#define pl_load_lax(ptr) __atomic_load_n(ptr, __ATOMIC_RELAXED)
+#endif
+
+#ifndef pl_load
+#define pl_load(ptr) __atomic_load_n(ptr, __ATOMIC_ACQUIRE)
+#endif
+
+/* atomic store */
+#ifndef pl_store_lax
+#define pl_store_lax(ptr, x) __atomic_store_n((ptr), (x), __ATOMIC_RELAXED)
+#endif
+
+#ifndef pl_store
+#define pl_store(ptr, x) __atomic_store_n((ptr), (x), __ATOMIC_RELEASE)
+#endif
+
+/* increment integer value pointed to by pointer <ptr>, and return non-zero if
+ * result is non-null.
+ */
+#ifndef pl_inc_lax
+#define pl_inc_lax(ptr) (__atomic_add_fetch((ptr), 1, __ATOMIC_RELAXED) != 0)
+#endif
+
+#ifndef pl_inc_acq
+#define pl_inc_acq(ptr) (__atomic_add_fetch((ptr), 1, __ATOMIC_ACQUIRE) != 0)
+#endif
+
+#ifndef pl_inc_rel
+#define pl_inc_rel(ptr) (__atomic_add_fetch((ptr), 1, __ATOMIC_RELEASE) != 0)
+#endif
+
+#ifndef pl_inc
+#define pl_inc(ptr) (__atomic_add_fetch((ptr), 1, __ATOMIC_SEQ_CST) != 0)
+#endif
+
+/* decrement integer value pointed to by pointer <ptr>, and return non-zero if
+ * result is non-null.
+ */
+#ifndef pl_dec_lax
+#define pl_dec_lax(ptr) (__atomic_sub_fetch((ptr), 1, __ATOMIC_RELAXED) != 0)
+#endif
+
+#ifndef pl_dec_acq
+#define pl_dec_acq(ptr) (__atomic_sub_fetch((ptr), 1, __ATOMIC_ACQUIRE) != 0)
+#endif
+
+#ifndef pl_dec_rel
+#define pl_dec_rel(ptr) (__atomic_sub_fetch((ptr), 1, __ATOMIC_RELEASE) != 0)
+#endif
+
+#ifndef pl_dec
+#define pl_dec(ptr) (__atomic_sub_fetch((ptr), 1, __ATOMIC_SEQ_CST) != 0)
+#endif
+
+/* increment integer value pointed to by pointer <ptr>, no return */
+#ifndef pl_inc_noret_lax
+#define pl_inc_noret_lax(ptr) ((void)__atomic_add_fetch((ptr), 1, __ATOMIC_RELAXED))
+#endif
+
+#ifndef pl_inc_noret_acq
+#define pl_inc_noret_acq(ptr) ((void)__atomic_add_fetch((ptr), 1, __ATOMIC_ACQUIRE))
+#endif
+
+#ifndef pl_inc_noret_rel
+#define pl_inc_noret_rel(ptr) ((void)__atomic_add_fetch((ptr), 1, __ATOMIC_RELEASE))
+#endif
+
+#ifndef pl_inc_noret
+#define pl_inc_noret(ptr) ((void)__atomic_add_fetch((ptr), 1, __ATOMIC_SEQ_CST))
+#endif
+
+/* decrement integer value pointed to by pointer <ptr>, no return */
+#ifndef pl_dec_noret_lax
+#define pl_dec_noret_lax(ptr) ((void)__atomic_sub_fetch((ptr), 1, __ATOMIC_RELAXED))
+#endif
+
+#ifndef pl_dec_noret_acq
+#define pl_dec_noret_acq(ptr) ((void)__atomic_sub_fetch((ptr), 1, __ATOMIC_ACQUIRE))
+#endif
+
+#ifndef pl_dec_noret_rel
+#define pl_dec_noret_rel(ptr) ((void)__atomic_sub_fetch((ptr), 1, __ATOMIC_RELEASE))
+#endif
+
+#ifndef pl_dec_noret
+#define pl_dec_noret(ptr) ((void)__atomic_sub_fetch((ptr), 1, __ATOMIC_SEQ_CST))
+#endif
+
+/* add integer constant <x> to integer value pointed to by pointer <ptr>,
+ * no return. Size of <x> is not checked.
+ */
+#ifndef pl_add_lax
+#define pl_add_lax(ptr, x) (__atomic_add_fetch((ptr), (x), __ATOMIC_RELAXED))
+#endif
+
+#ifndef pl_add_acq
+#define pl_add_acq(ptr, x) (__atomic_add_fetch((ptr), (x), __ATOMIC_ACQUIRE))
+#endif
+
+#ifndef pl_add_rel
+#define pl_add_rel(ptr, x) (__atomic_add_fetch((ptr), (x), __ATOMIC_RELEASE))
+#endif
+
+#ifndef pl_add
+#define pl_add(ptr, x) (__atomic_add_fetch((ptr), (x), __ATOMIC_SEQ_CST))
+#endif
+
+/* subtract integer constant <x> from integer value pointed to by pointer
+ * <ptr>, no return. Size of <x> is not checked.
+ */
+#ifndef pl_sub_lax
+#define pl_sub_lax(ptr, x) (__atomic_sub_fetch((ptr), (x), __ATOMIC_RELAXED))
+#endif
+
+#ifndef pl_sub_acq
+#define pl_sub_acq(ptr, x) (__atomic_sub_fetch((ptr), (x), __ATOMIC_ACQUIRE))
+#endif
+
+#ifndef pl_sub_rel
+#define pl_sub_rel(ptr, x) (__atomic_sub_fetch((ptr), (x), __ATOMIC_RELEASE))
+#endif
+
+#ifndef pl_sub
+#define pl_sub(ptr, x) (__atomic_sub_fetch((ptr), (x), __ATOMIC_SEQ_CST))
+#endif
+
+/* binary and integer value pointed to by pointer <ptr> with constant <x>, no
+ * return. Size of <x> is not checked.
+ */
+#ifndef pl_and_lax
+#define pl_and_lax(ptr, x) (__atomic_and_fetch((ptr), (x), __ATOMIC_RELAXED))
+#endif
+
+#ifndef pl_and_acq
+#define pl_and_acq(ptr, x) (__atomic_and_fetch((ptr), (x), __ATOMIC_ACQUIRE))
+#endif
+
+#ifndef pl_and_rel
+#define pl_and_rel(ptr, x) (__atomic_and_fetch((ptr), (x), __ATOMIC_RELEASE))
+#endif
+
+#ifndef pl_and
+#define pl_and(ptr, x) (__atomic_and_fetch((ptr), (x), __ATOMIC_SEQ_CST))
+#endif
+
+/* binary or integer value pointed to by pointer <ptr> with constant <x>, no
+ * return. Size of <x> is not checked.
+ */
+#ifndef pl_or_lax
+#define pl_or_lax(ptr, x) (__atomic_or_fetch((ptr), (x), __ATOMIC_RELAXED))
+#endif
+
+#ifndef pl_or_acq
+#define pl_or_acq(ptr, x) (__atomic_or_fetch((ptr), (x), __ATOMIC_ACQUIRE))
+#endif
+
+#ifndef pl_or_rel
+#define pl_or_rel(ptr, x) (__atomic_or_fetch((ptr), (x), __ATOMIC_RELEASE))
+#endif
+
+#ifndef pl_or
+#define pl_or(ptr, x) (__atomic_or_fetch((ptr), (x), __ATOMIC_SEQ_CST))
+#endif
+
+/* binary xor integer value pointed to by pointer <ptr> with constant <x>, no
+ * return. Size of <x> is not checked.
+ */
+#ifndef pl_xor_lax
+#define pl_xor_lax(ptr, x) (__atomic_xor_fetch((ptr), (x), __ATOMIC_RELAXED))
+#endif
+
+#ifndef pl_xor_acq
+#define pl_xor_acq(ptr, x) (__atomic_xor_fetch((ptr), (x), __ATOMIC_ACQUIRE))
+#endif
+
+#ifndef pl_xor_rel
+#define pl_xor_rel(ptr, x) (__atomic_xor_fetch((ptr), (x), __ATOMIC_RELEASE))
+#endif
+
+#ifndef pl_xor
+#define pl_xor(ptr, x) (__atomic_xor_fetch((ptr), (x), __ATOMIC_SEQ_CST))
+#endif
+
+/* fetch-and-add: fetch integer value pointed to by pointer <ptr>, add <x>
+ * to <*ptr> and return the previous value.
+ * => THIS IS LEGACY, USE pl_ldadd() INSTEAD.
+ */
+#ifndef pl_xadd
+#define pl_xadd(ptr, x) (__atomic_fetch_add((ptr), (x), __ATOMIC_SEQ_CST))
+#endif
+
+/* exchange value <x> with integer value pointed to by pointer <ptr>, and return
+ * previous <*ptr> value. <x> must be of the same size as <*ptr>.
+ */
+#ifndef pl_xchg
+#define pl_xchg(ptr, x) (__atomic_exchange_n((ptr), (x), __ATOMIC_SEQ_CST))
+#endif
+
+/* compare integer value <*ptr> with <old> and exchange it with <new> if
+ * it matches, and return <old>. <old> and <new> must be of the same size as
+ * <*ptr>.
+ */
+#ifndef pl_cmpxchg
+#define pl_cmpxchg(ptr, old, new) ({ \
+	typeof(*ptr) __old = (old); \
+	__atomic_compare_exchange_n((ptr), &__old, (new), 0, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED); \
+	__old; })
+#endif
+
+/* fetch-and-add: fetch integer value pointed to by pointer <ptr>, add <x>
+ * to <*ptr> and return the previous value.
+ */
+#ifndef pl_ldadd_lax
+#define pl_ldadd_lax(ptr, x) (__atomic_fetch_add((ptr), (x), __ATOMIC_RELAXED))
+#endif
+
+#ifndef pl_ldadd_acq
+#define pl_ldadd_acq(ptr, x) (__atomic_fetch_add((ptr), (x), __ATOMIC_ACQUIRE))
+#endif
+
+#ifndef pl_ldadd_rel
+#define pl_ldadd_rel(ptr, x) (__atomic_fetch_add((ptr), (x), __ATOMIC_RELEASE))
+#endif
+
+#ifndef pl_ldadd
+#define pl_ldadd(ptr, x) (__atomic_fetch_add((ptr), (x), __ATOMIC_SEQ_CST))
+#endif
+
+
+#ifndef pl_ldand_lax
+#define pl_ldand_lax(ptr, x) (__atomic_fetch_and((ptr), (x), __ATOMIC_RELAXED))
+#endif
+
+#ifndef pl_ldand_acq
+#define pl_ldand_acq(ptr, x) (__atomic_fetch_and((ptr), (x), __ATOMIC_ACQUIRE))
+#endif
+
+#ifndef pl_ldand_rel
+#define pl_ldand_rel(ptr, x) (__atomic_fetch_and((ptr), (x), __ATOMIC_RELEASE))
+#endif
+
+#ifndef pl_ldand
+#define pl_ldand(ptr, x) (__atomic_fetch_and((ptr), (x), __ATOMIC_SEQ_CST))
+#endif
+
+
+#ifndef pl_ldor_lax
+#define pl_ldor_lax(ptr, x) (__atomic_fetch_or((ptr), (x), __ATOMIC_RELAXED))
+#endif
+
+#ifndef pl_ldor_acq
+#define pl_ldor_acq(ptr, x) (__atomic_fetch_or((ptr), (x), __ATOMIC_ACQUIRE))
+#endif
+
+#ifndef pl_ldor_rel
+#define pl_ldor_rel(ptr, x) (__atomic_fetch_or((ptr), (x), __ATOMIC_RELEASE))
+#endif
+
+#ifndef pl_ldor
+#define pl_ldor(ptr, x) (__atomic_fetch_or((ptr), (x), __ATOMIC_SEQ_CST))
+#endif
+
+
+#ifndef pl_ldsub_lax
+#define pl_ldsub_lax(ptr, x) (__atomic_fetch_sub((ptr), (x), __ATOMIC_RELAXED))
+#endif
+
+#ifndef pl_ldsub_acq
+#define pl_ldsub_acq(ptr, x) (__atomic_fetch_sub((ptr), (x), __ATOMIC_ACQUIRE))
+#endif
+
+#ifndef pl_ldsub_rel
+#define pl_ldsub_rel(ptr, x) (__atomic_fetch_sub((ptr), (x), __ATOMIC_RELEASE))
+#endif
+
+#ifndef pl_ldsub
+#define pl_ldsub(ptr, x) (__atomic_fetch_sub((ptr), (x), __ATOMIC_SEQ_CST))
+#endif
+
+
+#ifndef pl_ldxor_lax
+#define pl_ldxor_lax(ptr, x) (__atomic_fetch_xor((ptr), (x), __ATOMIC_RELAXED))
+#endif
+
+#ifndef pl_ldxor_acq
+#define pl_ldxor_acq(ptr, x) (__atomic_fetch_xor((ptr), (x), __ATOMIC_ACQUIRE))
+#endif
+
+#ifndef pl_ldxor_rel
+#define pl_ldxor_rel(ptr, x) (__atomic_fetch_xor((ptr), (x), __ATOMIC_RELEASE))
+#endif
+
+#ifndef pl_ldxor
+#define pl_ldxor(ptr, x) (__atomic_fetch_xor((ptr), (x), __ATOMIC_SEQ_CST))
+#endif
+
+#endif /* end of C11 atomics */
+
+
+/*
+ * Automatically remap to fallback code when available. This allows the arch
+ * specific code above to be used as an immediate fallback for missing C11
+ * definitions. Everything not defined will use the generic code at the end.
+ */ + +#if !defined(pl_cpu_relax) && defined(_pl_cpu_relax) +# define pl_cpu_relax _pl_cpu_relax +#endif + +#if !defined(pl_barrier) && defined(_pl_barrier) +# define pl_barrier _pl_barrier +#endif + +#if !defined(pl_mb) && defined(_pl_mb) +# define pl_mb _pl_mb +#endif + +#if !defined(pl_mb_load) && defined(_pl_mb_load) +# define pl_mb_load _pl_mb_load +#endif + +#if !defined(pl_mb_store) && defined(_pl_mb_store) +# define pl_mb_store _pl_mb_store +#endif + +#if !defined(pl_mb_ato) && defined(_pl_mb_ato) +# define pl_mb_ato _pl_mb_ato +#endif + +#if !defined(pl_mb_ato_load) && defined(_pl_mb_ato_load) +# define pl_mb_ato_load _pl_mb_ato_load +#endif + +#if !defined(pl_mb_ato_store) && defined(_pl_mb_ato_store) +# define pl_mb_ato_store _pl_mb_ato_store +#endif + + +#if !defined(pl_load) && defined(_pl_load) +#define pl_load _pl_load +#endif + +#if !defined(pl_load_lax) && defined(_pl_load_lax) +#define pl_load_lax _pl_load_lax +#endif + +#if !defined(pl_store) && defined(_pl_store) +#define pl_store _pl_store +#endif + +#if !defined(pl_store_lax) && defined(_pl_store_lax) +#define pl_store_lax _pl_store_lax +#endif + + +#if !defined(pl_inc_noret_lax) && defined(_pl_inc_noret_lax) +# define pl_inc_noret_lax _pl_inc_noret_lax +#endif + +#if !defined(pl_inc_noret_acq) && defined(_pl_inc_noret_acq) +# define pl_inc_noret_acq _pl_inc_noret_acq +#endif + +#if !defined(pl_inc_noret_rel) && defined(_pl_inc_noret_rel) +# define pl_inc_noret_rel _pl_inc_noret_rel +#endif + +#if !defined(pl_inc_noret) && defined(_pl_inc_noret) +# define pl_inc_noret _pl_inc_noret +#endif + + +#if !defined(pl_dec_noret_lax) && defined(_pl_dec_noret_lax) +# define pl_dec_noret_lax _pl_dec_noret_lax +#endif + +#if !defined(pl_dec_noret_acq) && defined(_pl_dec_noret_acq) +# define pl_dec_noret_acq _pl_dec_noret_acq +#endif + +#if !defined(pl_dec_noret_rel) && defined(_pl_dec_noret_rel) +# define pl_dec_noret_rel _pl_dec_noret_rel +#endif + +#if !defined(pl_dec_noret) && defined(_pl_dec_noret) +# define pl_dec_noret _pl_dec_noret +#endif + + +#if !defined(pl_inc_lax) && defined(_pl_inc_lax) +# define pl_inc_lax _pl_inc_lax +#endif + +#if !defined(pl_inc_acq) && defined(_pl_inc_acq) +# define pl_inc_acq _pl_inc_acq +#endif + +#if !defined(pl_inc_rel) && defined(_pl_inc_rel) +# define pl_inc_rel _pl_inc_rel +#endif + +#if !defined(pl_inc) && defined(_pl_inc) +# define pl_inc _pl_inc +#endif + + +#if !defined(pl_dec_lax) && defined(_pl_dec_lax) +# define pl_dec_lax _pl_dec_lax +#endif + +#if !defined(pl_dec_acq) && defined(_pl_dec_acq) +# define pl_dec_acq _pl_dec_acq +#endif + +#if !defined(pl_dec_rel) && defined(_pl_dec_rel) +# define pl_dec_rel _pl_dec_rel +#endif + +#if !defined(pl_dec) && defined(_pl_dec) +# define pl_dec _pl_dec +#endif + + +#if !defined(pl_add_lax) && defined(_pl_add_lax) +# define pl_add_lax _pl_add_lax +#endif + +#if !defined(pl_add_acq) && defined(_pl_add_acq) +# define pl_add_acq _pl_add_acq +#endif + +#if !defined(pl_add_rel) && defined(_pl_add_rel) +# define pl_add_rel _pl_add_rel +#endif + +#if !defined(pl_add) && defined(_pl_add) +# define pl_add _pl_add +#endif + + +#if !defined(pl_add_noret_lax) && defined(_pl_add_noret_lax) +# define pl_add_noret_lax _pl_add_noret_lax +#endif + +#if !defined(pl_add_noret_acq) && defined(_pl_add_noret_acq) +# define pl_add_noret_acq _pl_add_noret_acq +#endif + +#if !defined(pl_add_noret_rel) && defined(_pl_add_noret_rel) +# define pl_add_noret_rel _pl_add_noret_rel +#endif + +#if !defined(pl_add_noret) && defined(_pl_add_noret) +# define pl_add_noret 
_pl_add_noret +#endif + +#if !defined(pl_and_lax) && defined(_pl_and_lax) +# define pl_and_lax _pl_and_lax +#endif + +#if !defined(pl_and_acq) && defined(_pl_and_acq) +# define pl_and_acq _pl_and_acq +#endif + +#if !defined(pl_and_rel) && defined(_pl_and_rel) +# define pl_and_rel _pl_and_rel +#endif + +#if !defined(pl_and) && defined(_pl_and) +# define pl_and _pl_and +#endif + + +#if !defined(pl_and_noret_lax) && defined(_pl_and_noret_lax) +# define pl_and_noret_lax _pl_and_noret_lax +#endif + +#if !defined(pl_and_noret_acq) && defined(_pl_and_noret_acq) +# define pl_and_noret_acq _pl_and_noret_acq +#endif + +#if !defined(pl_and_noret_rel) && defined(_pl_and_noret_rel) +# define pl_and_noret_rel _pl_and_noret_rel +#endif + +#if !defined(pl_and_noret) && defined(_pl_and_noret) +# define pl_and_noret _pl_and_noret +#endif + + +#if !defined(pl_or_lax) && defined(_pl_or_lax) +# define pl_or_lax _pl_or_lax +#endif + +#if !defined(pl_or_acq) && defined(_pl_or_acq) +# define pl_or_acq _pl_or_acq +#endif + +#if !defined(pl_or_rel) && defined(_pl_or_rel) +# define pl_or_rel _pl_or_rel +#endif + +#if !defined(pl_or) && defined(_pl_or) +# define pl_or _pl_or +#endif + + +#if !defined(pl_or_noret_lax) && defined(_pl_or_noret_lax) +# define pl_or_noret_lax _pl_or_noret_lax +#endif + +#if !defined(pl_or_noret_acq) && defined(_pl_or_noret_acq) +# define pl_or_noret_acq _pl_or_noret_acq +#endif + +#if !defined(pl_or_noret_rel) && defined(_pl_or_noret_rel) +# define pl_or_noret_rel _pl_or_noret_rel +#endif + +#if !defined(pl_or_noret) && defined(_pl_or_noret) +# define pl_or_noret _pl_or_noret +#endif + + +#if !defined(pl_xor_lax) && defined(_pl_xor_lax) +# define pl_xor_lax _pl_xor_lax +#endif + +#if !defined(pl_xor_acq) && defined(_pl_xor_acq) +# define pl_xor_acq _pl_xor_acq +#endif + +#if !defined(pl_xor_rel) && defined(_pl_xor_rel) +# define pl_xor_rel _pl_xor_rel +#endif + +#if !defined(pl_xor) && defined(_pl_xor) +# define pl_xor _pl_xor +#endif + + +#if !defined(pl_xor_noret_lax) && defined(_pl_xor_noret_lax) +# define pl_xor_noret_lax _pl_xor_noret_lax +#endif + +#if !defined(pl_xor_noret_acq) && defined(_pl_xor_noret_acq) +# define pl_xor_noret_acq _pl_xor_noret_acq +#endif + +#if !defined(pl_xor_noret_rel) && defined(_pl_xor_noret_rel) +# define pl_xor_noret_rel _pl_xor_noret_rel +#endif + +#if !defined(pl_xor_noret) && defined(_pl_xor_noret) +# define pl_xor_noret _pl_xor_noret +#endif + + +#if !defined(pl_sub_lax) && defined(_pl_sub_lax) +# define pl_sub_lax _pl_sub_lax +#endif + +#if !defined(pl_sub_acq) && defined(_pl_sub_acq) +# define pl_sub_acq _pl_sub_acq +#endif + +#if !defined(pl_sub_rel) && defined(_pl_sub_rel) +# define pl_sub_rel _pl_sub_rel +#endif + +#if !defined(pl_sub) && defined(_pl_sub) +# define pl_sub _pl_sub +#endif + + +#if !defined(pl_sub_noret_lax) && defined(_pl_sub_noret_lax) +# define pl_sub_noret_lax _pl_sub_noret_lax +#endif + +#if !defined(pl_sub_noret_acq) && defined(_pl_sub_noret_acq) +# define pl_sub_noret_acq _pl_sub_noret_acq +#endif + +#if !defined(pl_sub_noret_rel) && defined(_pl_sub_noret_rel) +# define pl_sub_noret_rel _pl_sub_noret_rel +#endif + +#if !defined(pl_sub_noret) && defined(_pl_sub_noret) +# define pl_sub_noret _pl_sub_noret +#endif + + +#if !defined(pl_btr_lax) && defined(_pl_btr_lax) +# define pl_btr_lax _pl_btr_lax +#endif + +#if !defined(pl_btr_acq) && defined(_pl_btr_acq) +# define pl_btr_acq _pl_btr_acq +#endif + +#if !defined(pl_btr_rel) && defined(_pl_btr_rel) +# define pl_btr_rel _pl_btr_rel +#endif + +#if !defined(pl_btr) && 
defined(_pl_btr) +# define pl_btr _pl_btr +#endif + + +#if !defined(pl_bts_lax) && defined(_pl_bts_lax) +# define pl_bts_lax _pl_bts_lax +#endif + +#if !defined(pl_bts_acq) && defined(_pl_bts_acq) +# define pl_bts_acq _pl_bts_acq +#endif + +#if !defined(pl_bts_rel) && defined(_pl_bts_rel) +# define pl_bts_rel _pl_bts_rel +#endif + +#if !defined(pl_bts) && defined(_pl_bts) +# define pl_bts _pl_bts +#endif + + +#if !defined(pl_xadd) && defined(_pl_xadd) +# define pl_xadd _pl_xadd +#endif + +#if !defined(pl_cmpxchg) && defined(_pl_cmpxchg) +# define pl_cmpxchg _pl_cmpxchg +#endif + +#if !defined(pl_xchg) && defined(_pl_xchg) +# define pl_xchg _pl_xchg +#endif + + +#if !defined(pl_ldadd_lax) && defined(_pl_ldadd_lax) +# define pl_ldadd_lax _pl_ldadd_lax +#endif + +#if !defined(pl_ldadd_acq) && defined(_pl_ldadd_acq) +# define pl_ldadd_acq _pl_ldadd_acq +#endif + +#if !defined(pl_ldadd_rel) && defined(_pl_ldadd_rel) +# define pl_ldadd_rel _pl_ldadd_rel +#endif + +#if !defined(pl_ldadd) && defined(_pl_ldadd) +# define pl_ldadd _pl_ldadd +#endif + + +#if !defined(pl_ldand_lax) && defined(_pl_ldand_lax) +# define pl_ldand_lax _pl_ldand_lax +#endif + +#if !defined(pl_ldand_acq) && defined(_pl_ldand_acq) +# define pl_ldand_acq _pl_ldand_acq +#endif + +#if !defined(pl_ldand_rel) && defined(_pl_ldand_rel) +# define pl_ldand_rel _pl_ldand_rel +#endif + +#if !defined(pl_ldand) && defined(_pl_ldand) +# define pl_ldand _pl_ldand +#endif + + +#if !defined(pl_ldor_lax) && defined(_pl_ldor_lax) +# define pl_ldor_lax _pl_ldor_lax +#endif + +#if !defined(pl_ldor_acq) && defined(_pl_ldor_acq) +# define pl_ldor_acq _pl_ldor_acq +#endif + +#if !defined(pl_ldor_rel) && defined(_pl_ldor_rel) +# define pl_ldor_rel _pl_ldor_rel +#endif + +#if !defined(pl_ldor) && defined(_pl_ldor) +# define pl_ldor _pl_ldor +#endif + + +#if !defined(pl_ldxor_lax) && defined(_pl_ldxor_lax) +# define pl_ldxor_lax _pl_ldxor_lax +#endif + +#if !defined(pl_ldxor_acq) && defined(_pl_ldxor_acq) +# define pl_ldxor_acq _pl_ldxor_acq +#endif + +#if !defined(pl_ldxor_rel) && defined(_pl_ldxor_rel) +# define pl_ldxor_rel _pl_ldxor_rel +#endif + +#if !defined(pl_ldxor) && defined(_pl_ldxor) +# define pl_ldxor _pl_ldxor +#endif + + +#if !defined(pl_ldsub_lax) && defined(_pl_ldsub_lax) +# define pl_ldsub_lax _pl_ldsub_lax +#endif + +#if !defined(pl_ldsub_acq) && defined(_pl_ldsub_acq) +# define pl_ldsub_acq _pl_ldsub_acq +#endif + +#if !defined(pl_ldsub_rel) && defined(_pl_ldsub_rel) +# define pl_ldsub_rel _pl_ldsub_rel +#endif + +#if !defined(pl_ldsub) && defined(_pl_ldsub) +# define pl_ldsub _pl_ldsub +#endif + + +/* + * Generic code using the __sync API for everything not defined above. 
+ */ + + +/* CPU relaxation while waiting */ +#ifndef pl_cpu_relax +#define pl_cpu_relax() do { \ + asm volatile(""); \ + } while (0) +#endif + +/* compiler-only memory barrier, for use around locks */ +#ifndef pl_barrier +#define pl_barrier() do { \ + asm volatile("" ::: "memory"); \ + } while (0) +#endif + +/* full memory barrier */ +#ifndef pl_mb #define pl_mb() do { \ __sync_synchronize(); \ } while (0) - -#define pl_inc_noret(ptr) ({ __sync_add_and_fetch((ptr), 1); }) -#define pl_dec_noret(ptr) ({ __sync_sub_and_fetch((ptr), 1); }) -#define pl_inc(ptr) ({ __sync_add_and_fetch((ptr), 1); }) -#define pl_dec(ptr) ({ __sync_sub_and_fetch((ptr), 1); }) -#define pl_add(ptr, x) ({ __sync_add_and_fetch((ptr), (x)); }) -#define pl_and(ptr, x) ({ __sync_and_and_fetch((ptr), (x)); }) -#define pl_or(ptr, x) ({ __sync_or_and_fetch((ptr), (x)); }) -#define pl_xor(ptr, x) ({ __sync_xor_and_fetch((ptr), (x)); }) -#define pl_sub(ptr, x) ({ __sync_sub_and_fetch((ptr), (x)); }) -#define pl_bts(ptr, bit) ({ typeof(*(ptr)) __pl_t = (1u << (bit)); \ - __sync_fetch_and_or((ptr), __pl_t) & __pl_t; \ - }) -#define pl_xadd(ptr, x) ({ __sync_fetch_and_add((ptr), (x)); }) -#define pl_cmpxchg(ptr, o, n) ({ __sync_val_compare_and_swap((ptr), (o), (n)); }) -#define pl_xchg(ptr, x) ({ typeof(*(ptr)) __pl_t; \ - do { __pl_t = *(ptr); \ - } while (!__sync_bool_compare_and_swap((ptr), __pl_t, (x))); \ - __pl_t; \ - }) - #endif +#ifndef pl_mb_load +#define pl_mb_load() pl_mb() +#endif + +#ifndef pl_mb_store +#define pl_mb_store() pl_mb() +#endif + +#ifndef pl_mb_ato +#define pl_mb_ato() pl_mb() +#endif + +#ifndef pl_mb_ato_load +#define pl_mb_ato_load() pl_mb_ato() +#endif + +#ifndef pl_mb_ato_store +#define pl_mb_ato_store() pl_mb_ato() +#endif + +/* atomic load: volatile after a load barrier */ +#ifndef pl_load +#define pl_load(ptr) ({ \ + typeof(*(ptr)) __pl_ret = ({ \ + pl_mb_load(); \ + *(volatile typeof(ptr))ptr; \ + }); \ + __pl_ret; \ + }) +#endif + +/* atomic store, old style using a CAS */ +#ifndef pl_store +#define pl_store(ptr, x) do { \ + typeof((ptr)) __pl_ptr = (ptr); \ + typeof((x)) __pl_x = (x); \ + typeof(*(ptr)) __pl_old; \ + do { \ + __pl_old = *__pl_ptr; \ + } while (!__sync_bool_compare_and_swap(__pl_ptr, __pl_old, __pl_x)); \ + } while (0) +#endif + +#ifndef pl_inc_noret +#define pl_inc_noret(ptr) do { __sync_add_and_fetch((ptr), 1); } while (0) +#endif + +#ifndef pl_dec_noret +#define pl_dec_noret(ptr) do { __sync_sub_and_fetch((ptr), 1); } while (0) +#endif + +#ifndef pl_inc +#define pl_inc(ptr) ({ __sync_add_and_fetch((ptr), 1); }) +#endif + +#ifndef pl_dec +#define pl_dec(ptr) ({ __sync_sub_and_fetch((ptr), 1); }) +#endif + +#ifndef pl_add +#define pl_add(ptr, x) ({ __sync_add_and_fetch((ptr), (x)); }) +#endif + +#ifndef pl_and +#define pl_and(ptr, x) ({ __sync_and_and_fetch((ptr), (x)); }) +#endif + +#ifndef pl_or +#define pl_or(ptr, x) ({ __sync_or_and_fetch((ptr), (x)); }) +#endif + +#ifndef pl_xor +#define pl_xor(ptr, x) ({ __sync_xor_and_fetch((ptr), (x)); }) +#endif + +#ifndef pl_sub +#define pl_sub(ptr, x) ({ __sync_sub_and_fetch((ptr), (x)); }) +#endif + +#ifndef pl_btr +#define pl_btr(ptr, bit) ({ typeof(*(ptr)) __pl_t = ((typeof(*(ptr)))1) << (bit); \ + __sync_fetch_and_and((ptr), ~__pl_t) & __pl_t; \ + }) +#endif + +#ifndef pl_bts +#define pl_bts(ptr, bit) ({ typeof(*(ptr)) __pl_t = ((typeof(*(ptr)))1) << (bit); \ + __sync_fetch_and_or((ptr), __pl_t) & __pl_t; \ + }) +#endif + +#ifndef pl_xadd +#define pl_xadd(ptr, x) ({ __sync_fetch_and_add((ptr), (x)); }) +#endif + +#ifndef 
pl_cmpxchg +#define pl_cmpxchg(ptr, o, n) ({ __sync_val_compare_and_swap((ptr), (o), (n)); }) +#endif + +#ifndef pl_xchg +#define pl_xchg(ptr, x) ({ \ + typeof((ptr)) __pl_ptr = (ptr); \ + typeof((x)) __pl_x = (x); \ + typeof(*(ptr)) __pl_old; \ + do { \ + __pl_old = *__pl_ptr; \ + } while (!__sync_bool_compare_and_swap(__pl_ptr, __pl_old, __pl_x)); \ + __pl_old; \ + }) +#endif + +#ifndef pl_ldadd +#define pl_ldadd(ptr, x) ({ __sync_fetch_and_add((ptr), (x)); }) +#endif + +#ifndef pl_ldand +#define pl_ldand(ptr, x) ({ __sync_fetch_and_and((ptr), (x)); }) +#endif + +#ifndef pl_ldor +#define pl_ldor(ptr, x) ({ __sync_fetch_and_or((ptr), (x)); }) +#endif + +#ifndef pl_ldxor +#define pl_ldxor(ptr, x) ({ __sync_fetch_and_xor((ptr), (x)); }) +#endif + +#ifndef pl_ldsub +#define pl_ldsub(ptr, x) ({ __sync_fetch_and_sub((ptr), (x)); }) +#endif + +/* certain _noret operations may be defined from the regular ones */ +#if !defined(pl_inc_noret) && defined(pl_inc) +# define pl_inc_noret(ptr) (void)pl_inc(ptr) +#endif + +#if !defined(pl_dec_noret) && defined(pl_dec) +# define pl_dec_noret(ptr) (void)pl_dec(ptr) +#endif + +#if !defined(pl_add_noret) && defined(pl_add) +# define pl_add_noret(ptr, x) (void)pl_add(ptr, x) +#endif + +#if !defined(pl_sub_noret) && defined(pl_sub) +# define pl_sub_noret(ptr, x) (void)pl_sub(ptr, x) +#endif + +#if !defined(pl_or_noret) && defined(pl_or) +# define pl_or_noret(ptr, x) (void)pl_or(ptr, x) +#endif + +#if !defined(pl_and_noret) && defined(pl_and) +# define pl_and_noret(ptr, x) (void)pl_and(ptr, x) +#endif + +#if !defined(pl_xor_noret) && defined(pl_xor) +# define pl_xor_noret(ptr, x) (void)pl_xor(ptr, x) +#endif + +/* certain memory orders may fallback to the generic seq_cst definition */ + +#if !defined(pl_load_lax) && defined(pl_load) +#define pl_load_lax pl_load +#endif + + +#if !defined(pl_store_lax) && defined(pl_store) +#define pl_store_lax pl_store +#endif + + +#if !defined(pl_inc_lax) && defined(pl_inc) +# define pl_inc_lax pl_inc +#endif +#if !defined(pl_inc_acq) && defined(pl_inc) +# define pl_inc_acq pl_inc +#endif +#if !defined(pl_inc_rel) && defined(pl_inc) +# define pl_inc_rel pl_inc +#endif + + +#if !defined(pl_dec_lax) && defined(pl_dec) +# define pl_dec_lax pl_dec +#endif +#if !defined(pl_dec_acq) && defined(pl_dec) +# define pl_dec_acq pl_dec +#endif + +#if !defined(pl_dec_rel) && defined(pl_dec) +# define pl_dec_rel pl_dec +#endif + + +#if !defined(pl_inc_noret_lax) && defined(pl_inc_noret) +# define pl_inc_noret_lax pl_inc_noret +#endif + +#if !defined(pl_inc_noret_acq) && defined(pl_inc_noret) +# define pl_inc_noret_acq pl_inc_noret +#endif + +#if !defined(pl_inc_noret_rel) && defined(pl_inc_noret) +# define pl_inc_noret_rel pl_inc_noret +#endif + + +#if !defined(pl_dec_noret_lax) && defined(pl_dec_noret) +# define pl_dec_noret_lax pl_dec_noret +#endif + +#if !defined(pl_dec_noret_acq) && defined(pl_dec_noret) +# define pl_dec_noret_acq pl_dec_noret +#endif + +#if !defined(pl_dec_noret_rel) && defined(pl_dec_noret) +# define pl_dec_noret_rel pl_dec_noret +#endif + + +#if !defined(pl_add_lax) && defined(pl_add) +# define pl_add_lax pl_add +#endif + +#if !defined(pl_add_acq) && defined(pl_add) +# define pl_add_acq pl_add +#endif + +#if !defined(pl_add_rel) && defined(pl_add) +# define pl_add_rel pl_add +#endif + + +#if !defined(pl_sub_lax) && defined(pl_sub) +# define pl_sub_lax pl_sub +#endif + +#if !defined(pl_sub_acq) && defined(pl_sub) +# define pl_sub_acq pl_sub +#endif + +#if !defined(pl_sub_rel) && defined(pl_sub) +# define pl_sub_rel 
pl_sub +#endif + + +#if !defined(pl_and_lax) && defined(pl_and) +# define pl_and_lax pl_and +#endif + +#if !defined(pl_and_acq) && defined(pl_and) +# define pl_and_acq pl_and +#endif + +#if !defined(pl_and_rel) && defined(pl_and) +# define pl_and_rel pl_and +#endif + + +#if !defined(pl_or_lax) && defined(pl_or) +# define pl_or_lax pl_or +#endif + +#if !defined(pl_or_acq) && defined(pl_or) +# define pl_or_acq pl_or +#endif + +#if !defined(pl_or_rel) && defined(pl_or) +# define pl_or_rel pl_or +#endif + + +#if !defined(pl_xor_lax) && defined(pl_xor) +# define pl_xor_lax pl_xor +#endif + +#if !defined(pl_xor_acq) && defined(pl_xor) +# define pl_xor_acq pl_xor +#endif + +#if !defined(pl_xor_rel) && defined(pl_xor) +# define pl_xor_rel pl_xor +#endif + + +#if !defined(pl_add_noret_lax) && defined(pl_add_noret) +# define pl_add_noret_lax pl_add_noret +#endif + +#if !defined(pl_add_noret_acq) && defined(pl_add_noret) +# define pl_add_noret_acq pl_add_noret +#endif + +#if !defined(pl_add_noret_rel) && defined(pl_add_noret) +# define pl_add_noret_rel pl_add_noret +#endif + + +#if !defined(pl_sub_noret_lax) && defined(pl_sub_noret) +# define pl_sub_noret_lax pl_sub_noret +#endif + +#if !defined(pl_sub_noret_acq) && defined(pl_sub_noret) +# define pl_sub_noret_acq pl_sub_noret +#endif + +#if !defined(pl_sub_noret_rel) && defined(pl_sub_noret) +# define pl_sub_noret_rel pl_sub_noret +#endif + + +#if !defined(pl_and_noret_lax) && defined(pl_and_noret) +# define pl_and_noret_lax pl_and_noret +#endif + +#if !defined(pl_and_noret_acq) && defined(pl_and_noret) +# define pl_and_noret_acq pl_and_noret +#endif + +#if !defined(pl_and_noret_rel) && defined(pl_and_noret) +# define pl_and_noret_rel pl_and_noret +#endif + + +#if !defined(pl_or_noret_lax) && defined(pl_or_noret) +# define pl_or_noret_lax pl_or_noret +#endif + +#if !defined(pl_or_noret_acq) && defined(pl_or_noret) +# define pl_or_noret_acq pl_or_noret +#endif + +#if !defined(pl_or_noret_rel) && defined(pl_or_noret) +# define pl_or_noret_rel pl_or_noret +#endif + + +#if !defined(pl_xor_noret_lax) && defined(pl_xor_noret) +# define pl_xor_noret_lax pl_xor_noret +#endif + +#if !defined(pl_xor_noret_acq) && defined(pl_xor_noret) +# define pl_xor_noret_acq pl_xor_noret +#endif + +#if !defined(pl_xor_noret_rel) && defined(pl_xor_noret) +# define pl_xor_noret_rel pl_xor_noret +#endif + + +#if !defined(pl_btr_lax) && defined(pl_btr) +# define pl_btr_lax pl_btr +#endif + +#if !defined(pl_btr_acq) && defined(pl_btr) +# define pl_btr_acq pl_btr +#endif + +#if !defined(pl_btr_rel) && defined(pl_btr) +# define pl_btr_rel pl_btr +#endif + + +#if !defined(pl_bts_lax) && defined(pl_bts) +# define pl_bts_lax pl_bts +#endif + +#if !defined(pl_bts_acq) && defined(pl_bts) +# define pl_bts_acq pl_bts +#endif + +#if !defined(pl_bts_rel) && defined(pl_bts) +# define pl_bts_rel pl_bts +#endif + + +#if !defined(pl_ldadd_lax) && defined(pl_ldadd) +# define pl_ldadd_lax pl_ldadd +#endif + +#if !defined(pl_ldadd_acq) && defined(pl_ldadd) +# define pl_ldadd_acq pl_ldadd +#endif + +#if !defined(pl_ldadd_rel) && defined(pl_ldadd) +# define pl_ldadd_rel pl_ldadd +#endif + + +#if !defined(pl_ldsub_lax) && defined(pl_ldsub) +# define pl_ldsub_lax pl_ldsub +#endif + +#if !defined(pl_ldsub_acq) && defined(pl_ldsub) +# define pl_ldsub_acq pl_ldsub +#endif + +#if !defined(pl_ldsub_rel) && defined(pl_ldsub) +# define pl_ldsub_rel pl_ldsub +#endif + + +#if !defined(pl_ldand_lax) && defined(pl_ldand) +# define pl_ldand_lax pl_ldand +#endif + +#if !defined(pl_ldand_acq) && defined(pl_ldand) 
+# define pl_ldand_acq pl_ldand +#endif + +#if !defined(pl_ldand_rel) && defined(pl_ldand) +# define pl_ldand_rel pl_ldand +#endif + + +#if !defined(pl_ldor_lax) && defined(pl_ldor) +# define pl_ldor_lax pl_ldor +#endif + +#if !defined(pl_ldor_acq) && defined(pl_ldor) +# define pl_ldor_acq pl_ldor +#endif + +#if !defined(pl_ldor_rel) && defined(pl_ldor) +# define pl_ldor_rel pl_ldor +#endif + + +#if !defined(pl_ldxor_lax) && defined(pl_ldxor) +# define pl_ldxor_lax pl_ldxor +#endif + +#if !defined(pl_ldxor_acq) && defined(pl_ldxor) +# define pl_ldxor_acq pl_ldxor +#endif + +#if !defined(pl_ldxor_rel) && defined(pl_ldxor) +# define pl_ldxor_rel pl_ldxor +#endif + + #endif /* PL_ATOMIC_OPS_H */ diff --git a/include/import/plock.h b/include/import/plock.h index ac10950a0..4e6ab8c26 100644 --- a/include/import/plock.h +++ b/include/import/plock.h @@ -74,8 +74,12 @@ } while (_r & mask); \ _r; /* return value */ \ }) -#else +#else /* not PLOCK_DISABLE_EBO */ +# if defined(PLOCK_INLINE_EBO) +__attribute__((unused,always_inline,no_instrument_function)) inline +# else __attribute__((unused,noinline,no_instrument_function)) +# endif static unsigned long pl_wait_unlock_long(const unsigned long *lock, const unsigned long mask) { unsigned long ret; @@ -90,13 +94,13 @@ static unsigned long pl_wait_unlock_long(const unsigned long *lock, const unsign loops -= 32768; } #endif - for (; loops >= 200; loops -= 10) + for (; loops >= 60; loops --) pl_cpu_relax(); for (; loops >= 1; loops--) pl_barrier(); - ret = pl_deref_long(lock); + ret = pl_load(lock); if (__builtin_expect(ret & mask, 0) == 0) break; @@ -228,18 +232,18 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int register unsigned long __pl_r = pl_deref_long(lock) & PLOCK64_WL_ANY; \ pl_barrier(); \ if (!__builtin_expect(__pl_r, 0)) { \ - __pl_r = pl_xadd((lock), PLOCK64_RL_1) & PLOCK64_WL_ANY; \ + __pl_r = pl_ldadd_acq((lock), PLOCK64_RL_1) & PLOCK64_WL_ANY; \ if (__builtin_expect(__pl_r, 0)) \ - pl_sub((lock), PLOCK64_RL_1); \ + pl_sub_noret((lock), PLOCK64_RL_1); \ } \ !__pl_r; /* return value */ \ }) : (sizeof(*(lock)) == 4) ? 
({ \ register unsigned int __pl_r = pl_deref_int(lock) & PLOCK32_WL_ANY; \ pl_barrier(); \ if (!__builtin_expect(__pl_r, 0)) { \ - __pl_r = pl_xadd((lock), PLOCK32_RL_1) & PLOCK32_WL_ANY; \ + __pl_r = pl_ldadd_acq((lock), PLOCK32_RL_1) & PLOCK32_WL_ANY; \ if (__builtin_expect(__pl_r, 0)) \ - pl_sub((lock), PLOCK32_RL_1); \ + pl_sub_noret((lock), PLOCK32_RL_1); \ } \ !__pl_r; /* return value */ \ }) : ({ \ @@ -259,12 +263,15 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int register unsigned long *__lk_r = (unsigned long *)(lock); \ register unsigned long __set_r = PLOCK64_RL_1; \ register unsigned long __msk_r = PLOCK64_WL_ANY; \ - while (1) { \ - if (__builtin_expect(pl_deref_long(__lk_r) & __msk_r, 0)) \ - pl_wait_unlock_long(__lk_r, __msk_r); \ - if (!__builtin_expect(pl_xadd(__lk_r, __set_r) & __msk_r, 0)) \ - break; \ - pl_sub(__lk_r, __set_r); \ + register unsigned long __old_r = pl_cmpxchg(__lk_r, 0, __set_r); \ + if (__old_r) { \ + while (1) { \ + if (__old_r & __msk_r) \ + pl_wait_unlock_long(__lk_r, __msk_r); \ + if (!(pl_ldadd_acq(__lk_r, __set_r) & __msk_r)) \ + break; \ + __old_r = pl_sub_lax(__lk_r, __set_r); \ + } \ } \ pl_barrier(); \ 0; \ @@ -272,12 +279,15 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int register unsigned int *__lk_r = (unsigned int *)(lock); \ register unsigned int __set_r = PLOCK32_RL_1; \ register unsigned int __msk_r = PLOCK32_WL_ANY; \ - while (1) { \ - if (__builtin_expect(pl_deref_int(__lk_r) & __msk_r, 0)) \ - pl_wait_unlock_int(__lk_r, __msk_r); \ - if (!__builtin_expect(pl_xadd(__lk_r, __set_r) & __msk_r, 0)) \ - break; \ - pl_sub(__lk_r, __set_r); \ + register unsigned int __old_r = pl_cmpxchg(__lk_r, 0, __set_r); \ + if (__old_r) { \ + while (1) { \ + if (__old_r & __msk_r) \ + pl_wait_unlock_int(__lk_r, __msk_r); \ + if (!(pl_ldadd_acq(__lk_r, __set_r) & __msk_r)) \ + break; \ + __old_r = pl_sub_lax(__lk_r, __set_r); \ + } \ } \ pl_barrier(); \ 0; \ @@ -292,10 +302,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int #define pl_drop_r(lock) ( \ (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ pl_barrier(); \ - pl_sub(lock, PLOCK64_RL_1); \ + pl_sub_noret_rel(lock, PLOCK64_RL_1); \ }) : (sizeof(*(lock)) == 4) ? ({ \ pl_barrier(); \ - pl_sub(lock, PLOCK32_RL_1); \ + pl_sub_noret_rel(lock, PLOCK32_RL_1); \ }) : ({ \ void __unsupported_argument_size_for_pl_drop_r__(char *,int); \ if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ @@ -309,20 +319,20 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int register unsigned long __pl_r = pl_deref_long(lock); \ pl_barrier(); \ if (!__builtin_expect(__pl_r & (PLOCK64_WL_ANY | PLOCK64_SL_ANY), 0)) { \ - __pl_r = pl_xadd((lock), PLOCK64_SL_1 | PLOCK64_RL_1) & \ + __pl_r = pl_ldadd_acq((lock), PLOCK64_SL_1 | PLOCK64_RL_1) & \ (PLOCK64_WL_ANY | PLOCK64_SL_ANY); \ if (__builtin_expect(__pl_r, 0)) \ - pl_sub((lock), PLOCK64_SL_1 | PLOCK64_RL_1); \ + pl_sub_noret_lax((lock), PLOCK64_SL_1 | PLOCK64_RL_1); \ } \ !__pl_r; /* return value */ \ }) : (sizeof(*(lock)) == 4) ? 
({ \ register unsigned int __pl_r = pl_deref_int(lock); \ pl_barrier(); \ if (!__builtin_expect(__pl_r & (PLOCK32_WL_ANY | PLOCK32_SL_ANY), 0)) { \ - __pl_r = pl_xadd((lock), PLOCK32_SL_1 | PLOCK32_RL_1) & \ + __pl_r = pl_ldadd_acq((lock), PLOCK32_SL_1 | PLOCK32_RL_1) & \ (PLOCK32_WL_ANY | PLOCK32_SL_ANY); \ if (__builtin_expect(__pl_r, 0)) \ - pl_sub((lock), PLOCK32_SL_1 | PLOCK32_RL_1); \ + pl_sub_noret_lax((lock), PLOCK32_SL_1 | PLOCK32_RL_1); \ } \ !__pl_r; /* return value */ \ }) : ({ \ @@ -344,9 +354,9 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int register unsigned long __set_r = PLOCK64_SL_1 | PLOCK64_RL_1; \ register unsigned long __msk_r = PLOCK64_WL_ANY | PLOCK64_SL_ANY; \ while (1) { \ - if (!__builtin_expect(pl_xadd(__lk_r, __set_r) & __msk_r, 0)) \ + if (!__builtin_expect(pl_ldadd_acq(__lk_r, __set_r) & __msk_r, 0)) \ break; \ - pl_sub(__lk_r, __set_r); \ + pl_sub_noret_lax(__lk_r, __set_r); \ pl_wait_unlock_long(__lk_r, __msk_r); \ } \ pl_barrier(); \ @@ -356,9 +366,9 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int register unsigned int __set_r = PLOCK32_SL_1 | PLOCK32_RL_1; \ register unsigned int __msk_r = PLOCK32_WL_ANY | PLOCK32_SL_ANY; \ while (1) { \ - if (!__builtin_expect(pl_xadd(__lk_r, __set_r) & __msk_r, 0)) \ + if (!__builtin_expect(pl_ldadd_acq(__lk_r, __set_r) & __msk_r, 0)) \ break; \ - pl_sub(__lk_r, __set_r); \ + pl_sub_noret_lax(__lk_r, __set_r); \ pl_wait_unlock_int(__lk_r, __msk_r); \ } \ pl_barrier(); \ @@ -374,10 +384,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int #define pl_drop_s(lock) ( \ (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ pl_barrier(); \ - pl_sub(lock, PLOCK64_SL_1 + PLOCK64_RL_1); \ + pl_sub_noret_rel(lock, PLOCK64_SL_1 + PLOCK64_RL_1); \ }) : (sizeof(*(lock)) == 4) ? ({ \ pl_barrier(); \ - pl_sub(lock, PLOCK32_SL_1 + PLOCK32_RL_1); \ + pl_sub_noret_rel(lock, PLOCK32_SL_1 + PLOCK32_RL_1); \ }) : ({ \ void __unsupported_argument_size_for_pl_drop_s__(char *,int); \ if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ @@ -389,10 +399,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int #define pl_stor(lock) ( \ (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ pl_barrier(); \ - pl_sub(lock, PLOCK64_SL_1); \ + pl_sub_noret(lock, PLOCK64_SL_1); \ }) : (sizeof(*(lock)) == 4) ? ({ \ pl_barrier(); \ - pl_sub(lock, PLOCK32_SL_1); \ + pl_sub_noret(lock, PLOCK32_SL_1); \ }) : ({ \ void __unsupported_argument_size_for_pl_stor__(char *,int); \ if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ @@ -403,14 +413,14 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int /* take the W lock under the S lock */ #define pl_stow(lock) ( \ (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ - register unsigned long __pl_r = pl_xadd((lock), PLOCK64_WL_1); \ - while ((__pl_r & PLOCK64_RL_ANY) != PLOCK64_RL_1) \ - __pl_r = pl_deref_long(lock); \ + register unsigned long __pl_r = pl_ldadd((lock), PLOCK64_WL_1); \ + if (__pl_r & (PLOCK64_RL_ANY & ~PLOCK64_RL_1)) \ + __pl_r = pl_wait_unlock_long((const unsigned long*)lock, (PLOCK64_RL_ANY & ~PLOCK64_RL_1)); \ pl_barrier(); \ }) : (sizeof(*(lock)) == 4) ? 
({ \ - register unsigned int __pl_r = pl_xadd((lock), PLOCK32_WL_1); \ - while ((__pl_r & PLOCK32_RL_ANY) != PLOCK32_RL_1) \ - __pl_r = pl_deref_int(lock); \ + register unsigned int __pl_r = pl_ldadd((lock), PLOCK32_WL_1); \ + if (__pl_r & (PLOCK32_RL_ANY & ~PLOCK32_RL_1)) \ + __pl_r = pl_wait_unlock_int((const unsigned int*)lock, (PLOCK32_RL_ANY & ~PLOCK32_RL_1)); \ pl_barrier(); \ }) : ({ \ void __unsupported_argument_size_for_pl_stow__(char *,int); \ @@ -423,10 +433,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int #define pl_wtos(lock) ( \ (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ pl_barrier(); \ - pl_sub(lock, PLOCK64_WL_1); \ + pl_sub_noret(lock, PLOCK64_WL_1); \ }) : (sizeof(*(lock)) == 4) ? ({ \ pl_barrier(); \ - pl_sub(lock, PLOCK32_WL_1); \ + pl_sub_noret(lock, PLOCK32_WL_1); \ }) : ({ \ void __unsupported_argument_size_for_pl_wtos__(char *,int); \ if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ @@ -438,10 +448,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int #define pl_wtor(lock) ( \ (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ pl_barrier(); \ - pl_sub(lock, PLOCK64_WL_1 | PLOCK64_SL_1); \ + pl_sub_noret(lock, PLOCK64_WL_1 | PLOCK64_SL_1); \ }) : (sizeof(*(lock)) == 4) ? ({ \ pl_barrier(); \ - pl_sub(lock, PLOCK32_WL_1 | PLOCK32_SL_1); \ + pl_sub_noret(lock, PLOCK32_WL_1 | PLOCK32_SL_1); \ }) : ({ \ void __unsupported_argument_size_for_pl_wtor__(char *,int); \ if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ @@ -469,10 +479,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int register unsigned long __pl_r = pl_deref_long(lock); \ pl_barrier(); \ if (!__builtin_expect(__pl_r & (PLOCK64_WL_ANY | PLOCK64_SL_ANY), 0)) { \ - __pl_r = pl_xadd((lock), PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1); \ + __pl_r = pl_ldadd_acq((lock), PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1);\ if (__builtin_expect(__pl_r & (PLOCK64_WL_ANY | PLOCK64_SL_ANY), 0)) { \ /* a writer, seeker or atomic is present, let's leave */ \ - pl_sub((lock), PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1); \ + pl_sub_noret_lax((lock), PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1);\ __pl_r &= (PLOCK64_WL_ANY | PLOCK64_SL_ANY); /* return value */\ } else { \ /* wait for all other readers to leave */ \ @@ -486,10 +496,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int register unsigned int __pl_r = pl_deref_int(lock); \ pl_barrier(); \ if (!__builtin_expect(__pl_r & (PLOCK32_WL_ANY | PLOCK32_SL_ANY), 0)) { \ - __pl_r = pl_xadd((lock), PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1); \ + __pl_r = pl_ldadd_acq((lock), PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1);\ if (__builtin_expect(__pl_r & (PLOCK32_WL_ANY | PLOCK32_SL_ANY), 0)) { \ /* a writer, seeker or atomic is present, let's leave */ \ - pl_sub((lock), PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1); \ + pl_sub_noret_lax((lock), PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1);\ __pl_r &= (PLOCK32_WL_ANY | PLOCK32_SL_ANY); /* return value */\ } else { \ /* wait for all other readers to leave */ \ @@ -517,15 +527,15 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int register unsigned long __msk_r = PLOCK64_WL_ANY | PLOCK64_SL_ANY; \ register unsigned long __pl_r; \ while (1) { \ - __pl_r = pl_xadd(__lk_r, __set_r); \ + __pl_r = pl_ldadd_acq(__lk_r, __set_r); \ if (!__builtin_expect(__pl_r & __msk_r, 0)) \ break; \ - pl_sub(__lk_r, __set_r); \ + 
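pl_stow(), pl_wtos() and pl_wtor() above form the upgrade/downgrade path around the seek lock; the functional change is that pl_stow() now parks in pl_wait_unlock_*() on the reader bits other than its own instead of re-reading the word in a tight loop. A sketch of the intended call sequence, again assuming pl_take_s()/pl_drop_s() and the plock.h header:

#include "plock.h"   /* assumed header name */

static unsigned long lock = 0;
static struct { int len; int buf[16]; } ring;

/* Upgrade only for the shortest possible window: S -> W to modify, then back
 * to S (pl_wtor() would instead drop straight to a plain R lock).
 */
static void append(int v)
{
        pl_take_s(&lock);            /* reserve the exclusive upgrade right */

        pl_stow(&lock);              /* S -> W: waits for the remaining readers to drain */
        if (ring.len < 16)
                ring.buf[ring.len++] = v;
        pl_wtos(&lock);              /* W -> S: readers may resume immediately */

        /* read-side verification could still run here under S */
        pl_drop_s(&lock);
}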
pl_sub_noret_lax(__lk_r, __set_r); \ __pl_r = pl_wait_unlock_long(__lk_r, __msk_r); \ } \ /* wait for all other readers to leave */ \ - while (__builtin_expect(__pl_r, 0)) \ - __pl_r = pl_deref_long(__lk_r) - __set_r; \ + if (__builtin_expect(__pl_r & (PLOCK64_RL_ANY & ~PLOCK64_RL_1), 0)) \ + __pl_r = pl_wait_unlock_long(__lk_r, (PLOCK64_RL_ANY & ~PLOCK64_RL_1)) - __set_r; \ pl_barrier(); \ 0; \ }) : (sizeof(*(lock)) == 4) ? ({ \ @@ -534,15 +544,15 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int register unsigned int __msk_r = PLOCK32_WL_ANY | PLOCK32_SL_ANY; \ register unsigned int __pl_r; \ while (1) { \ - __pl_r = pl_xadd(__lk_r, __set_r); \ + __pl_r = pl_ldadd_acq(__lk_r, __set_r); \ if (!__builtin_expect(__pl_r & __msk_r, 0)) \ break; \ - pl_sub(__lk_r, __set_r); \ + pl_sub_noret_lax(__lk_r, __set_r); \ __pl_r = pl_wait_unlock_int(__lk_r, __msk_r); \ } \ /* wait for all other readers to leave */ \ - while (__builtin_expect(__pl_r, 0)) \ - __pl_r = pl_deref_int(__lk_r) - __set_r; \ + if (__builtin_expect(__pl_r & (PLOCK32_RL_ANY & ~PLOCK32_RL_1), 0)) \ + __pl_r = pl_wait_unlock_int(__lk_r, (PLOCK32_RL_ANY & ~PLOCK32_RL_1)) - __set_r; \ pl_barrier(); \ 0; \ }) : ({ \ @@ -556,10 +566,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int #define pl_drop_w(lock) ( \ (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ pl_barrier(); \ - pl_sub(lock, PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1); \ + pl_sub_noret_rel(lock, PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1); \ }) : (sizeof(*(lock)) == 4) ? ({ \ pl_barrier(); \ - pl_sub(lock, PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1); \ + pl_sub_noret_rel(lock, PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1); \ }) : ({ \ void __unsupported_argument_size_for_pl_drop_w__(char *,int); \ if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ @@ -574,24 +584,16 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int */ #define pl_try_rtos(lock) ( \ (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ - register unsigned long __pl_r = pl_deref_long(lock); \ - pl_barrier(); \ - if (!__builtin_expect(__pl_r & (PLOCK64_WL_ANY | PLOCK64_SL_ANY), 0)) { \ - __pl_r = pl_xadd((lock), PLOCK64_SL_1) & \ - (PLOCK64_WL_ANY | PLOCK64_SL_ANY); \ - if (__builtin_expect(__pl_r, 0)) \ - pl_sub((lock), PLOCK64_SL_1); \ - } \ + register unsigned long __pl_r; \ + __pl_r = pl_ldadd_acq((lock), PLOCK64_SL_1) & (PLOCK64_WL_ANY | PLOCK64_SL_ANY);\ + if (__builtin_expect(__pl_r, 0)) \ + pl_sub_noret_lax((lock), PLOCK64_SL_1); \ !__pl_r; /* return value */ \ }) : (sizeof(*(lock)) == 4) ? 
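pl_take_w()/pl_drop_w() above are the plain exclusive path; besides the atomics rename, the patch makes the wait for lingering readers sleep in pl_wait_unlock_*() rather than spin on raw re-reads of the word. A minimal usage sketch; pl_try_w() is assumed to exist alongside the other pl_try_ variants and to return non-zero on success like pl_try_r():

#include "plock.h"   /* assumed header name */

static unsigned long lock = 0;
static long counter;

static void add(long n)
{
        pl_take_w(&lock);        /* wins the W+S slot, then waits out the remaining readers */
        counter += n;            /* exclusive section */
        pl_drop_w(&lock);        /* single release, now a pl_sub_noret_rel() */
}

static int try_add(long n)
{
        if (!pl_try_w(&lock))    /* assumed: non-zero means the W lock was obtained */
                return 0;
        counter += n;
        pl_drop_w(&lock);
        return 1;
}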
({ \ - register unsigned int __pl_r = pl_deref_int(lock); \ - pl_barrier(); \ - if (!__builtin_expect(__pl_r & (PLOCK32_WL_ANY | PLOCK32_SL_ANY), 0)) { \ - __pl_r = pl_xadd((lock), PLOCK32_SL_1) & \ - (PLOCK32_WL_ANY | PLOCK32_SL_ANY); \ - if (__builtin_expect(__pl_r, 0)) \ - pl_sub((lock), PLOCK32_SL_1); \ - } \ + register unsigned int __pl_r; \ + __pl_r = pl_ldadd_acq((lock), PLOCK32_SL_1) & (PLOCK32_WL_ANY | PLOCK32_SL_ANY);\ + if (__builtin_expect(__pl_r, 0)) \ + pl_sub_noret_lax((lock), PLOCK32_SL_1); \ !__pl_r; /* return value */ \ }) : ({ \ void __unsupported_argument_size_for_pl_try_rtos__(char *,int); \ @@ -616,9 +618,9 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int register unsigned long __pl_r; \ pl_barrier(); \ while (1) { \ - __pl_r = pl_xadd(__lk_r, __set_r); \ + __pl_r = pl_ldadd_acq(__lk_r, __set_r); \ if (__builtin_expect(__pl_r & __msk_r, 0)) { \ - if (pl_xadd(__lk_r, - __set_r)) \ + if (pl_ldadd_lax(__lk_r, - __set_r)) \ break; /* the caller needs to drop the lock now */ \ continue; /* lock was released, try again */ \ } \ @@ -636,9 +638,9 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int register unsigned int __pl_r; \ pl_barrier(); \ while (1) { \ - __pl_r = pl_xadd(__lk_r, __set_r); \ + __pl_r = pl_ldadd_acq(__lk_r, __set_r); \ if (__builtin_expect(__pl_r & __msk_r, 0)) { \ - if (pl_xadd(__lk_r, - __set_r)) \ + if (pl_ldadd_lax(__lk_r, - __set_r)) \ break; /* the caller needs to drop the lock now */ \ continue; /* lock was released, try again */ \ } \ @@ -671,10 +673,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int register unsigned long __pl_r = pl_deref_long(lock) & PLOCK64_SL_ANY; \ pl_barrier(); \ if (!__builtin_expect(__pl_r, 0)) { \ - __pl_r = pl_xadd((lock), PLOCK64_WL_1); \ + __pl_r = pl_ldadd_acq((lock), PLOCK64_WL_1); \ while (1) { \ if (__builtin_expect(__pl_r & PLOCK64_SL_ANY, 0)) { \ - pl_sub((lock), PLOCK64_WL_1); \ + pl_sub_noret_lax((lock), PLOCK64_WL_1); \ break; /* return !__pl_r */ \ } \ __pl_r &= PLOCK64_RL_ANY; \ @@ -688,10 +690,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int register unsigned int __pl_r = pl_deref_int(lock) & PLOCK32_SL_ANY; \ pl_barrier(); \ if (!__builtin_expect(__pl_r, 0)) { \ - __pl_r = pl_xadd((lock), PLOCK32_WL_1); \ + __pl_r = pl_ldadd_acq((lock), PLOCK32_WL_1); \ while (1) { \ if (__builtin_expect(__pl_r & PLOCK32_SL_ANY, 0)) { \ - pl_sub((lock), PLOCK32_WL_1); \ + pl_sub_noret_lax((lock), PLOCK32_WL_1); \ break; /* return !__pl_r */ \ } \ __pl_r &= PLOCK32_RL_ANY; \ @@ -718,12 +720,12 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int register unsigned long __set_r = PLOCK64_WL_1; \ register unsigned long __msk_r = PLOCK64_SL_ANY; \ register unsigned long __pl_r; \ - __pl_r = pl_xadd(__lk_r, __set_r); \ + __pl_r = pl_ldadd_acq(__lk_r, __set_r); \ while (__builtin_expect(__pl_r & PLOCK64_RL_ANY, 0)) { \ if (__builtin_expect(__pl_r & __msk_r, 0)) { \ - pl_sub(__lk_r, __set_r); \ + pl_sub_noret_lax(__lk_r, __set_r); \ pl_wait_unlock_long(__lk_r, __msk_r); \ - __pl_r = pl_xadd(__lk_r, __set_r); \ + __pl_r = pl_ldadd_acq(__lk_r, __set_r); \ continue; \ } \ /* wait for all readers to leave or upgrade */ \ @@ -737,12 +739,12 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int register unsigned int __set_r = PLOCK32_WL_1; \ register unsigned int __msk_r = PLOCK32_SL_ANY; \ register unsigned int __pl_r; \ - __pl_r = 
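All of the try/take macros touched here follow the same shape, which the patch merely re-expresses with ordering-qualified primitives: optimistically add the wanted bits with pl_ldadd_acq(), test the conflict mask in the returned old value, and on conflict roll the bits back with a relaxed subtraction before waiting. The hypothetical helper below condenses that idiom for a 32-bit word; it only illustrates the pattern and is not a macro added by this patch.

#include "plock.h"   /* assumed header name */

/* Claim 'set' in *lock unless any bit of 'msk' is already held by someone
 * else; returns 1 once the claim sticks. Same ldadd/rollback/wait idiom as
 * the pl_try_ and pl_take_ macros above.
 */
static unsigned int claim_or_wait(unsigned int *lock, unsigned int set, unsigned int msk)
{
        while (1) {
                unsigned int prev = pl_ldadd_acq(lock, set);  /* optimistic add, acquire ordering */

                if (!(prev & msk))
                        return 1;                             /* no conflicting holder: done */

                pl_sub_noret_lax(lock, set);                  /* roll back; no ordering needed */
                pl_wait_unlock_int(lock, msk);                /* sleep until the mask clears */
        }
}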
pl_xadd(__lk_r, __set_r); \ + __pl_r = pl_ldadd_acq(__lk_r, __set_r); \ while (__builtin_expect(__pl_r & PLOCK32_RL_ANY, 0)) { \ if (__builtin_expect(__pl_r & __msk_r, 0)) { \ - pl_sub(__lk_r, __set_r); \ + pl_sub_noret_lax(__lk_r, __set_r); \ pl_wait_unlock_int(__lk_r, __msk_r); \ - __pl_r = pl_xadd(__lk_r, __set_r); \ + __pl_r = pl_ldadd_acq(__lk_r, __set_r); \ continue; \ } \ /* wait for all readers to leave or upgrade */ \ @@ -762,10 +764,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int #define pl_drop_a(lock) ( \ (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ pl_barrier(); \ - pl_sub(lock, PLOCK64_WL_1); \ + pl_sub_noret_rel(lock, PLOCK64_WL_1); \ }) : (sizeof(*(lock)) == 4) ? ({ \ pl_barrier(); \ - pl_sub(lock, PLOCK32_WL_1); \ + pl_sub_noret_rel(lock, PLOCK32_WL_1); \ }) : ({ \ void __unsupported_argument_size_for_pl_drop_a__(char *,int); \ if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ @@ -779,7 +781,7 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int register unsigned long *__lk_r = (unsigned long *)(lock); \ register unsigned long __set_r = PLOCK64_RL_1 - PLOCK64_WL_1; \ register unsigned long __msk_r = PLOCK64_WL_ANY; \ - register unsigned long __pl_r = pl_xadd(__lk_r, __set_r) + __set_r; \ + register unsigned long __pl_r = pl_ldadd(__lk_r, __set_r) + __set_r; \ while (__builtin_expect(__pl_r & __msk_r, 0)) { \ __pl_r = pl_wait_unlock_long(__lk_r, __msk_r); \ } \ @@ -788,7 +790,7 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int register unsigned int *__lk_r = (unsigned int *)(lock); \ register unsigned int __set_r = PLOCK32_RL_1 - PLOCK32_WL_1; \ register unsigned int __msk_r = PLOCK32_WL_ANY; \ - register unsigned int __pl_r = pl_xadd(__lk_r, __set_r) + __set_r; \ + register unsigned int __pl_r = pl_ldadd(__lk_r, __set_r) + __set_r; \ while (__builtin_expect(__pl_r & __msk_r, 0)) { \ __pl_r = pl_wait_unlock_int(__lk_r, __msk_r); \ } \ @@ -813,10 +815,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int register unsigned long __pl_r = pl_deref_long(lock) & PLOCK64_SL_ANY; \ pl_barrier(); \ if (!__builtin_expect(__pl_r, 0)) { \ - __pl_r = pl_xadd((lock), PLOCK64_WL_1 - PLOCK64_RL_1); \ + __pl_r = pl_ldadd_acq((lock), PLOCK64_WL_1 - PLOCK64_RL_1); \ while (1) { \ if (__builtin_expect(__pl_r & PLOCK64_SL_ANY, 0)) { \ - pl_sub((lock), PLOCK64_WL_1 - PLOCK64_RL_1); \ + pl_sub_noret_lax((lock), PLOCK64_WL_1 - PLOCK64_RL_1); \ break; /* return !__pl_r */ \ } \ __pl_r &= PLOCK64_RL_ANY; \ @@ -830,10 +832,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int register unsigned int __pl_r = pl_deref_int(lock) & PLOCK32_SL_ANY; \ pl_barrier(); \ if (!__builtin_expect(__pl_r, 0)) { \ - __pl_r = pl_xadd((lock), PLOCK32_WL_1 - PLOCK32_RL_1); \ + __pl_r = pl_ldadd_acq((lock), PLOCK32_WL_1 - PLOCK32_RL_1); \ while (1) { \ if (__builtin_expect(__pl_r & PLOCK32_SL_ANY, 0)) { \ - pl_sub((lock), PLOCK32_WL_1 - PLOCK32_RL_1); \ + pl_sub_noret_lax((lock), PLOCK32_WL_1 - PLOCK32_RL_1); \ break; /* return !__pl_r */ \ } \ __pl_r &= PLOCK32_RL_ANY; \ @@ -861,7 +863,7 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int #define pl_rtoj(lock) ( \ (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? 
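The rename from pl_xadd()/pl_sub() to pl_ldadd_acq()/pl_ldadd()/pl_sub_noret_rel()/pl_sub_noret_lax() encodes the minimum ordering each site needs: acquire where a lock is being taken, release where one is being published as free, relaxed where a failed attempt is only rolled back. On x86 these all compile to the same lock-prefixed instructions; the distinction matters for the generic and weakly-ordered backends. The demo_ definitions below are a hedged guess at what such a generic fallback looks like in terms of the GCC __atomic builtins, not code from this patch:

/* Hypothetical fallbacks, named demo_ to avoid suggesting they come from the
 * header; they only illustrate the intended meaning of the suffixes.
 */
#define demo_ldadd_acq(p, v)      __atomic_fetch_add((p), (v), __ATOMIC_ACQUIRE)          /* taking a lock */
#define demo_ldadd_lax(p, v)      __atomic_fetch_add((p), (v), __ATOMIC_RELAXED)          /* no ordering   */
#define demo_sub_noret_rel(p, v)  ((void)__atomic_sub_fetch((p), (v), __ATOMIC_RELEASE))  /* unlock        */
#define demo_sub_noret_lax(p, v)  ((void)__atomic_sub_fetch((p), (v), __ATOMIC_RELAXED))  /* rollback      */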
({ \ register unsigned long *__lk_r = (unsigned long *)(lock); \ - register unsigned long __pl_r = pl_xadd(__lk_r, PLOCK64_WL_1) + PLOCK64_WL_1; \ + register unsigned long __pl_r = pl_ldadd_acq(__lk_r, PLOCK64_WL_1) + PLOCK64_WL_1;\ register unsigned char __m = 0; \ while (!(__pl_r & PLOCK64_SL_ANY) && \ (__pl_r / PLOCK64_WL_1 != (__pl_r & PLOCK64_RL_ANY) / PLOCK64_RL_1)) { \ @@ -876,7 +878,7 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int pl_barrier(); \ }) : (sizeof(*(lock)) == 4) ? ({ \ register unsigned int *__lk_r = (unsigned int *)(lock); \ - register unsigned int __pl_r = pl_xadd(__lk_r, PLOCK32_WL_1) + PLOCK32_WL_1; \ + register unsigned int __pl_r = pl_ldadd_acq(__lk_r, PLOCK32_WL_1) + PLOCK32_WL_1;\ register unsigned char __m = 0; \ while (!(__pl_r & PLOCK32_SL_ANY) && \ (__pl_r / PLOCK32_WL_1 != (__pl_r & PLOCK32_RL_ANY) / PLOCK32_RL_1)) { \ @@ -902,13 +904,13 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int register unsigned long *__lk_r = (unsigned long *)(lock); \ register unsigned long __pl_r = pl_deref_long(__lk_r); \ if (!(__pl_r & PLOCK64_SL_ANY)) \ - pl_or(__lk_r, PLOCK64_SL_1); \ + pl_or_noret(__lk_r, PLOCK64_SL_1); \ pl_barrier(); \ }) : (sizeof(*(lock)) == 4) ? ({ \ register unsigned int *__lk_r = (unsigned int *)(lock); \ register unsigned int __pl_r = pl_deref_int(__lk_r); \ if (!(__pl_r & PLOCK32_SL_ANY)) \ - pl_or(__lk_r, PLOCK32_SL_1); \ + pl_or_noret(__lk_r, PLOCK32_SL_1); \ pl_barrier(); \ }) : ({ \ void __unsupported_argument_size_for_pl_jtoc__(char *,int); \ @@ -921,12 +923,12 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int #define pl_rtoc(lock) ( \ (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ register unsigned long *__lk_r = (unsigned long *)(lock); \ - register unsigned long __pl_r = pl_xadd(__lk_r, PLOCK64_WL_1) + PLOCK64_WL_1; \ + register unsigned long __pl_r = pl_ldadd_acq(__lk_r, PLOCK64_WL_1) + PLOCK64_WL_1;\ register unsigned char __m = 0; \ while (__builtin_expect(!(__pl_r & PLOCK64_SL_ANY), 0)) { \ unsigned char __loops; \ if (__pl_r / PLOCK64_WL_1 == (__pl_r & PLOCK64_RL_ANY) / PLOCK64_RL_1) { \ - pl_or(__lk_r, PLOCK64_SL_1); \ + pl_or_noret(__lk_r, PLOCK64_SL_1); \ break; \ } \ __loops = __m + 1; \ @@ -940,12 +942,12 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int pl_barrier(); \ }) : (sizeof(*(lock)) == 4) ? ({ \ register unsigned int *__lk_r = (unsigned int *)(lock); \ - register unsigned int __pl_r = pl_xadd(__lk_r, PLOCK32_WL_1) + PLOCK32_WL_1; \ + register unsigned int __pl_r = pl_ldadd_acq(__lk_r, PLOCK32_WL_1) + PLOCK32_WL_1;\ register unsigned char __m = 0; \ while (__builtin_expect(!(__pl_r & PLOCK32_SL_ANY), 0)) { \ unsigned char __loops; \ if (__pl_r / PLOCK32_WL_1 == (__pl_r & PLOCK32_RL_ANY) / PLOCK32_RL_1) { \ - pl_or(__lk_r, PLOCK32_SL_1); \ + pl_or_noret(__lk_r, PLOCK32_SL_1); \ break; \ } \ __loops = __m + 1; \ @@ -969,16 +971,16 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ register unsigned long *__lk_r = (unsigned long *)(lock); \ register unsigned long __set_r = - PLOCK64_RL_1 - PLOCK64_WL_1; \ - register unsigned long __pl_r = pl_xadd(__lk_r, __set_r) + __set_r; \ + register unsigned long __pl_r = pl_ldadd(__lk_r, __set_r) + __set_r; \ if (!(__pl_r & PLOCK64_RL_ANY)) \ - pl_and(__lk_r, ~PLOCK64_SL_1); \ + pl_and_noret(__lk_r, ~PLOCK64_SL_1); \ pl_barrier(); \ }) : (sizeof(*(lock)) == 4) ? 
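The __m/__loops counters visible in pl_rtoj() and pl_rtoc() above implement a small, bounded backoff between re-reads of the lock word. The helper below sketches the same idea generically (the exact growth law used by the header may differ); pl_deref_long() and pl_cpu_relax() are the primitives already used throughout this file.

#include "plock.h"   /* assumed header name */

/* Re-read *lock with progressively longer pauses until no bit of 'mask' is
 * set anymore, and return the last value observed.
 */
static inline unsigned long wait_for_quiet(const unsigned long *lock, unsigned long mask)
{
        unsigned char m = 0;
        unsigned long v;

        while ((v = pl_deref_long(lock)) & mask) {
                unsigned char loops = m + 1;

                m = (m << 1) + 1;        /* 1, 3, 7, ... capped by the 8-bit counter */
                do {
                        pl_cpu_relax();  /* PAUSE on x86, cheap on SMT siblings */
                } while (--loops);
        }
        return v;
}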
({ \ register unsigned int *__lk_r = (unsigned int *)(lock); \ register unsigned int __set_r = - PLOCK32_RL_1 - PLOCK32_WL_1; \ - register unsigned int __pl_r = pl_xadd(__lk_r, __set_r) + __set_r; \ + register unsigned int __pl_r = pl_ldadd(__lk_r, __set_r) + __set_r; \ if (!(__pl_r & PLOCK32_RL_ANY)) \ - pl_and(__lk_r, ~PLOCK32_SL_1); \ + pl_and_noret(__lk_r, ~PLOCK32_SL_1); \ pl_barrier(); \ }) : ({ \ void __unsupported_argument_size_for_pl_drop_c__(char *,int); \ @@ -991,10 +993,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int #define pl_ctoa(lock) ( \ (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ register unsigned long *__lk_r = (unsigned long *)(lock); \ - register unsigned long __pl_r = pl_xadd(__lk_r, -PLOCK64_RL_1) - PLOCK64_RL_1; \ + register unsigned long __pl_r = pl_ldadd(__lk_r, -PLOCK64_RL_1) - PLOCK64_RL_1;\ while (__pl_r & PLOCK64_SL_ANY) { \ if (!(__pl_r & PLOCK64_RL_ANY)) { \ - pl_and(__lk_r, ~PLOCK64_SL_1); \ + pl_and_noret(__lk_r, ~PLOCK64_SL_1); \ break; \ } \ pl_cpu_relax(); \ @@ -1004,10 +1006,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int pl_barrier(); \ }) : (sizeof(*(lock)) == 4) ? ({ \ register unsigned int *__lk_r = (unsigned int *)(lock); \ - register unsigned int __pl_r = pl_xadd(__lk_r, -PLOCK32_RL_1) - PLOCK32_RL_1; \ + register unsigned int __pl_r = pl_ldadd(__lk_r, -PLOCK32_RL_1) - PLOCK32_RL_1; \ while (__pl_r & PLOCK32_SL_ANY) { \ if (!(__pl_r & PLOCK32_RL_ANY)) { \ - pl_and(__lk_r, ~PLOCK32_SL_1); \ + pl_and_noret(__lk_r, ~PLOCK32_SL_1); \ break; \ } \ pl_cpu_relax(); \ @@ -1026,10 +1028,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int #define pl_atoj(lock) ( \ (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ pl_barrier(); \ - pl_add(lock, PLOCK64_RL_1); \ + pl_add_noret(lock, PLOCK64_RL_1); \ }) : (sizeof(*(lock)) == 4) ? 
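pl_and()/pl_add() becoming pl_and_noret()/pl_add_noret() in pl_drop_c(), pl_ctoa() and pl_atoj() follows the same logic as the _noret subtractions: when the previous value is not used, the operation does not need to be a fetch-and-op. The contrast below is an illustration with the GCC builtins (presumably close to what the generic fallback does, but not taken from this patch): on x86 the first form needs a compare-and-swap loop to return the old value, while the second is a single lock-prefixed instruction.

/* Needs the old value: the compiler has to emit a cmpxchg loop on x86. */
static inline unsigned long fetch_and_clear(unsigned long *p, unsigned long bits)
{
        return __atomic_fetch_and(p, ~bits, __ATOMIC_RELEASE);
}

/* Old value discarded: a single "lock and" instruction is enough. */
static inline void clear_noret(unsigned long *p, unsigned long bits)
{
        __atomic_and_fetch(p, ~bits, __ATOMIC_RELEASE);
}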
({ \ pl_barrier(); \ - pl_add(lock, PLOCK32_RL_1); \ + pl_add_noret(lock, PLOCK32_RL_1); \ }) : ({ \ void __unsupported_argument_size_for_pl_atoj__(char *,int); \ if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ @@ -1071,14 +1073,14 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int register unsigned long __pl_r; \ register unsigned char __m; \ pl_wait_unlock_long(__lk_r, __msk_r); \ - __pl_r = pl_xadd(__lk_r, __set_r) + __set_r; \ + __pl_r = pl_ldadd_acq(__lk_r, __set_r) + __set_r; \ /* wait for all other readers to leave */ \ __m = 0; \ while (__builtin_expect(__pl_r & PLOCK64_RL_2PL, 0)) { \ unsigned char __loops; \ /* give up on other writers */ \ if (__builtin_expect(__pl_r & PLOCK64_WL_2PL, 0)) { \ - pl_sub(__lk_r, __set_r); \ + pl_sub_noret_lax(__lk_r, __set_r); \ __pl_r = 0; /* failed to get the lock */ \ break; \ } \ @@ -1099,14 +1101,14 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int register unsigned int __pl_r; \ register unsigned char __m; \ pl_wait_unlock_int(__lk_r, __msk_r); \ - __pl_r = pl_xadd(__lk_r, __set_r) + __set_r; \ + __pl_r = pl_ldadd_acq(__lk_r, __set_r) + __set_r; \ /* wait for all other readers to leave */ \ __m = 0; \ while (__builtin_expect(__pl_r & PLOCK32_RL_2PL, 0)) { \ unsigned char __loops; \ /* but rollback on other writers */ \ if (__builtin_expect(__pl_r & PLOCK32_WL_2PL, 0)) { \ - pl_sub(__lk_r, __set_r); \ + pl_sub_noret_lax(__lk_r, __set_r); \ __pl_r = 0; /* failed to get the lock */ \ break; \ } \ @@ -1146,14 +1148,14 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int register unsigned char __m; \ __retry: \ pl_wait_unlock_long(__lk_r, __msk_r); \ - __pl_r = pl_xadd(__lk_r, __set_r) + __set_r; \ + __pl_r = pl_ldadd_acq(__lk_r, __set_r) + __set_r; \ /* wait for all other readers to leave */ \ __m = 0; \ while (__builtin_expect(__pl_r & PLOCK64_RL_2PL, 0)) { \ unsigned char __loops; \ /* but rollback on other writers */ \ if (__builtin_expect(__pl_r & PLOCK64_WL_2PL, 0)) { \ - pl_sub(__lk_r, __set_r); \ + pl_sub_noret_lax(__lk_r, __set_r); \ goto __retry; \ } \ __loops = __m + 1; \ @@ -1175,14 +1177,14 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int register unsigned char __m; \ __retry: \ pl_wait_unlock_int(__lk_r, __msk_r); \ - __pl_r = pl_xadd(__lk_r, __set_r) + __set_r; \ + __pl_r = pl_ldadd_acq(__lk_r, __set_r) + __set_r; \ /* wait for all other readers to leave */ \ __m = 0; \ while (__builtin_expect(__pl_r & PLOCK32_RL_2PL, 0)) { \ unsigned char __loops; \ /* but rollback on other writers */ \ if (__builtin_expect(__pl_r & PLOCK32_WL_2PL, 0)) { \ - pl_sub(__lk_r, __set_r); \ + pl_sub_noret_lax(__lk_r, __set_r); \ goto __retry; \ } \ __loops = __m + 1; \ @@ -1207,10 +1209,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int #define pl_drop_j(lock) ( \ (sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \ pl_barrier(); \ - pl_sub(lock, PLOCK64_WL_1 | PLOCK64_RL_1); \ + pl_sub_noret_rel(lock, PLOCK64_WL_1 | PLOCK64_RL_1); \ }) : (sizeof(*(lock)) == 4) ? 
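The two retry loops above (the "give up" flavour that returns 0 and the "__retry" flavour that starts over) complete the conversion of every lock-acquiring add to an acquire and every rollback to a relaxed subtraction. A small reader/writer smoke test is an easy way to exercise these acquire/release pairs; the sketch below assumes POSIX threads, the plock.h header name, and the pl_take_r()/pl_take_w() entry points used in the earlier sketches.

#include <pthread.h>
#include <stdio.h>
#include "plock.h"   /* assumed header name */

static unsigned long lk = 0;
static long counter;

static void *reader(void *arg)
{
        (void)arg;
        for (int i = 0; i < 100000; i++) {
                pl_take_r(&lk);
                long snap = counter;   /* consistent snapshot under R */
                pl_drop_r(&lk);
                (void)snap;
        }
        return NULL;
}

static void *writer(void *arg)
{
        (void)arg;
        for (int i = 0; i < 100000; i++) {
                pl_take_w(&lk);
                counter++;             /* exclusive update under W */
                pl_drop_w(&lk);
        }
        return NULL;
}

int main(void)
{
        pthread_t r[4], w;

        pthread_create(&w, NULL, writer, NULL);
        for (int i = 0; i < 4; i++)
                pthread_create(&r[i], NULL, reader, NULL);
        pthread_join(w, NULL);
        for (int i = 0; i < 4; i++)
                pthread_join(r[i], NULL);

        printf("counter=%ld (expect 100000)\n", counter);
        return 0;
}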
({ \ pl_barrier(); \ - pl_sub(lock, PLOCK32_WL_1 | PLOCK32_RL_1); \ + pl_sub_noret_rel(lock, PLOCK32_WL_1 | PLOCK32_RL_1); \ }) : ({ \ void __unsupported_argument_size_for_pl_drop_j__(char *,int); \ if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \ @@ -1303,7 +1305,7 @@ static inline void pl_lorw_rdlock(unsigned long *lock) lk = pl_wait_unlock_long(lock, PLOCK_LORW_WRQ_MASK); /* count us as visitor among others */ - lk = pl_xadd(lock, PLOCK_LORW_SHR_BASE); + lk = pl_ldadd_acq(lock, PLOCK_LORW_SHR_BASE); /* wait for end of exclusive access if any */ if (lk & PLOCK_LORW_EXC_MASK) @@ -1335,7 +1337,7 @@ static inline void pl_lorw_wrlock(unsigned long *lock) if (lk & PLOCK_LORW_SHR_MASK) { /* note below, an OR is significantly cheaper than BTS or XADD */ if (!(lk & PLOCK_LORW_WRQ_MASK)) - pl_or(lock, PLOCK_LORW_WRQ_BASE); + pl_or_noret(lock, PLOCK_LORW_WRQ_BASE); lk = pl_wait_unlock_long(lock, PLOCK_LORW_SHR_MASK); } @@ -1360,13 +1362,13 @@ static inline void pl_lorw_wrlock(unsigned long *lock) __attribute__((unused,always_inline,no_instrument_function)) static inline void pl_lorw_rdunlock(unsigned long *lock) { - pl_sub(lock, PLOCK_LORW_SHR_BASE); + pl_sub_noret_rel(lock, PLOCK_LORW_SHR_BASE); } __attribute__((unused,always_inline,no_instrument_function)) static inline void pl_lorw_wrunlock(unsigned long *lock) { - pl_and(lock, ~(PLOCK_LORW_WRQ_MASK | PLOCK_LORW_EXC_MASK)); + pl_and_noret_rel(lock, ~(PLOCK_LORW_WRQ_MASK | PLOCK_LORW_EXC_MASK)); } __attribute__((unused,always_inline,no_instrument_function))
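The pl_lorw_* functions at the end are the self-contained low-overhead read/write lock built on a single unsigned long; this hunk only moves them to the _noret/_rel primitives and to pl_ldadd_acq(). A minimal usage sketch of the four entry points visible above (the zero-initialized lock word is an assumption consistent with the rest of the file):

#include "plock.h"   /* assumed header name */

static unsigned long lorw = 0;
static int table[16];

static int lookup(unsigned int i)
{
        int v;

        pl_lorw_rdlock(&lorw);     /* waits out queued/active writers, then counts us as a reader */
        v = table[i & 15];
        pl_lorw_rdunlock(&lorw);   /* now a release-ordered subtraction of SHR_BASE */
        return v;
}

static void update(unsigned int i, int v)
{
        pl_lorw_wrlock(&lorw);     /* announces WRQ while readers are present, then takes EXC */
        table[i & 15] = v;
        pl_lorw_wrunlock(&lorw);   /* clears WRQ|EXC with a release-ordered AND */
}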