MAJOR: threads/plock: update the embedded library again

This updates the local copy of the plock library to benefit from finer
memory ordering, from EBO (exponential backoff) on more operations (for
instance when take_w() and stow() wait for readers to leave), from
refined EBO on common operations such as attempts to upgrade R to S,
and from the removal of a counter-productive prior read in rtos() and
take_r().
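
As a rough illustration of the last point, the R to S upgrade now goes
straight to the atomic fetch-and-add instead of performing a preliminary
read of the lock word; below is a minimal sketch of the 64-bit case,
built from the macros visible in the diff further down (simplified, not
the library's exact code, and assuming plock.h is included):

  /* illustrative only (try_rtos_sketch() is not a library function):
   * add the S bit with acquire ordering, and roll back with relaxed
   * ordering if a writer or seeker is present.
   */
  static inline int try_rtos_sketch(unsigned long *lock)
  {
          unsigned long r;

          r = pl_ldadd_acq(lock, PLOCK64_SL_1) &
              (PLOCK64_WL_ANY | PLOCK64_SL_ANY);
          if (r)
                  pl_sub_noret_lax(lock, PLOCK64_SL_1);
          return !r; /* non-zero on success */
  }

Similarly, take_w() and stow() now rely on pl_wait_unlock_long(), which
applies exponential backoff, instead of spinning on plain re-reads of
the lock word while waiting for the last readers to leave.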

These changes have shown a 5% performance increase on regular operations
on ARM, a 33% increase on stick-table operations on ARM and 2% on x86,
and 14% and 4% improvements on peers updates on ARM and x86
respectively.

The availability of relaxed operations will probably be useful for stats
counters, which are still extremely expensive to update.
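
As a minimal sketch (not part of the library nor of this commit, and
assuming a counter that only needs to be eventually consistent), such an
update boils down to a single relaxed fetch-and-add; the C11 builtin is
shown here, and a relaxed no-return plock add (presumably named
pl_add_noret_lax(), following the _noret/_lax naming seen in the diff)
would compile down to the same thing:

  /* relaxed counter update: no ordering constraint is needed because
   * the counter never synchronizes access to other data.
   */
  static inline void stats_inc(unsigned long *ctr)
  {
          __atomic_add_fetch(ctr, 1, __ATOMIC_RELAXED);
          /* with plock this would presumably be pl_add_noret_lax(ctr, 1) */
  }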

The following plock commits were included in this update (a rough
mapping of the new operation names to C11 builtins is sketched after the
list):

  9db830b plock: support inlining exponential backoff code
  008d3c2 plock: make the rtos upgrade faster
  2f76dde atomic: clean up the generic xchg()
  3c6919b atomic: make sure that the no-return macros do not return a value
  97c2bb7 atomic: make the fallback bts use the pointed type for the shift
  f4c1880 atomic: also implement the missing pl_btr()
  8329b82 atomic: guard all generic definitions to make it easier to provide specific ones
  7c5cb62 atomic: use C11 atomics when available
  96afaf9 atomic: prefer the C11 definitions in general
  f3ec7a6 atomic: implement load/store/atomic barriers
  8bdbd1e atomic: add atomic load/stores
  0f604c0 atomic: add more _noret operations
  3fe35db atomic: remove the (void) cast from the C11 operations
  3b08a7c atomic: allow to define the fallback _noret variants
  28deb22 atomic: make x86 arithmetic operations the _noret variants
  8061fe2 atomic: handle modern compilers that support returning flags
  b8b91b7 atomic: add the fetch-and-<op> operations (pl_ld<op>)
  59817ca atomic: add memory order variants for most operations
  a40774f plock: explicitly make use of the pl_*_noret operations
  6f1861b plock: switch to pl_sub_noret_lax() for cancellation
  c013980 plock: use pl_ldadd{_lax,_acq,} instead of pl_xadd()
  382eea3 plock: use a release ordering when dropping the lock
  60d750d plock: use EBO when waiting for readers to leave in take_w() and stow()
  fc01c4f plock: improve EBO a little bit
  1ef6390 plock: switch to CAS + XADD for pl_take_r()
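
For readers unfamiliar with the naming used above, the new suffixes
denote the usual C11 memory orders; the mapping below is only a sketch
of the presumed generic fallbacks, not the library's exact definitions
(the unsuffixed forms keep the strongest ordering, and per-architecture
versions may differ):

  /* presumed C11 equivalents of the new names (sketch only):
   *   pl_ldadd_acq(p, v)     ~ __atomic_fetch_add(p, v, __ATOMIC_ACQUIRE)
   *   pl_ldadd_lax(p, v)     ~ __atomic_fetch_add(p, v, __ATOMIC_RELAXED)
   *   pl_sub_noret_rel(p, v) ~ (void)__atomic_sub_fetch(p, v, __ATOMIC_RELEASE)
   *   pl_sub_noret_lax(p, v) ~ (void)__atomic_sub_fetch(p, v, __ATOMIC_RELAXED)
   */
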
Willy Tarreau 2023-08-07 19:16:33 +02:00
parent 09133860bf
commit 8178a5211c
2 changed files with 1635 additions and 211 deletions

File diff suppressed because it is too large

@ -74,8 +74,12 @@
} while (_r & mask); \
_r; /* return value */ \
})
#else
#else /* not PLOCK_DISABLE_EBO */
# if defined(PLOCK_INLINE_EBO)
__attribute__((unused,always_inline,no_instrument_function)) inline
# else
__attribute__((unused,noinline,no_instrument_function))
# endif
static unsigned long pl_wait_unlock_long(const unsigned long *lock, const unsigned long mask)
{
unsigned long ret;
@ -90,13 +94,13 @@ static unsigned long pl_wait_unlock_long(const unsigned long *lock, const unsign
loops -= 32768;
}
#endif
for (; loops >= 200; loops -= 10)
for (; loops >= 60; loops --)
pl_cpu_relax();
for (; loops >= 1; loops--)
pl_barrier();
ret = pl_deref_long(lock);
ret = pl_load(lock);
if (__builtin_expect(ret & mask, 0) == 0)
break;
@ -228,18 +232,18 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
register unsigned long __pl_r = pl_deref_long(lock) & PLOCK64_WL_ANY; \
pl_barrier(); \
if (!__builtin_expect(__pl_r, 0)) { \
__pl_r = pl_xadd((lock), PLOCK64_RL_1) & PLOCK64_WL_ANY; \
__pl_r = pl_ldadd_acq((lock), PLOCK64_RL_1) & PLOCK64_WL_ANY; \
if (__builtin_expect(__pl_r, 0)) \
pl_sub((lock), PLOCK64_RL_1); \
pl_sub_noret((lock), PLOCK64_RL_1); \
} \
!__pl_r; /* return value */ \
}) : (sizeof(*(lock)) == 4) ? ({ \
register unsigned int __pl_r = pl_deref_int(lock) & PLOCK32_WL_ANY; \
pl_barrier(); \
if (!__builtin_expect(__pl_r, 0)) { \
__pl_r = pl_xadd((lock), PLOCK32_RL_1) & PLOCK32_WL_ANY; \
__pl_r = pl_ldadd_acq((lock), PLOCK32_RL_1) & PLOCK32_WL_ANY; \
if (__builtin_expect(__pl_r, 0)) \
pl_sub((lock), PLOCK32_RL_1); \
pl_sub_noret((lock), PLOCK32_RL_1); \
} \
!__pl_r; /* return value */ \
}) : ({ \
@ -259,12 +263,15 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
register unsigned long *__lk_r = (unsigned long *)(lock); \
register unsigned long __set_r = PLOCK64_RL_1; \
register unsigned long __msk_r = PLOCK64_WL_ANY; \
while (1) { \
if (__builtin_expect(pl_deref_long(__lk_r) & __msk_r, 0)) \
pl_wait_unlock_long(__lk_r, __msk_r); \
if (!__builtin_expect(pl_xadd(__lk_r, __set_r) & __msk_r, 0)) \
break; \
pl_sub(__lk_r, __set_r); \
register unsigned long __old_r = pl_cmpxchg(__lk_r, 0, __set_r); \
if (__old_r) { \
while (1) { \
if (__old_r & __msk_r) \
pl_wait_unlock_long(__lk_r, __msk_r); \
if (!(pl_ldadd_acq(__lk_r, __set_r) & __msk_r)) \
break; \
__old_r = pl_sub_lax(__lk_r, __set_r); \
} \
} \
pl_barrier(); \
0; \
@ -272,12 +279,15 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
register unsigned int *__lk_r = (unsigned int *)(lock); \
register unsigned int __set_r = PLOCK32_RL_1; \
register unsigned int __msk_r = PLOCK32_WL_ANY; \
while (1) { \
if (__builtin_expect(pl_deref_int(__lk_r) & __msk_r, 0)) \
pl_wait_unlock_int(__lk_r, __msk_r); \
if (!__builtin_expect(pl_xadd(__lk_r, __set_r) & __msk_r, 0)) \
break; \
pl_sub(__lk_r, __set_r); \
register unsigned int __old_r = pl_cmpxchg(__lk_r, 0, __set_r); \
if (__old_r) { \
while (1) { \
if (__old_r & __msk_r) \
pl_wait_unlock_int(__lk_r, __msk_r); \
if (!(pl_ldadd_acq(__lk_r, __set_r) & __msk_r)) \
break; \
__old_r = pl_sub_lax(__lk_r, __set_r); \
} \
} \
pl_barrier(); \
0; \
@ -292,10 +302,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
#define pl_drop_r(lock) ( \
(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
pl_barrier(); \
pl_sub(lock, PLOCK64_RL_1); \
pl_sub_noret_rel(lock, PLOCK64_RL_1); \
}) : (sizeof(*(lock)) == 4) ? ({ \
pl_barrier(); \
pl_sub(lock, PLOCK32_RL_1); \
pl_sub_noret_rel(lock, PLOCK32_RL_1); \
}) : ({ \
void __unsupported_argument_size_for_pl_drop_r__(char *,int); \
if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \
@ -309,20 +319,20 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
register unsigned long __pl_r = pl_deref_long(lock); \
pl_barrier(); \
if (!__builtin_expect(__pl_r & (PLOCK64_WL_ANY | PLOCK64_SL_ANY), 0)) { \
__pl_r = pl_xadd((lock), PLOCK64_SL_1 | PLOCK64_RL_1) & \
__pl_r = pl_ldadd_acq((lock), PLOCK64_SL_1 | PLOCK64_RL_1) & \
(PLOCK64_WL_ANY | PLOCK64_SL_ANY); \
if (__builtin_expect(__pl_r, 0)) \
pl_sub((lock), PLOCK64_SL_1 | PLOCK64_RL_1); \
pl_sub_noret_lax((lock), PLOCK64_SL_1 | PLOCK64_RL_1); \
} \
!__pl_r; /* return value */ \
}) : (sizeof(*(lock)) == 4) ? ({ \
register unsigned int __pl_r = pl_deref_int(lock); \
pl_barrier(); \
if (!__builtin_expect(__pl_r & (PLOCK32_WL_ANY | PLOCK32_SL_ANY), 0)) { \
__pl_r = pl_xadd((lock), PLOCK32_SL_1 | PLOCK32_RL_1) & \
__pl_r = pl_ldadd_acq((lock), PLOCK32_SL_1 | PLOCK32_RL_1) & \
(PLOCK32_WL_ANY | PLOCK32_SL_ANY); \
if (__builtin_expect(__pl_r, 0)) \
pl_sub((lock), PLOCK32_SL_1 | PLOCK32_RL_1); \
pl_sub_noret_lax((lock), PLOCK32_SL_1 | PLOCK32_RL_1); \
} \
!__pl_r; /* return value */ \
}) : ({ \
@ -344,9 +354,9 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
register unsigned long __set_r = PLOCK64_SL_1 | PLOCK64_RL_1; \
register unsigned long __msk_r = PLOCK64_WL_ANY | PLOCK64_SL_ANY; \
while (1) { \
if (!__builtin_expect(pl_xadd(__lk_r, __set_r) & __msk_r, 0)) \
if (!__builtin_expect(pl_ldadd_acq(__lk_r, __set_r) & __msk_r, 0)) \
break; \
pl_sub(__lk_r, __set_r); \
pl_sub_noret_lax(__lk_r, __set_r); \
pl_wait_unlock_long(__lk_r, __msk_r); \
} \
pl_barrier(); \
@ -356,9 +366,9 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
register unsigned int __set_r = PLOCK32_SL_1 | PLOCK32_RL_1; \
register unsigned int __msk_r = PLOCK32_WL_ANY | PLOCK32_SL_ANY; \
while (1) { \
if (!__builtin_expect(pl_xadd(__lk_r, __set_r) & __msk_r, 0)) \
if (!__builtin_expect(pl_ldadd_acq(__lk_r, __set_r) & __msk_r, 0)) \
break; \
pl_sub(__lk_r, __set_r); \
pl_sub_noret_lax(__lk_r, __set_r); \
pl_wait_unlock_int(__lk_r, __msk_r); \
} \
pl_barrier(); \
@ -374,10 +384,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
#define pl_drop_s(lock) ( \
(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
pl_barrier(); \
pl_sub(lock, PLOCK64_SL_1 + PLOCK64_RL_1); \
pl_sub_noret_rel(lock, PLOCK64_SL_1 + PLOCK64_RL_1); \
}) : (sizeof(*(lock)) == 4) ? ({ \
pl_barrier(); \
pl_sub(lock, PLOCK32_SL_1 + PLOCK32_RL_1); \
pl_sub_noret_rel(lock, PLOCK32_SL_1 + PLOCK32_RL_1); \
}) : ({ \
void __unsupported_argument_size_for_pl_drop_s__(char *,int); \
if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \
@ -389,10 +399,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
#define pl_stor(lock) ( \
(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
pl_barrier(); \
pl_sub(lock, PLOCK64_SL_1); \
pl_sub_noret(lock, PLOCK64_SL_1); \
}) : (sizeof(*(lock)) == 4) ? ({ \
pl_barrier(); \
pl_sub(lock, PLOCK32_SL_1); \
pl_sub_noret(lock, PLOCK32_SL_1); \
}) : ({ \
void __unsupported_argument_size_for_pl_stor__(char *,int); \
if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \
@ -403,14 +413,14 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
/* take the W lock under the S lock */
#define pl_stow(lock) ( \
(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
register unsigned long __pl_r = pl_xadd((lock), PLOCK64_WL_1); \
while ((__pl_r & PLOCK64_RL_ANY) != PLOCK64_RL_1) \
__pl_r = pl_deref_long(lock); \
register unsigned long __pl_r = pl_ldadd((lock), PLOCK64_WL_1); \
if (__pl_r & (PLOCK64_RL_ANY & ~PLOCK64_RL_1)) \
__pl_r = pl_wait_unlock_long((const unsigned long*)lock, (PLOCK64_RL_ANY & ~PLOCK64_RL_1)); \
pl_barrier(); \
}) : (sizeof(*(lock)) == 4) ? ({ \
register unsigned int __pl_r = pl_xadd((lock), PLOCK32_WL_1); \
while ((__pl_r & PLOCK32_RL_ANY) != PLOCK32_RL_1) \
__pl_r = pl_deref_int(lock); \
register unsigned int __pl_r = pl_ldadd((lock), PLOCK32_WL_1); \
if (__pl_r & (PLOCK32_RL_ANY & ~PLOCK32_RL_1)) \
__pl_r = pl_wait_unlock_int((const unsigned int*)lock, (PLOCK32_RL_ANY & ~PLOCK32_RL_1)); \
pl_barrier(); \
}) : ({ \
void __unsupported_argument_size_for_pl_stow__(char *,int); \
@ -423,10 +433,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
#define pl_wtos(lock) ( \
(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
pl_barrier(); \
pl_sub(lock, PLOCK64_WL_1); \
pl_sub_noret(lock, PLOCK64_WL_1); \
}) : (sizeof(*(lock)) == 4) ? ({ \
pl_barrier(); \
pl_sub(lock, PLOCK32_WL_1); \
pl_sub_noret(lock, PLOCK32_WL_1); \
}) : ({ \
void __unsupported_argument_size_for_pl_wtos__(char *,int); \
if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \
@ -438,10 +448,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
#define pl_wtor(lock) ( \
(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
pl_barrier(); \
pl_sub(lock, PLOCK64_WL_1 | PLOCK64_SL_1); \
pl_sub_noret(lock, PLOCK64_WL_1 | PLOCK64_SL_1); \
}) : (sizeof(*(lock)) == 4) ? ({ \
pl_barrier(); \
pl_sub(lock, PLOCK32_WL_1 | PLOCK32_SL_1); \
pl_sub_noret(lock, PLOCK32_WL_1 | PLOCK32_SL_1); \
}) : ({ \
void __unsupported_argument_size_for_pl_wtor__(char *,int); \
if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \
@ -469,10 +479,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
register unsigned long __pl_r = pl_deref_long(lock); \
pl_barrier(); \
if (!__builtin_expect(__pl_r & (PLOCK64_WL_ANY | PLOCK64_SL_ANY), 0)) { \
__pl_r = pl_xadd((lock), PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1); \
__pl_r = pl_ldadd_acq((lock), PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1);\
if (__builtin_expect(__pl_r & (PLOCK64_WL_ANY | PLOCK64_SL_ANY), 0)) { \
/* a writer, seeker or atomic is present, let's leave */ \
pl_sub((lock), PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1); \
pl_sub_noret_lax((lock), PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1);\
__pl_r &= (PLOCK64_WL_ANY | PLOCK64_SL_ANY); /* return value */\
} else { \
/* wait for all other readers to leave */ \
@ -486,10 +496,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
register unsigned int __pl_r = pl_deref_int(lock); \
pl_barrier(); \
if (!__builtin_expect(__pl_r & (PLOCK32_WL_ANY | PLOCK32_SL_ANY), 0)) { \
__pl_r = pl_xadd((lock), PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1); \
__pl_r = pl_ldadd_acq((lock), PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1);\
if (__builtin_expect(__pl_r & (PLOCK32_WL_ANY | PLOCK32_SL_ANY), 0)) { \
/* a writer, seeker or atomic is present, let's leave */ \
pl_sub((lock), PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1); \
pl_sub_noret_lax((lock), PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1);\
__pl_r &= (PLOCK32_WL_ANY | PLOCK32_SL_ANY); /* return value */\
} else { \
/* wait for all other readers to leave */ \
@ -517,15 +527,15 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
register unsigned long __msk_r = PLOCK64_WL_ANY | PLOCK64_SL_ANY; \
register unsigned long __pl_r; \
while (1) { \
__pl_r = pl_xadd(__lk_r, __set_r); \
__pl_r = pl_ldadd_acq(__lk_r, __set_r); \
if (!__builtin_expect(__pl_r & __msk_r, 0)) \
break; \
pl_sub(__lk_r, __set_r); \
pl_sub_noret_lax(__lk_r, __set_r); \
__pl_r = pl_wait_unlock_long(__lk_r, __msk_r); \
} \
/* wait for all other readers to leave */ \
while (__builtin_expect(__pl_r, 0)) \
__pl_r = pl_deref_long(__lk_r) - __set_r; \
if (__builtin_expect(__pl_r & (PLOCK64_RL_ANY & ~PLOCK64_RL_1), 0)) \
__pl_r = pl_wait_unlock_long(__lk_r, (PLOCK64_RL_ANY & ~PLOCK64_RL_1)) - __set_r; \
pl_barrier(); \
0; \
}) : (sizeof(*(lock)) == 4) ? ({ \
@ -534,15 +544,15 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
register unsigned int __msk_r = PLOCK32_WL_ANY | PLOCK32_SL_ANY; \
register unsigned int __pl_r; \
while (1) { \
__pl_r = pl_xadd(__lk_r, __set_r); \
__pl_r = pl_ldadd_acq(__lk_r, __set_r); \
if (!__builtin_expect(__pl_r & __msk_r, 0)) \
break; \
pl_sub(__lk_r, __set_r); \
pl_sub_noret_lax(__lk_r, __set_r); \
__pl_r = pl_wait_unlock_int(__lk_r, __msk_r); \
} \
/* wait for all other readers to leave */ \
while (__builtin_expect(__pl_r, 0)) \
__pl_r = pl_deref_int(__lk_r) - __set_r; \
if (__builtin_expect(__pl_r & (PLOCK32_RL_ANY & ~PLOCK32_RL_1), 0)) \
__pl_r = pl_wait_unlock_int(__lk_r, (PLOCK32_RL_ANY & ~PLOCK32_RL_1)) - __set_r; \
pl_barrier(); \
0; \
}) : ({ \
@ -556,10 +566,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
#define pl_drop_w(lock) ( \
(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
pl_barrier(); \
pl_sub(lock, PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1); \
pl_sub_noret_rel(lock, PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1); \
}) : (sizeof(*(lock)) == 4) ? ({ \
pl_barrier(); \
pl_sub(lock, PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1); \
pl_sub_noret_rel(lock, PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1); \
}) : ({ \
void __unsupported_argument_size_for_pl_drop_w__(char *,int); \
if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \
@ -574,24 +584,16 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
*/
#define pl_try_rtos(lock) ( \
(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
register unsigned long __pl_r = pl_deref_long(lock); \
pl_barrier(); \
if (!__builtin_expect(__pl_r & (PLOCK64_WL_ANY | PLOCK64_SL_ANY), 0)) { \
__pl_r = pl_xadd((lock), PLOCK64_SL_1) & \
(PLOCK64_WL_ANY | PLOCK64_SL_ANY); \
if (__builtin_expect(__pl_r, 0)) \
pl_sub((lock), PLOCK64_SL_1); \
} \
register unsigned long __pl_r; \
__pl_r = pl_ldadd_acq((lock), PLOCK64_SL_1) & (PLOCK64_WL_ANY | PLOCK64_SL_ANY);\
if (__builtin_expect(__pl_r, 0)) \
pl_sub_noret_lax((lock), PLOCK64_SL_1); \
!__pl_r; /* return value */ \
}) : (sizeof(*(lock)) == 4) ? ({ \
register unsigned int __pl_r = pl_deref_int(lock); \
pl_barrier(); \
if (!__builtin_expect(__pl_r & (PLOCK32_WL_ANY | PLOCK32_SL_ANY), 0)) { \
__pl_r = pl_xadd((lock), PLOCK32_SL_1) & \
(PLOCK32_WL_ANY | PLOCK32_SL_ANY); \
if (__builtin_expect(__pl_r, 0)) \
pl_sub((lock), PLOCK32_SL_1); \
} \
register unsigned int __pl_r; \
__pl_r = pl_ldadd_acq((lock), PLOCK32_SL_1) & (PLOCK32_WL_ANY | PLOCK32_SL_ANY);\
if (__builtin_expect(__pl_r, 0)) \
pl_sub_noret_lax((lock), PLOCK32_SL_1); \
!__pl_r; /* return value */ \
}) : ({ \
void __unsupported_argument_size_for_pl_try_rtos__(char *,int); \
@ -616,9 +618,9 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
register unsigned long __pl_r; \
pl_barrier(); \
while (1) { \
__pl_r = pl_xadd(__lk_r, __set_r); \
__pl_r = pl_ldadd_acq(__lk_r, __set_r); \
if (__builtin_expect(__pl_r & __msk_r, 0)) { \
if (pl_xadd(__lk_r, - __set_r)) \
if (pl_ldadd_lax(__lk_r, - __set_r)) \
break; /* the caller needs to drop the lock now */ \
continue; /* lock was released, try again */ \
} \
@ -636,9 +638,9 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
register unsigned int __pl_r; \
pl_barrier(); \
while (1) { \
__pl_r = pl_xadd(__lk_r, __set_r); \
__pl_r = pl_ldadd_acq(__lk_r, __set_r); \
if (__builtin_expect(__pl_r & __msk_r, 0)) { \
if (pl_xadd(__lk_r, - __set_r)) \
if (pl_ldadd_lax(__lk_r, - __set_r)) \
break; /* the caller needs to drop the lock now */ \
continue; /* lock was released, try again */ \
} \
@ -671,10 +673,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
register unsigned long __pl_r = pl_deref_long(lock) & PLOCK64_SL_ANY; \
pl_barrier(); \
if (!__builtin_expect(__pl_r, 0)) { \
__pl_r = pl_xadd((lock), PLOCK64_WL_1); \
__pl_r = pl_ldadd_acq((lock), PLOCK64_WL_1); \
while (1) { \
if (__builtin_expect(__pl_r & PLOCK64_SL_ANY, 0)) { \
pl_sub((lock), PLOCK64_WL_1); \
pl_sub_noret_lax((lock), PLOCK64_WL_1); \
break; /* return !__pl_r */ \
} \
__pl_r &= PLOCK64_RL_ANY; \
@ -688,10 +690,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
register unsigned int __pl_r = pl_deref_int(lock) & PLOCK32_SL_ANY; \
pl_barrier(); \
if (!__builtin_expect(__pl_r, 0)) { \
__pl_r = pl_xadd((lock), PLOCK32_WL_1); \
__pl_r = pl_ldadd_acq((lock), PLOCK32_WL_1); \
while (1) { \
if (__builtin_expect(__pl_r & PLOCK32_SL_ANY, 0)) { \
pl_sub((lock), PLOCK32_WL_1); \
pl_sub_noret_lax((lock), PLOCK32_WL_1); \
break; /* return !__pl_r */ \
} \
__pl_r &= PLOCK32_RL_ANY; \
@ -718,12 +720,12 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
register unsigned long __set_r = PLOCK64_WL_1; \
register unsigned long __msk_r = PLOCK64_SL_ANY; \
register unsigned long __pl_r; \
__pl_r = pl_xadd(__lk_r, __set_r); \
__pl_r = pl_ldadd_acq(__lk_r, __set_r); \
while (__builtin_expect(__pl_r & PLOCK64_RL_ANY, 0)) { \
if (__builtin_expect(__pl_r & __msk_r, 0)) { \
pl_sub(__lk_r, __set_r); \
pl_sub_noret_lax(__lk_r, __set_r); \
pl_wait_unlock_long(__lk_r, __msk_r); \
__pl_r = pl_xadd(__lk_r, __set_r); \
__pl_r = pl_ldadd_acq(__lk_r, __set_r); \
continue; \
} \
/* wait for all readers to leave or upgrade */ \
@ -737,12 +739,12 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
register unsigned int __set_r = PLOCK32_WL_1; \
register unsigned int __msk_r = PLOCK32_SL_ANY; \
register unsigned int __pl_r; \
__pl_r = pl_xadd(__lk_r, __set_r); \
__pl_r = pl_ldadd_acq(__lk_r, __set_r); \
while (__builtin_expect(__pl_r & PLOCK32_RL_ANY, 0)) { \
if (__builtin_expect(__pl_r & __msk_r, 0)) { \
pl_sub(__lk_r, __set_r); \
pl_sub_noret_lax(__lk_r, __set_r); \
pl_wait_unlock_int(__lk_r, __msk_r); \
__pl_r = pl_xadd(__lk_r, __set_r); \
__pl_r = pl_ldadd_acq(__lk_r, __set_r); \
continue; \
} \
/* wait for all readers to leave or upgrade */ \
@ -762,10 +764,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
#define pl_drop_a(lock) ( \
(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
pl_barrier(); \
pl_sub(lock, PLOCK64_WL_1); \
pl_sub_noret_rel(lock, PLOCK64_WL_1); \
}) : (sizeof(*(lock)) == 4) ? ({ \
pl_barrier(); \
pl_sub(lock, PLOCK32_WL_1); \
pl_sub_noret_rel(lock, PLOCK32_WL_1); \
}) : ({ \
void __unsupported_argument_size_for_pl_drop_a__(char *,int); \
if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \
@ -779,7 +781,7 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
register unsigned long *__lk_r = (unsigned long *)(lock); \
register unsigned long __set_r = PLOCK64_RL_1 - PLOCK64_WL_1; \
register unsigned long __msk_r = PLOCK64_WL_ANY; \
register unsigned long __pl_r = pl_xadd(__lk_r, __set_r) + __set_r; \
register unsigned long __pl_r = pl_ldadd(__lk_r, __set_r) + __set_r; \
while (__builtin_expect(__pl_r & __msk_r, 0)) { \
__pl_r = pl_wait_unlock_long(__lk_r, __msk_r); \
} \
@ -788,7 +790,7 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
register unsigned int *__lk_r = (unsigned int *)(lock); \
register unsigned int __set_r = PLOCK32_RL_1 - PLOCK32_WL_1; \
register unsigned int __msk_r = PLOCK32_WL_ANY; \
register unsigned int __pl_r = pl_xadd(__lk_r, __set_r) + __set_r; \
register unsigned int __pl_r = pl_ldadd(__lk_r, __set_r) + __set_r; \
while (__builtin_expect(__pl_r & __msk_r, 0)) { \
__pl_r = pl_wait_unlock_int(__lk_r, __msk_r); \
} \
@ -813,10 +815,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
register unsigned long __pl_r = pl_deref_long(lock) & PLOCK64_SL_ANY; \
pl_barrier(); \
if (!__builtin_expect(__pl_r, 0)) { \
__pl_r = pl_xadd((lock), PLOCK64_WL_1 - PLOCK64_RL_1); \
__pl_r = pl_ldadd_acq((lock), PLOCK64_WL_1 - PLOCK64_RL_1); \
while (1) { \
if (__builtin_expect(__pl_r & PLOCK64_SL_ANY, 0)) { \
pl_sub((lock), PLOCK64_WL_1 - PLOCK64_RL_1); \
pl_sub_noret_lax((lock), PLOCK64_WL_1 - PLOCK64_RL_1); \
break; /* return !__pl_r */ \
} \
__pl_r &= PLOCK64_RL_ANY; \
@ -830,10 +832,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
register unsigned int __pl_r = pl_deref_int(lock) & PLOCK32_SL_ANY; \
pl_barrier(); \
if (!__builtin_expect(__pl_r, 0)) { \
__pl_r = pl_xadd((lock), PLOCK32_WL_1 - PLOCK32_RL_1); \
__pl_r = pl_ldadd_acq((lock), PLOCK32_WL_1 - PLOCK32_RL_1); \
while (1) { \
if (__builtin_expect(__pl_r & PLOCK32_SL_ANY, 0)) { \
pl_sub((lock), PLOCK32_WL_1 - PLOCK32_RL_1); \
pl_sub_noret_lax((lock), PLOCK32_WL_1 - PLOCK32_RL_1); \
break; /* return !__pl_r */ \
} \
__pl_r &= PLOCK32_RL_ANY; \
@ -861,7 +863,7 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
#define pl_rtoj(lock) ( \
(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
register unsigned long *__lk_r = (unsigned long *)(lock); \
register unsigned long __pl_r = pl_xadd(__lk_r, PLOCK64_WL_1) + PLOCK64_WL_1; \
register unsigned long __pl_r = pl_ldadd_acq(__lk_r, PLOCK64_WL_1) + PLOCK64_WL_1;\
register unsigned char __m = 0; \
while (!(__pl_r & PLOCK64_SL_ANY) && \
(__pl_r / PLOCK64_WL_1 != (__pl_r & PLOCK64_RL_ANY) / PLOCK64_RL_1)) { \
@ -876,7 +878,7 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
pl_barrier(); \
}) : (sizeof(*(lock)) == 4) ? ({ \
register unsigned int *__lk_r = (unsigned int *)(lock); \
register unsigned int __pl_r = pl_xadd(__lk_r, PLOCK32_WL_1) + PLOCK32_WL_1; \
register unsigned int __pl_r = pl_ldadd_acq(__lk_r, PLOCK32_WL_1) + PLOCK32_WL_1;\
register unsigned char __m = 0; \
while (!(__pl_r & PLOCK32_SL_ANY) && \
(__pl_r / PLOCK32_WL_1 != (__pl_r & PLOCK32_RL_ANY) / PLOCK32_RL_1)) { \
@ -902,13 +904,13 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
register unsigned long *__lk_r = (unsigned long *)(lock); \
register unsigned long __pl_r = pl_deref_long(__lk_r); \
if (!(__pl_r & PLOCK64_SL_ANY)) \
pl_or(__lk_r, PLOCK64_SL_1); \
pl_or_noret(__lk_r, PLOCK64_SL_1); \
pl_barrier(); \
}) : (sizeof(*(lock)) == 4) ? ({ \
register unsigned int *__lk_r = (unsigned int *)(lock); \
register unsigned int __pl_r = pl_deref_int(__lk_r); \
if (!(__pl_r & PLOCK32_SL_ANY)) \
pl_or(__lk_r, PLOCK32_SL_1); \
pl_or_noret(__lk_r, PLOCK32_SL_1); \
pl_barrier(); \
}) : ({ \
void __unsupported_argument_size_for_pl_jtoc__(char *,int); \
@ -921,12 +923,12 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
#define pl_rtoc(lock) ( \
(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
register unsigned long *__lk_r = (unsigned long *)(lock); \
register unsigned long __pl_r = pl_xadd(__lk_r, PLOCK64_WL_1) + PLOCK64_WL_1; \
register unsigned long __pl_r = pl_ldadd_acq(__lk_r, PLOCK64_WL_1) + PLOCK64_WL_1;\
register unsigned char __m = 0; \
while (__builtin_expect(!(__pl_r & PLOCK64_SL_ANY), 0)) { \
unsigned char __loops; \
if (__pl_r / PLOCK64_WL_1 == (__pl_r & PLOCK64_RL_ANY) / PLOCK64_RL_1) { \
pl_or(__lk_r, PLOCK64_SL_1); \
pl_or_noret(__lk_r, PLOCK64_SL_1); \
break; \
} \
__loops = __m + 1; \
@ -940,12 +942,12 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
pl_barrier(); \
}) : (sizeof(*(lock)) == 4) ? ({ \
register unsigned int *__lk_r = (unsigned int *)(lock); \
register unsigned int __pl_r = pl_xadd(__lk_r, PLOCK32_WL_1) + PLOCK32_WL_1; \
register unsigned int __pl_r = pl_ldadd_acq(__lk_r, PLOCK32_WL_1) + PLOCK32_WL_1;\
register unsigned char __m = 0; \
while (__builtin_expect(!(__pl_r & PLOCK32_SL_ANY), 0)) { \
unsigned char __loops; \
if (__pl_r / PLOCK32_WL_1 == (__pl_r & PLOCK32_RL_ANY) / PLOCK32_RL_1) { \
pl_or(__lk_r, PLOCK32_SL_1); \
pl_or_noret(__lk_r, PLOCK32_SL_1); \
break; \
} \
__loops = __m + 1; \
@ -969,16 +971,16 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
register unsigned long *__lk_r = (unsigned long *)(lock); \
register unsigned long __set_r = - PLOCK64_RL_1 - PLOCK64_WL_1; \
register unsigned long __pl_r = pl_xadd(__lk_r, __set_r) + __set_r; \
register unsigned long __pl_r = pl_ldadd(__lk_r, __set_r) + __set_r; \
if (!(__pl_r & PLOCK64_RL_ANY)) \
pl_and(__lk_r, ~PLOCK64_SL_1); \
pl_and_noret(__lk_r, ~PLOCK64_SL_1); \
pl_barrier(); \
}) : (sizeof(*(lock)) == 4) ? ({ \
register unsigned int *__lk_r = (unsigned int *)(lock); \
register unsigned int __set_r = - PLOCK32_RL_1 - PLOCK32_WL_1; \
register unsigned int __pl_r = pl_xadd(__lk_r, __set_r) + __set_r; \
register unsigned int __pl_r = pl_ldadd(__lk_r, __set_r) + __set_r; \
if (!(__pl_r & PLOCK32_RL_ANY)) \
pl_and(__lk_r, ~PLOCK32_SL_1); \
pl_and_noret(__lk_r, ~PLOCK32_SL_1); \
pl_barrier(); \
}) : ({ \
void __unsupported_argument_size_for_pl_drop_c__(char *,int); \
@ -991,10 +993,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
#define pl_ctoa(lock) ( \
(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
register unsigned long *__lk_r = (unsigned long *)(lock); \
register unsigned long __pl_r = pl_xadd(__lk_r, -PLOCK64_RL_1) - PLOCK64_RL_1; \
register unsigned long __pl_r = pl_ldadd(__lk_r, -PLOCK64_RL_1) - PLOCK64_RL_1;\
while (__pl_r & PLOCK64_SL_ANY) { \
if (!(__pl_r & PLOCK64_RL_ANY)) { \
pl_and(__lk_r, ~PLOCK64_SL_1); \
pl_and_noret(__lk_r, ~PLOCK64_SL_1); \
break; \
} \
pl_cpu_relax(); \
@ -1004,10 +1006,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
pl_barrier(); \
}) : (sizeof(*(lock)) == 4) ? ({ \
register unsigned int *__lk_r = (unsigned int *)(lock); \
register unsigned int __pl_r = pl_xadd(__lk_r, -PLOCK32_RL_1) - PLOCK32_RL_1; \
register unsigned int __pl_r = pl_ldadd(__lk_r, -PLOCK32_RL_1) - PLOCK32_RL_1; \
while (__pl_r & PLOCK32_SL_ANY) { \
if (!(__pl_r & PLOCK32_RL_ANY)) { \
pl_and(__lk_r, ~PLOCK32_SL_1); \
pl_and_noret(__lk_r, ~PLOCK32_SL_1); \
break; \
} \
pl_cpu_relax(); \
@ -1026,10 +1028,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
#define pl_atoj(lock) ( \
(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
pl_barrier(); \
pl_add(lock, PLOCK64_RL_1); \
pl_add_noret(lock, PLOCK64_RL_1); \
}) : (sizeof(*(lock)) == 4) ? ({ \
pl_barrier(); \
pl_add(lock, PLOCK32_RL_1); \
pl_add_noret(lock, PLOCK32_RL_1); \
}) : ({ \
void __unsupported_argument_size_for_pl_atoj__(char *,int); \
if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \
@ -1071,14 +1073,14 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
register unsigned long __pl_r; \
register unsigned char __m; \
pl_wait_unlock_long(__lk_r, __msk_r); \
__pl_r = pl_xadd(__lk_r, __set_r) + __set_r; \
__pl_r = pl_ldadd_acq(__lk_r, __set_r) + __set_r; \
/* wait for all other readers to leave */ \
__m = 0; \
while (__builtin_expect(__pl_r & PLOCK64_RL_2PL, 0)) { \
unsigned char __loops; \
/* give up on other writers */ \
if (__builtin_expect(__pl_r & PLOCK64_WL_2PL, 0)) { \
pl_sub(__lk_r, __set_r); \
pl_sub_noret_lax(__lk_r, __set_r); \
__pl_r = 0; /* failed to get the lock */ \
break; \
} \
@ -1099,14 +1101,14 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
register unsigned int __pl_r; \
register unsigned char __m; \
pl_wait_unlock_int(__lk_r, __msk_r); \
__pl_r = pl_xadd(__lk_r, __set_r) + __set_r; \
__pl_r = pl_ldadd_acq(__lk_r, __set_r) + __set_r; \
/* wait for all other readers to leave */ \
__m = 0; \
while (__builtin_expect(__pl_r & PLOCK32_RL_2PL, 0)) { \
unsigned char __loops; \
/* but rollback on other writers */ \
if (__builtin_expect(__pl_r & PLOCK32_WL_2PL, 0)) { \
pl_sub(__lk_r, __set_r); \
pl_sub_noret_lax(__lk_r, __set_r); \
__pl_r = 0; /* failed to get the lock */ \
break; \
} \
@ -1146,14 +1148,14 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
register unsigned char __m; \
__retry: \
pl_wait_unlock_long(__lk_r, __msk_r); \
__pl_r = pl_xadd(__lk_r, __set_r) + __set_r; \
__pl_r = pl_ldadd_acq(__lk_r, __set_r) + __set_r; \
/* wait for all other readers to leave */ \
__m = 0; \
while (__builtin_expect(__pl_r & PLOCK64_RL_2PL, 0)) { \
unsigned char __loops; \
/* but rollback on other writers */ \
if (__builtin_expect(__pl_r & PLOCK64_WL_2PL, 0)) { \
pl_sub(__lk_r, __set_r); \
pl_sub_noret_lax(__lk_r, __set_r); \
goto __retry; \
} \
__loops = __m + 1; \
@ -1175,14 +1177,14 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
register unsigned char __m; \
__retry: \
pl_wait_unlock_int(__lk_r, __msk_r); \
__pl_r = pl_xadd(__lk_r, __set_r) + __set_r; \
__pl_r = pl_ldadd_acq(__lk_r, __set_r) + __set_r; \
/* wait for all other readers to leave */ \
__m = 0; \
while (__builtin_expect(__pl_r & PLOCK32_RL_2PL, 0)) { \
unsigned char __loops; \
/* but rollback on other writers */ \
if (__builtin_expect(__pl_r & PLOCK32_WL_2PL, 0)) { \
pl_sub(__lk_r, __set_r); \
pl_sub_noret_lax(__lk_r, __set_r); \
goto __retry; \
} \
__loops = __m + 1; \
@ -1207,10 +1209,10 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
#define pl_drop_j(lock) ( \
(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
pl_barrier(); \
pl_sub(lock, PLOCK64_WL_1 | PLOCK64_RL_1); \
pl_sub_noret_rel(lock, PLOCK64_WL_1 | PLOCK64_RL_1); \
}) : (sizeof(*(lock)) == 4) ? ({ \
pl_barrier(); \
pl_sub(lock, PLOCK32_WL_1 | PLOCK32_RL_1); \
pl_sub_noret_rel(lock, PLOCK32_WL_1 | PLOCK32_RL_1); \
}) : ({ \
void __unsupported_argument_size_for_pl_drop_j__(char *,int); \
if (sizeof(*(lock)) != 4 && (sizeof(long) != 8 || sizeof(*(lock)) != 8)) \
@ -1303,7 +1305,7 @@ static inline void pl_lorw_rdlock(unsigned long *lock)
lk = pl_wait_unlock_long(lock, PLOCK_LORW_WRQ_MASK);
/* count us as visitor among others */
lk = pl_xadd(lock, PLOCK_LORW_SHR_BASE);
lk = pl_ldadd_acq(lock, PLOCK_LORW_SHR_BASE);
/* wait for end of exclusive access if any */
if (lk & PLOCK_LORW_EXC_MASK)
@ -1335,7 +1337,7 @@ static inline void pl_lorw_wrlock(unsigned long *lock)
if (lk & PLOCK_LORW_SHR_MASK) {
/* note below, an OR is significantly cheaper than BTS or XADD */
if (!(lk & PLOCK_LORW_WRQ_MASK))
pl_or(lock, PLOCK_LORW_WRQ_BASE);
pl_or_noret(lock, PLOCK_LORW_WRQ_BASE);
lk = pl_wait_unlock_long(lock, PLOCK_LORW_SHR_MASK);
}
@ -1360,13 +1362,13 @@ static inline void pl_lorw_wrlock(unsigned long *lock)
__attribute__((unused,always_inline,no_instrument_function))
static inline void pl_lorw_rdunlock(unsigned long *lock)
{
pl_sub(lock, PLOCK_LORW_SHR_BASE);
pl_sub_noret_rel(lock, PLOCK_LORW_SHR_BASE);
}
__attribute__((unused,always_inline,no_instrument_function))
static inline void pl_lorw_wrunlock(unsigned long *lock)
{
pl_and(lock, ~(PLOCK_LORW_WRQ_MASK | PLOCK_LORW_EXC_MASK));
pl_and_noret_rel(lock, ~(PLOCK_LORW_WRQ_MASK | PLOCK_LORW_EXC_MASK));
}
__attribute__((unused,always_inline,no_instrument_function))