/*
 * Mirror of http://git.haproxy.org/git/haproxy.git/
 * Synced 2025-01-07 12:49:42 +00:00, revision 7122ab31b1.
 *
 * The atomic-ops header contains some low-level functions to do atomic
 * operations. These operations are used by the progressive locks (plock).
 *
 * 428 lines, 28 KiB, C.
 */
/* plock - progressive locks
 *
 * Copyright (C) 2012-2017 Willy Tarreau <w@1wt.eu>
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "atomic-ops.h"
/* Bit layout of a 64-bit lock word, as defined by the masks below:
 *   - bits  2..31 : R (read) counter,  PLOCK64_RL_1 is one unit
 *   - bits 32..33 : S (seek) counter,  PLOCK64_SL_1 is one unit
 *   - bits 34..63 : W (write) counter, PLOCK64_WL_1 is one unit
 * Bits 0..1 are not covered by any of these masks.
 */
#define PLOCK64_RL_1   0x0000000000000004ULL
#define PLOCK64_RL_ANY 0x00000000FFFFFFFCULL
#define PLOCK64_SL_1   0x0000000100000000ULL
#define PLOCK64_SL_ANY 0x0000000300000000ULL
#define PLOCK64_WL_1   0x0000000400000000ULL
#define PLOCK64_WL_ANY 0xFFFFFFFC00000000ULL

/* Bit layout of a 32-bit lock word, as defined by the masks below:
 *   - bits  2..15 : R (read) counter,  PLOCK32_RL_1 is one unit
 *   - bits 16..17 : S (seek) counter,  PLOCK32_SL_1 is one unit
 *   - bits 18..31 : W (write) counter, PLOCK32_WL_1 is one unit
 * Bits 0..1 are not covered by any of these masks.
 */
#define PLOCK32_RL_1   0x00000004
#define PLOCK32_RL_ANY 0x0000FFFC
#define PLOCK32_SL_1   0x00010000
#define PLOCK32_SL_ANY 0x00030000
#define PLOCK32_WL_1   0x00040000
#define PLOCK32_WL_ANY 0xFFFC0000

/* Dereferences <*p> as unsigned long without causing aliasing issues.
 * <p> is converted through a void pointer to a volatile unsigned long pointer,
 * and the value read through it is the value of the expression.
 */
#define pl_deref_long(p) ({ volatile unsigned long *__plock_l = (void *)(p); *__plock_l; })

/* Dereferences <*p> as unsigned int without causing aliasing issues.
 * <p> is converted through a void pointer to a volatile unsigned int pointer,
 * and the value read through it is the value of the expression.
 */
#define pl_deref_int(p) ({ volatile unsigned int *__plock_i = (void *)(p); *__plock_i; })

/* Request shared read access (R), return non-zero on success, otherwise 0.
 *
 * <lock> must point to a 4-byte word, or to an 8-byte word when long is 8
 * bytes; any other size expands to a call to a function that is only declared
 * here (presumably to make the build fail). <lock> is evaluated several times,
 * so it must not have side effects.
 *
 * The word is first read without being modified and the attempt is abandoned
 * if any W bit is set. Otherwise one R is added with pl_xadd(); if the value
 * it returns (presumably the previous lock word) shows W bits, the R is
 * removed again and the attempt fails.
 *
 * The temporary is called __pl_r so that it cannot capture an identifier used
 * in the caller's <lock> expression (e.g. a variable named "ret").
 */
#define pl_try_r(lock) ( \
	(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
		unsigned long __pl_r = pl_deref_long(lock) & PLOCK64_WL_ANY; \
		pl_barrier(); \
		if (!__builtin_expect(__pl_r, 0)) { \
			__pl_r = pl_xadd((lock), PLOCK64_RL_1) & PLOCK64_WL_ANY; \
			if (__builtin_expect(__pl_r, 0)) \
				pl_sub((lock), PLOCK64_RL_1); \
		} \
		!__pl_r; /* return value */ \
	}) : (sizeof(*(lock)) == 4) ? ({ \
		unsigned int __pl_r = pl_deref_int(lock) & PLOCK32_WL_ANY; \
		pl_barrier(); \
		if (!__builtin_expect(__pl_r, 0)) { \
			__pl_r = pl_xadd((lock), PLOCK32_RL_1) & PLOCK32_WL_ANY; \
			if (__builtin_expect(__pl_r, 0)) \
				pl_sub((lock), PLOCK32_RL_1); \
		} \
		!__pl_r; /* return value */ \
	}) : ({ \
		void __unsupported_argument_size_for_pl_try_r__(char *,int); \
		__unsupported_argument_size_for_pl_try_r__(__FILE__,__LINE__); \
		0; \
	}) \
)

/* Request shared read access (R) and wait for it: pl_try_r() is retried until
 * it returns non-zero, with a call to pl_cpu_relax() after each failed
 * attempt. Expands to a single statement.
 */
#define pl_take_r(lock) \
	do { \
		while (__builtin_expect(pl_try_r(lock), 1) == 0) \
			pl_cpu_relax(); \
	} while (0)

/* Release the read access (R) lock by subtracting one R from the lock word
 * with pl_sub(). <lock> must point to a 4-byte word, or to an 8-byte word when
 * long is 8 bytes; any other size expands to a call to a function that is only
 * declared here (presumably to make the build fail).
 */
#define pl_drop_r(lock) ( \
	(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
		pl_sub((lock), PLOCK64_RL_1); \
	}) : (sizeof(*(lock)) == 4) ? ({ \
		pl_sub((lock), PLOCK32_RL_1); \
	}) : ({ \
		void __unsupported_argument_size_for_pl_drop_r__(char *,int); \
		__unsupported_argument_size_for_pl_drop_r__(__FILE__,__LINE__); \
	}) \
)

/* Request a seek access (S), return non-zero on success, otherwise 0.
 *
 * <lock> must point to a 4-byte word, or to an 8-byte word when long is 8
 * bytes; any other size expands to a call to a function that is only declared
 * here (presumably to make the build fail). <lock> is evaluated several times,
 * so it must not have side effects.
 *
 * The word is first read without being modified and the attempt is abandoned
 * if any W or S bit is set. Otherwise one S and one R are added together with
 * pl_xadd(); if the value it returns (presumably the previous lock word) shows
 * W or S bits, both are removed again and the attempt fails.
 *
 * The temporary is called __pl_r so that it cannot capture an identifier used
 * in the caller's <lock> expression (e.g. a variable named "ret").
 */
#define pl_try_s(lock) ( \
	(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
		unsigned long __pl_r = pl_deref_long(lock); \
		pl_barrier(); \
		if (!__builtin_expect(__pl_r & (PLOCK64_WL_ANY | PLOCK64_SL_ANY), 0)) { \
			__pl_r = pl_xadd((lock), PLOCK64_SL_1 | PLOCK64_RL_1) & \
			         (PLOCK64_WL_ANY | PLOCK64_SL_ANY); \
			if (__builtin_expect(__pl_r, 0)) \
				pl_sub((lock), PLOCK64_SL_1 | PLOCK64_RL_1); \
		} \
		!__pl_r; /* return value */ \
	}) : (sizeof(*(lock)) == 4) ? ({ \
		unsigned int __pl_r = pl_deref_int(lock); \
		pl_barrier(); \
		if (!__builtin_expect(__pl_r & (PLOCK32_WL_ANY | PLOCK32_SL_ANY), 0)) { \
			__pl_r = pl_xadd((lock), PLOCK32_SL_1 | PLOCK32_RL_1) & \
			         (PLOCK32_WL_ANY | PLOCK32_SL_ANY); \
			if (__builtin_expect(__pl_r, 0)) \
				pl_sub((lock), PLOCK32_SL_1 | PLOCK32_RL_1); \
		} \
		!__pl_r; /* return value */ \
	}) : ({ \
		void __unsupported_argument_size_for_pl_try_s__(char *,int); \
		__unsupported_argument_size_for_pl_try_s__(__FILE__,__LINE__); \
		0; \
	}) \
)

/* Request a seek access (S) and wait for it: pl_try_s() is retried until it
 * returns non-zero, with a call to pl_cpu_relax() after each failed attempt.
 * Expands to a single statement.
 */
#define pl_take_s(lock) \
	do { \
		while (__builtin_expect(pl_try_s(lock), 0) == 0) \
			pl_cpu_relax(); \
	} while (0)

/* Release the seek access (S) lock by subtracting one S and one R from the
 * lock word with pl_sub(), i.e. the same amount pl_try_s() adds. <lock> must
 * point to a 4-byte word, or to an 8-byte word when long is 8 bytes; any other
 * size expands to a call to a function that is only declared here (presumably
 * to make the build fail).
 */
#define pl_drop_s(lock) ( \
	(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
		pl_sub((lock), PLOCK64_SL_1 | PLOCK64_RL_1); \
	}) : (sizeof(*(lock)) == 4) ? ({ \
		pl_sub((lock), PLOCK32_SL_1 | PLOCK32_RL_1); \
	}) : ({ \
		void __unsupported_argument_size_for_pl_drop_s__(char *,int); \
		__unsupported_argument_size_for_pl_drop_s__(__FILE__,__LINE__); \
	}) \
)

/* Drop the S lock and go back to the R lock: one S is subtracted from the lock
 * word with pl_sub() while the R count is left untouched. <lock> must point to
 * a 4-byte word, or to an 8-byte word when long is 8 bytes; any other size
 * expands to a call to a function that is only declared here (presumably to
 * make the build fail).
 */
#define pl_stor(lock) ( \
	(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
		pl_sub((lock), PLOCK64_SL_1); \
	}) : (sizeof(*(lock)) == 4) ? ({ \
		pl_sub((lock), PLOCK32_SL_1); \
	}) : ({ \
		void __unsupported_argument_size_for_pl_stor__(char *,int); \
		__unsupported_argument_size_for_pl_stor__(__FILE__,__LINE__); \
	}) \
)

/* Take the W lock under the S lock.
 *
 * One W is added to the lock word with pl_xadd(), then the word is re-read in
 * a loop until its R field is exactly one R (presumably the caller's own, held
 * as part of S). The macro has no value. <lock> must point to a 4-byte word,
 * or to an 8-byte word when long is 8 bytes; any other size expands to a call
 * to a function that is only declared here (presumably to make the build
 * fail). <lock> is evaluated several times, so it must not have side effects.
 *
 * The temporary is called __pl_r so that it cannot capture an identifier used
 * in the caller's <lock> expression (e.g. a variable named "ret").
 */
#define pl_stow(lock) ( \
	(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
		unsigned long __pl_r = pl_xadd((lock), PLOCK64_WL_1); \
		pl_barrier(); \
		while ((__pl_r & PLOCK64_RL_ANY) != PLOCK64_RL_1) \
			__pl_r = pl_deref_long(lock); \
	}) : (sizeof(*(lock)) == 4) ? ({ \
		unsigned int __pl_r = pl_xadd((lock), PLOCK32_WL_1); \
		pl_barrier(); \
		while ((__pl_r & PLOCK32_RL_ANY) != PLOCK32_RL_1) \
			__pl_r = pl_deref_int(lock); \
	}) : ({ \
		void __unsupported_argument_size_for_pl_stow__(char *,int); \
		__unsupported_argument_size_for_pl_stow__(__FILE__,__LINE__); \
	}) \
)

/* Drop the W lock and go back to the S lock: one W is subtracted from the lock
 * word with pl_sub() while the S and R counts are left untouched. <lock> must
 * point to a 4-byte word, or to an 8-byte word when long is 8 bytes; any other
 * size expands to a call to a function that is only declared here (presumably
 * to make the build fail).
 */
#define pl_wtos(lock) ( \
	(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
		pl_sub((lock), PLOCK64_WL_1); \
	}) : (sizeof(*(lock)) == 4) ? ({ \
		pl_sub((lock), PLOCK32_WL_1); \
	}) : ({ \
		void __unsupported_argument_size_for_pl_wtos__(char *,int); \
		__unsupported_argument_size_for_pl_wtos__(__FILE__,__LINE__); \
	}) \
)

/* Drop the W lock and go back to the R lock: one W and one S are subtracted
 * from the lock word with pl_sub() while the R count is left untouched. <lock>
 * must point to a 4-byte word, or to an 8-byte word when long is 8 bytes; any
 * other size expands to a call to a function that is only declared here
 * (presumably to make the build fail).
 */
#define pl_wtor(lock) ( \
	(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
		pl_sub((lock), PLOCK64_WL_1 | PLOCK64_SL_1); \
	}) : (sizeof(*(lock)) == 4) ? ({ \
		pl_sub((lock), PLOCK32_WL_1 | PLOCK32_SL_1); \
	}) : ({ \
		void __unsupported_argument_size_for_pl_wtor__(char *,int); \
		__unsupported_argument_size_for_pl_wtor__(__FILE__,__LINE__); \
	}) \
)

/* Request a write access (W), return non-zero on success, otherwise 0.
 *
 * <lock> must point to a 4-byte word, or to an 8-byte word when long is 8
 * bytes; any other size expands to a call to a function that is only declared
 * here (presumably to make the build fail). <lock> is evaluated several times,
 * so it must not have side effects.
 *
 * The word is first read without being modified and the attempt is abandoned
 * if any W or S bit is set. Otherwise W, S and R are added together with
 * pl_xadd(). If the value it returns (presumably the previous lock word) shows
 * W or S bits, the addition is undone and the attempt fails; otherwise the
 * word is re-read until nothing but our own W+S+R remains, i.e. until all
 * other readers have left.
 *
 * Below there is something important : by taking both W and S, we will cause
 * an overflow of W at 4/5 of the maximum value that can be stored into W due
 * to the fact that S is 2 bits, so we're effectively adding 5 to the word
 * composed by W:S. But for all words multiple of 4 bits, the maximum value is
 * multiple of 15 thus of 5. So the largest value we can store with all bits
 * set to one will be met by adding 5, and then adding 5 again will place value
 * 1 in W and value 0 in S, so we never leave W with 0. Also, even upon such an
 * overflow, there's no risk to confuse it with an atomic lock because R is not
 * null since it will not have overflown. For 32-bit locks, this situation
 * happens when exactly 13108 threads try to grab the lock at once, W=1, S=0
 * and R=13108. For 64-bit locks, it happens at 858993460 concurrent writers
 * where W=1, S=0 and R=858993460.
 *
 * The temporary is called __pl_r so that it cannot capture an identifier used
 * in the caller's <lock> expression (e.g. a variable named "ret").
 */
#define pl_try_w(lock) ( \
	(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
		unsigned long __pl_r = pl_deref_long(lock); \
		pl_barrier(); \
		if (!__builtin_expect(__pl_r & (PLOCK64_WL_ANY | PLOCK64_SL_ANY), 0)) { \
			__pl_r = pl_xadd((lock), PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1); \
			if (__builtin_expect(__pl_r & (PLOCK64_WL_ANY | PLOCK64_SL_ANY), 0)) { \
				/* a writer, seeker or atomic is present, let's leave */ \
				pl_sub((lock), PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1); \
				__pl_r &= (PLOCK64_WL_ANY | PLOCK64_SL_ANY); /* return value */ \
			} else { \
				/* wait for all other readers to leave */ \
				while (__pl_r) \
					__pl_r = pl_deref_long(lock) - \
					         (PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1); \
				__pl_r = 0; \
			} \
		} \
		!__pl_r; /* return value */ \
	}) : (sizeof(*(lock)) == 4) ? ({ \
		unsigned int __pl_r = pl_deref_int(lock); \
		pl_barrier(); \
		if (!__builtin_expect(__pl_r & (PLOCK32_WL_ANY | PLOCK32_SL_ANY), 0)) { \
			__pl_r = pl_xadd((lock), PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1); \
			if (__builtin_expect(__pl_r & (PLOCK32_WL_ANY | PLOCK32_SL_ANY), 0)) { \
				/* a writer, seeker or atomic is present, let's leave */ \
				pl_sub((lock), PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1); \
				__pl_r &= (PLOCK32_WL_ANY | PLOCK32_SL_ANY); /* return value */ \
			} else { \
				/* wait for all other readers to leave */ \
				while (__pl_r) \
					__pl_r = pl_deref_int(lock) - \
					         (PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1); \
				__pl_r = 0; \
			} \
		} \
		!__pl_r; /* return value */ \
	}) : ({ \
		void __unsupported_argument_size_for_pl_try_w__(char *,int); \
		__unsupported_argument_size_for_pl_try_w__(__FILE__,__LINE__); \
		0; \
	}) \
)

/* Request a write access (W) and wait for it: pl_try_w() is retried until it
 * returns non-zero, with a call to pl_cpu_relax() after each failed attempt.
 * Expands to a single statement.
 */
#define pl_take_w(lock) \
	do { \
		while (__builtin_expect(pl_try_w(lock), 0) == 0) \
			pl_cpu_relax(); \
	} while (0)

/* Drop the write (W) lock entirely by subtracting one W, one S and one R from
 * the lock word with pl_sub(), i.e. the same amount pl_try_w() adds. <lock>
 * must point to a 4-byte word, or to an 8-byte word when long is 8 bytes; any
 * other size expands to a call to a function that is only declared here
 * (presumably to make the build fail).
 */
#define pl_drop_w(lock) ( \
	(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
		pl_sub((lock), PLOCK64_WL_1 | PLOCK64_SL_1 | PLOCK64_RL_1); \
	}) : (sizeof(*(lock)) == 4) ? ({ \
		pl_sub((lock), PLOCK32_WL_1 | PLOCK32_SL_1 | PLOCK32_RL_1); \
	}) : ({ \
		void __unsupported_argument_size_for_pl_drop_w__(char *,int); \
		__unsupported_argument_size_for_pl_drop_w__(__FILE__,__LINE__); \
	}) \
)

/* Try to upgrade from R to S, return non-zero on success, otherwise 0.
 * This lock will fail if S or W are already held. In case of failure to grab
 * the lock, it MUST NOT be retried without first dropping R, or it may never
 * complete due to S waiting for R to leave before upgrading to W.
 *
 * <lock> must point to a 4-byte word, or to an 8-byte word when long is 8
 * bytes; any other size expands to a call to a function that is only declared
 * here (presumably to make the build fail). <lock> is evaluated several times,
 * so it must not have side effects.
 *
 * Only S is added (the R count is left untouched); if the value returned by
 * pl_xadd() (presumably the previous lock word) shows W or S bits, the S is
 * removed again and the attempt fails.
 *
 * The temporary is called __pl_r so that it cannot capture an identifier used
 * in the caller's <lock> expression (e.g. a variable named "ret").
 */
#define pl_try_rtos(lock) ( \
	(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
		unsigned long __pl_r = pl_deref_long(lock); \
		pl_barrier(); \
		if (!__builtin_expect(__pl_r & (PLOCK64_WL_ANY | PLOCK64_SL_ANY), 0)) { \
			__pl_r = pl_xadd((lock), PLOCK64_SL_1) & \
			         (PLOCK64_WL_ANY | PLOCK64_SL_ANY); \
			if (__builtin_expect(__pl_r, 0)) \
				pl_sub((lock), PLOCK64_SL_1); \
		} \
		!__pl_r; /* return value */ \
	}) : (sizeof(*(lock)) == 4) ? ({ \
		unsigned int __pl_r = pl_deref_int(lock); \
		pl_barrier(); \
		if (!__builtin_expect(__pl_r & (PLOCK32_WL_ANY | PLOCK32_SL_ANY), 0)) { \
			__pl_r = pl_xadd((lock), PLOCK32_SL_1) & \
			         (PLOCK32_WL_ANY | PLOCK32_SL_ANY); \
			if (__builtin_expect(__pl_r, 0)) \
				pl_sub((lock), PLOCK32_SL_1); \
		} \
		!__pl_r; /* return value */ \
	}) : ({ \
		void __unsupported_argument_size_for_pl_try_rtos__(char *,int); \
		__unsupported_argument_size_for_pl_try_rtos__(__FILE__,__LINE__); \
		0; \
	}) \
)

/* Request atomic write access (A), return non-zero on success, otherwise 0.
 * It's a bit tricky as we only use the W bits for this and want to distinguish
 * between other atomic users and regular lock users. We have to give up if an
 * S lock appears. It's possible that such a lock stays hidden in the W bits
 * after an overflow, but in this case R is still held, ensuring we stay in the
 * loop until we discover the conflict. The lock only returns successfully if
 * all readers are gone (or converted to A).
 *
 * <lock> must point to a 4-byte word, or to an 8-byte word when long is 8
 * bytes; any other size expands to a call to a function that is only declared
 * here (presumably to make the build fail). <lock> is evaluated several times,
 * so it must not have side effects.
 *
 * The temporary is called __pl_r so that it cannot capture an identifier used
 * in the caller's <lock> expression (e.g. a variable named "ret").
 */
#define pl_try_a(lock) ( \
	(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
		unsigned long __pl_r = pl_deref_long(lock) & PLOCK64_SL_ANY; \
		pl_barrier(); \
		if (!__builtin_expect(__pl_r, 0)) { \
			__pl_r = pl_xadd((lock), PLOCK64_WL_1); \
			while (1) { \
				if (__builtin_expect(__pl_r & PLOCK64_SL_ANY, 0)) { \
					/* an S lock showed up: undo our W and fail */ \
					pl_sub((lock), PLOCK64_WL_1); \
					break; /* return !__pl_r */ \
				} \
				__pl_r &= PLOCK64_RL_ANY; \
				if (!__builtin_expect(__pl_r, 0)) \
					break; /* return !__pl_r */ \
				__pl_r = pl_deref_long(lock); \
			} \
		} \
		!__pl_r; /* return value */ \
	}) : (sizeof(*(lock)) == 4) ? ({ \
		unsigned int __pl_r = pl_deref_int(lock) & PLOCK32_SL_ANY; \
		pl_barrier(); \
		if (!__builtin_expect(__pl_r, 0)) { \
			__pl_r = pl_xadd((lock), PLOCK32_WL_1); \
			while (1) { \
				if (__builtin_expect(__pl_r & PLOCK32_SL_ANY, 0)) { \
					/* an S lock showed up: undo our W and fail */ \
					pl_sub((lock), PLOCK32_WL_1); \
					break; /* return !__pl_r */ \
				} \
				__pl_r &= PLOCK32_RL_ANY; \
				if (!__builtin_expect(__pl_r, 0)) \
					break; /* return !__pl_r */ \
				__pl_r = pl_deref_int(lock); \
			} \
		} \
		!__pl_r; /* return value */ \
	}) : ({ \
		void __unsupported_argument_size_for_pl_try_a__(char *,int); \
		__unsupported_argument_size_for_pl_try_a__(__FILE__,__LINE__); \
		0; \
	}) \
)

/* Request atomic write access (A) and wait for it: pl_try_a() is retried until
 * it returns non-zero, with a call to pl_cpu_relax() after each failed
 * attempt. Expands to a single statement.
 */
#define pl_take_a(lock) \
	do { \
		while (__builtin_expect(pl_try_a(lock), 1) == 0) \
			pl_cpu_relax(); \
	} while (0)

/* Release atomic write access (A) lock by subtracting one W from the lock word
 * with pl_sub(), i.e. the same amount pl_try_a() adds. <lock> must point to a
 * 4-byte word, or to an 8-byte word when long is 8 bytes; any other size
 * expands to a call to a function that is only declared here (presumably to
 * make the build fail).
 */
#define pl_drop_a(lock) ( \
	(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
		pl_sub((lock), PLOCK64_WL_1); \
	}) : (sizeof(*(lock)) == 4) ? ({ \
		pl_sub((lock), PLOCK32_WL_1); \
	}) : ({ \
		void __unsupported_argument_size_for_pl_drop_a__(char *,int); \
		__unsupported_argument_size_for_pl_drop_a__(__FILE__,__LINE__); \
	}) \
)

/* Try to upgrade from R to A, return non-zero on success, otherwise 0.
 * This lock will fail if S is held or appears while waiting (typically due to
 * a previous grab that was disguised as a W due to an overflow). In case of
 * failure to grab the lock, it MUST NOT be retried without first dropping R,
 * or it may never complete due to S waiting for R to leave before upgrading
 * to W. The lock succeeds once there's no more R (ie all of them have either
 * completed or were turned to A).
 *
 * <lock> must point to a 4-byte word, or to an 8-byte word when long is 8
 * bytes; any other size expands to a call to a function that is only declared
 * here (presumably to make the build fail). <lock> is evaluated several times,
 * so it must not have side effects.
 *
 * A single pl_xadd() of (W - R) both adds one W and removes the caller's R;
 * on failure the same amount is subtracted again, which restores the R.
 *
 * The temporary is called __pl_r so that it cannot capture an identifier used
 * in the caller's <lock> expression (e.g. a variable named "ret").
 */
#define pl_try_rtoa(lock) ( \
	(sizeof(long) == 8 && sizeof(*(lock)) == 8) ? ({ \
		unsigned long __pl_r = pl_deref_long(lock) & PLOCK64_SL_ANY; \
		pl_barrier(); \
		if (!__builtin_expect(__pl_r, 0)) { \
			__pl_r = pl_xadd((lock), PLOCK64_WL_1 - PLOCK64_RL_1); \
			while (1) { \
				if (__builtin_expect(__pl_r & PLOCK64_SL_ANY, 0)) { \
					/* an S lock showed up: undo and fail */ \
					pl_sub((lock), PLOCK64_WL_1 - PLOCK64_RL_1); \
					break; /* return !__pl_r */ \
				} \
				__pl_r &= PLOCK64_RL_ANY; \
				if (!__builtin_expect(__pl_r, 0)) \
					break; /* return !__pl_r */ \
				__pl_r = pl_deref_long(lock); \
			} \
		} \
		!__pl_r; /* return value */ \
	}) : (sizeof(*(lock)) == 4) ? ({ \
		unsigned int __pl_r = pl_deref_int(lock) & PLOCK32_SL_ANY; \
		pl_barrier(); \
		if (!__builtin_expect(__pl_r, 0)) { \
			__pl_r = pl_xadd((lock), PLOCK32_WL_1 - PLOCK32_RL_1); \
			while (1) { \
				if (__builtin_expect(__pl_r & PLOCK32_SL_ANY, 0)) { \
					/* an S lock showed up: undo and fail */ \
					pl_sub((lock), PLOCK32_WL_1 - PLOCK32_RL_1); \
					break; /* return !__pl_r */ \
				} \
				__pl_r &= PLOCK32_RL_ANY; \
				if (!__builtin_expect(__pl_r, 0)) \
					break; /* return !__pl_r */ \
				__pl_r = pl_deref_int(lock); \
			} \
		} \
		!__pl_r; /* return value */ \
	}) : ({ \
		void __unsupported_argument_size_for_pl_try_rtoa__(char *,int); \
		__unsupported_argument_size_for_pl_try_rtoa__(__FILE__,__LINE__); \
		0; \
	}) \
)