CLEANUP: shctx: remove the different inter-process locking techniques

With a single process, we no longer need USE_PRIVATE_CACHE, USE_FUTEX
nor USE_PTHREAD_PSHARED. Let's only keep the basic spinlock used to
lock between threads.
Willy Tarreau 2021-06-15 16:11:33 +02:00
parent b54ca70e7c
commit 6fd0450b47
6 changed files with 12 additions and 268 deletions
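
Before the per-file hunks, here is a condensed sketch of the locking model that remains once this commit is applied. It is reassembled from the kept lines of the shctx-t.h and shctx.h hunks below, so it is illustrative rather than a verbatim copy of the resulting files; __decl_thread() and the HA_SPIN_* macros are HAProxy's existing thread primitives.

#include <haproxy/api-t.h>
#include <haproxy/thread-t.h>
#include <haproxy/thread.h>

struct shared_context {
	__decl_thread(HA_SPINLOCK_T lock);   /* single inter-thread spinlock, no inter-process flavor */
	/* ... avail/hot lists, nbav counter, etc. ... */
};

/* The lock is only taken when the cache is really shared between threads;
 * shctx_init() sets use_shared_mem when it is called with shared != 0. */
extern int use_shared_mem;

#define shctx_lock(shctx)   if (use_shared_mem) HA_SPIN_LOCK(SHCTX_LOCK, &shctx->lock)
#define shctx_unlock(shctx) if (use_shared_mem) HA_SPIN_UNLOCK(SHCTX_LOCK, &shctx->lock)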

INSTALL

@@ -379,11 +379,8 @@ target. Common issues may include:
- clock_gettime() not found
=> your system needs USE_RT=1
- __sync_sub_and_fetch undefined in cache.o
=> your system needs either USE_PTHREAD_PSHARED=1 or USE_PRIVATE_CACHE=1
- many __sync_<something> errors in many files
=> your gcc is too old, build without threads and with private cache.
=> your gcc is too old, build without threads.
- many openssl errors
=> your OpenSSL version really is too old, do not enable OpenSSL
@@ -505,44 +502,6 @@ to the compiler so that any build warning will trigger an error. This is the
recommended way to build when developing, and it is expected that contributed
patches were tested with ERR=1.
The SSL stack supports session cache synchronization between all running
processes. This involves some atomic operations and synchronization operations
which come in multiple flavors depending on the system and architecture :
Atomic operations :
- internal assembler versions for x86/x86_64 architectures
- gcc builtins for other architectures. Some architectures might not
be fully supported or might require a more recent version of gcc.
If your architecture is not supported, you will have to either use
pthreads if supported, or disable the shared cache.
- pthread (posix threads). Pthreads are very common but inter-process
support is not that common, and some older operating systems did not
report an error when enabling multi-process mode, so they used to
silently fail, possibly causing crashes. Linux's implementation is
fine. OpenBSD doesn't support them and doesn't build. FreeBSD 9 builds
and reports an error at runtime, while certain older versions might
silently fail. Pthreads are enabled using USE_PTHREAD_PSHARED=1.
Synchronization operations :
- internal spinlock : this mode is OS-independent, light but will not
scale well to many processes. However, accesses to the session cache
are rare enough that this mode could certainly always be used. This
is the default mode.
- Futexes, which are Linux-specific highly scalable light weight mutexes
implemented in user-space with some limited assistance from the kernel.
This is the default on Linux 2.6 and above and is enabled by passing
USE_FUTEX=1
- pthread (posix threads). See above.
If none of these mechanisms is supported by your platform, you may need to
build with USE_PRIVATE_CACHE=1 to totally disable SSL cache sharing. Then it
is better not to run SSL on multiple processes. Note that you don't need these
features if you only intend to use multi-threading and never multi-process.
If you need to pass other defines, includes, libraries, etc... then please
check the Makefile to see which ones will be available in your case, and
use/override the USE_* variables from the Makefile.
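
The removed paragraphs above describe how build options used to select an inter-process locking mechanism. Condensed from the shctx.h hunk further down in this commit, that selection looked roughly like the sketch below; it is illustrative rather than the verbatim removed code (USE_SYSCALL_FUTEX is the internal define set by the Makefile's USE_FUTEX option).

/* Rough sketch of the build-time selection this commit removes. */
extern int use_shared_mem;

#if defined(USE_PRIVATE_CACHE)
/* cache never shared between processes: plain inter-thread spinlock */
#define shctx_lock(shctx)   if (use_shared_mem) HA_SPIN_LOCK(SHCTX_LOCK, &shctx->lock)
#define shctx_unlock(shctx) if (use_shared_mem) HA_SPIN_UNLOCK(SHCTX_LOCK, &shctx->lock)
#elif defined(USE_PTHREAD_PSHARED)
/* process-shared pthread mutex (PTHREAD_PROCESS_SHARED attribute) */
#define shctx_lock(shctx)   if (use_shared_mem) pthread_mutex_lock(&shctx->mutex)
#define shctx_unlock(shctx) if (use_shared_mem) pthread_mutex_unlock(&shctx->mutex)
#else
/* on contention, _shctx_lock()/_shctx_unlock() either sleep on a futex
 * (USE_SYSCALL_FUTEX) or spin with exponential backoff */
#define shctx_lock(shctx)   if (use_shared_mem) _shctx_lock(shctx)
#define shctx_unlock(shctx) if (use_shared_mem) _shctx_unlock(shctx)
#endif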

Makefile

@@ -22,9 +22,7 @@
# USE_PCRE2 : enable use of libpcre2 for regex.
# USE_PCRE2_JIT : enable JIT for faster regex on libpcre2
# USE_POLL : enable poll(). Automatic.
# USE_PRIVATE_CACHE : disable shared memory cache of ssl sessions.
# USE_THREAD : enable threads support.
# USE_PTHREAD_PSHARED : enable pthread process shared mutex on sslcache.
# USE_STATIC_PCRE : enable static libpcre. Recommended.
# USE_STATIC_PCRE2 : enable static libpcre2.
# USE_TPROXY : enable transparent proxy. Automatic.
@@ -35,7 +33,6 @@
# USE_GETADDRINFO : use getaddrinfo() to resolve IPv6 host names.
# USE_OPENSSL : enable use of OpenSSL. Recommended, but see below.
# USE_LUA : enable Lua support.
# USE_FUTEX : enable use of futex on kernel 2.6. Automatic.
# USE_ACCEPT4 : enable use of accept4() on linux. Automatic.
# USE_CLOSEFROM : enable use of closefrom() on *bsd, solaris. Automatic.
# USE_PRCTL : enable use of prctl(). Automatic.
@@ -308,10 +305,10 @@ LDFLAGS = $(ARCH_FLAGS) -g
# the reported build options.
use_opts = USE_EPOLL USE_KQUEUE USE_NETFILTER \
USE_PCRE USE_PCRE_JIT USE_PCRE2 USE_PCRE2_JIT USE_POLL \
USE_PRIVATE_CACHE USE_THREAD USE_PTHREAD_PSHARED USE_BACKTRACE \
USE_THREAD USE_BACKTRACE \
USE_STATIC_PCRE USE_STATIC_PCRE2 USE_TPROXY USE_LINUX_TPROXY \
USE_LINUX_SPLICE USE_LIBCRYPT USE_CRYPT_H \
USE_GETADDRINFO USE_OPENSSL USE_LUA USE_FUTEX USE_ACCEPT4 \
USE_GETADDRINFO USE_OPENSSL USE_LUA USE_ACCEPT4 \
USE_CLOSEFROM USE_ZLIB USE_SLZ USE_CPU_AFFINITY USE_TFO USE_NS \
USE_DL USE_RT USE_DEVICEATLAS USE_51DEGREES USE_WURFL USE_SYSTEMD \
USE_OBSOLETE_LINKER USE_PRCTL USE_THREAD_DUMP USE_EVPORTS USE_OT \
@@ -353,7 +350,7 @@ endif
ifeq ($(TARGET),linux-glibc)
set_target_defaults = $(call default_opts, \
USE_POLL USE_TPROXY USE_LIBCRYPT USE_DL USE_RT USE_CRYPT_H USE_NETFILTER \
USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_FUTEX USE_LINUX_TPROXY \
USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_LINUX_TPROXY \
USE_ACCEPT4 USE_LINUX_SPLICE USE_PRCTL USE_THREAD_DUMP USE_NS USE_TFO \
USE_GETADDRINFO USE_BACKTRACE)
ifneq ($(shell echo __arm__/__aarch64__ | $(CC) -E -xc - | grep '^[^\#]'),__arm__/__aarch64__)
@@ -365,7 +362,7 @@ endif
ifeq ($(TARGET),linux-glibc-legacy)
set_target_defaults = $(call default_opts, \
USE_POLL USE_TPROXY USE_LIBCRYPT USE_DL USE_RT USE_CRYPT_H USE_NETFILTER \
USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_FUTEX USE_LINUX_TPROXY \
USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_LINUX_TPROXY \
USE_ACCEPT4 USE_LINUX_SPLICE USE_PRCTL USE_THREAD_DUMP USE_GETADDRINFO)
endif
@@ -373,7 +370,7 @@ endif
ifeq ($(TARGET),linux-musl)
set_target_defaults = $(call default_opts, \
USE_POLL USE_TPROXY USE_LIBCRYPT USE_DL USE_RT USE_CRYPT_H USE_NETFILTER \
USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_FUTEX USE_LINUX_TPROXY \
USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_LINUX_TPROXY \
USE_ACCEPT4 USE_LINUX_SPLICE USE_PRCTL USE_THREAD_DUMP USE_NS USE_TFO \
USE_GETADDRINFO)
ifneq ($(shell echo __arm__/__aarch64__ | $(CC) -E -xc - | grep '^[^\#]'),__arm__/__aarch64__)
@@ -429,7 +426,7 @@ endif
# AIX 5.1 only
ifeq ($(TARGET),aix51)
set_target_defaults = $(call default_opts, \
USE_POLL USE_LIBCRYPT USE_OBSOLETE_LINKER USE_PRIVATE_CACHE)
USE_POLL USE_LIBCRYPT USE_OBSOLETE_LINKER)
TARGET_CFLAGS = -Dss_family=__ss_family -Dip6_hdr=ip6hdr -DSTEVENS_API -D_LINUX_SOURCE_COMPAT -Dunsetenv=my_unsetenv
DEBUG_CFLAGS =
endif
@@ -593,13 +590,6 @@ OPTIONS_OBJS += src/quic_sock.o src/proto_quic.o src/xprt_quic.o src/quic_tls.o
src/quic_frame.o src/quic_cc.o src/quic_cc_newreno.o
endif
# The private cache option affects the way the shctx is built
ifeq ($(USE_PRIVATE_CACHE),)
ifneq ($(USE_PTHREAD_PSHARED),)
OPTIONS_LDFLAGS += -lpthread
endif
endif
ifneq ($(USE_LUA),)
check_lua_lib = $(shell echo "int main(){}" | $(CC) -o /dev/null -x c - $(2) -l$(1) 2>/dev/null && echo $(1))
check_lua_inc = $(shell if [ -d $(2)$(1) ]; then echo $(2)$(1); fi;)

include/haproxy/shctx-t.h

@@ -14,9 +14,6 @@
#ifndef __HAPROXY_SHCTX_T_H
#define __HAPROXY_SHCTX_T_H
#if !defined (USE_PRIVATE_CACHE) && defined(USE_PTHREAD_PSHARED)
#include <pthread.h>
#endif
#include <haproxy/api-t.h>
#include <haproxy/thread-t.h>
@@ -49,15 +46,7 @@ struct shared_block {
};
struct shared_context {
#ifndef USE_PRIVATE_CACHE
#ifdef USE_PTHREAD_PSHARED
pthread_mutex_t mutex;
#else
unsigned int waiters;
#endif
#else
__decl_thread(HA_SPINLOCK_T lock); // used when USE_PRIVATE_CACHE=1
#endif
__decl_thread(HA_SPINLOCK_T lock);
struct list avail; /* list for active and free blocks */
struct list hot; /* list for locked blocks */
unsigned int nbav; /* number of available blocks */

include/haproxy/shctx.h

@@ -17,20 +17,7 @@
#include <haproxy/api.h>
#include <haproxy/list.h>
#include <haproxy/shctx-t.h>
#ifndef USE_PRIVATE_CACHE
#ifdef USE_PTHREAD_PSHARED
#include <pthread.h>
#else
#ifdef USE_SYSCALL_FUTEX
#include <unistd.h>
#include <linux/futex.h>
#include <sys/syscall.h>
#endif
#endif
#else
#include <haproxy/thread.h>
#endif
int shctx_init(struct shared_context **orig_shctx,
int maxblocks, int blocksize, unsigned int maxobjsz,
@@ -48,143 +35,11 @@ int shctx_row_data_get(struct shared_context *shctx, struct shared_block *first,
/* Lock functions */
#if defined (USE_PRIVATE_CACHE)
extern int use_shared_mem;
#define shctx_lock(shctx) if (use_shared_mem) HA_SPIN_LOCK(SHCTX_LOCK, &shctx->lock)
#define shctx_unlock(shctx) if (use_shared_mem) HA_SPIN_UNLOCK(SHCTX_LOCK, &shctx->lock)
#elif defined (USE_PTHREAD_PSHARED)
extern int use_shared_mem;
#define shctx_lock(shctx) if (use_shared_mem) pthread_mutex_lock(&shctx->mutex)
#define shctx_unlock(shctx) if (use_shared_mem) pthread_mutex_unlock(&shctx->mutex)
#else
extern int use_shared_mem;
#ifdef USE_SYSCALL_FUTEX
static inline void _shctx_wait4lock(unsigned int *count, unsigned int *uaddr, int value)
{
syscall(SYS_futex, uaddr, FUTEX_WAIT, value, NULL, 0, 0);
}
static inline void _shctx_awakelocker(unsigned int *uaddr)
{
syscall(SYS_futex, uaddr, FUTEX_WAKE, 1, NULL, 0, 0);
}
#else /* internal spin lock */
#if defined (__i486__) || defined (__i586__) || defined (__i686__) || defined (__x86_64__)
static inline void relax()
{
__asm volatile("rep;nop\n" ::: "memory");
}
#else /* if no x86_64 or i586 arch: use less optimized but generic asm */
static inline void relax()
{
__asm volatile("" ::: "memory");
}
#endif
static inline void _shctx_wait4lock(unsigned int *count, unsigned int *uaddr, int value)
{
int i;
for (i = 0; i < *count; i++) {
relax();
relax();
if (*uaddr != value)
return;
}
*count = (unsigned char)((*count << 1) + 1);
}
#define _shctx_awakelocker(a)
#endif
#if defined (__i486__) || defined (__i586__) || defined (__i686__) || defined (__x86_64__)
static inline unsigned int xchg(unsigned int *ptr, unsigned int x)
{
__asm volatile("lock xchgl %0,%1"
: "=r" (x), "+m" (*ptr)
: "0" (x)
: "memory");
return x;
}
static inline unsigned int cmpxchg(unsigned int *ptr, unsigned int old, unsigned int new)
{
unsigned int ret;
__asm volatile("lock cmpxchgl %2,%1"
: "=a" (ret), "+m" (*ptr)
: "r" (new), "0" (old)
: "memory");
return ret;
}
static inline unsigned char atomic_dec(unsigned int *ptr)
{
unsigned char ret;
__asm volatile("lock decl %0\n"
"setne %1\n"
: "+m" (*ptr), "=qm" (ret)
:
: "memory");
return ret;
}
#else /* if no x86_64 or i586 arch: use less optimized gcc >= 4.1 built-ins */
static inline unsigned int xchg(unsigned int *ptr, unsigned int x)
{
return __sync_lock_test_and_set(ptr, x);
}
static inline unsigned int cmpxchg(unsigned int *ptr, unsigned int old, unsigned int new)
{
return __sync_val_compare_and_swap(ptr, old, new);
}
static inline unsigned char atomic_dec(unsigned int *ptr)
{
return __sync_sub_and_fetch(ptr, 1) ? 1 : 0;
}
#endif
static inline void _shctx_lock(struct shared_context *shctx)
{
unsigned int x;
unsigned int count = 3;
x = cmpxchg(&shctx->waiters, 0, 1);
if (x) {
if (x != 2)
x = xchg(&shctx->waiters, 2);
while (x) {
_shctx_wait4lock(&count, &shctx->waiters, 2);
x = xchg(&shctx->waiters, 2);
}
}
}
static inline void _shctx_unlock(struct shared_context *shctx)
{
if (atomic_dec(&shctx->waiters)) {
shctx->waiters = 0;
_shctx_awakelocker(&shctx->waiters);
}
}
#define shctx_lock(shctx) if (use_shared_mem) _shctx_lock(shctx)
#define shctx_unlock(shctx) if (use_shared_mem) _shctx_unlock(shctx)
#endif
/* List Macros */
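
The removed _shctx_lock()/_shctx_unlock() pair above implements a classic three-state lock: 0 means unlocked, 1 means locked with no waiter, and 2 means locked with possible waiters, so the unlocker only issues a wake-up when someone may actually be sleeping. Below is a minimal sketch of the same state machine using C11 atomics instead of the removed hand-rolled assembly; the demo_* names and the main() driver are hypothetical and only serve as illustration.

#include <stdatomic.h>

/* waiters: 0 = unlocked, 1 = locked without waiters, 2 = locked with
 * possible waiters (same protocol as the removed code above). */
static void demo_lock(atomic_uint *waiters)
{
	unsigned int x = 0;

	/* fast path: 0 -> 1 acquires the lock uncontended */
	if (atomic_compare_exchange_strong(waiters, &x, 1))
		return;

	/* slow path: mark the lock as contended (2), then retry until the
	 * value observed before the exchange drops back to 0 */
	if (x != 2)
		x = atomic_exchange(waiters, 2);
	while (x) {
		/* the removed code either spun with exponential backoff or
		 * called futex(FUTEX_WAIT) here while *waiters stayed at 2 */
		x = atomic_exchange(waiters, 2);
	}
}

static void demo_unlock(atomic_uint *waiters)
{
	/* previous value 1: nobody waited; previous value 2: reset to 0
	 * and wake one waiter (futex(FUTEX_WAKE, 1) in the removed code) */
	if (atomic_fetch_sub(waiters, 1) != 1)
		atomic_store(waiters, 0);
}

int main(void)
{
	atomic_uint waiters = 0;

	demo_lock(&waiters);    /* uncontended: waiters goes 0 -> 1 */
	demo_unlock(&waiters);  /* back to 0, no wake-up needed */
	return 0;
}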

src/haproxy.c

@@ -1998,21 +1998,6 @@ static void init(int argc, char **argv)
exit(1);
}
/* recompute the amount of per-process memory depending on
* the shared SSL cache size
*/
if (global.rlimit_memmax_all) {
#if defined (USE_OPENSSL) && !defined(USE_PRIVATE_CACHE)
int64_t ssl_cache_bytes = global.tune.sslcachesize * 200LL;
global.rlimit_memmax =
((((int64_t)global.rlimit_memmax_all * 1048576LL) - ssl_cache_bytes) +
ssl_cache_bytes + 1048575LL) / 1048576LL;
#else
global.rlimit_memmax = global.rlimit_memmax_all;
#endif
}
#ifdef USE_NS
err_code |= netns_init();
if (err_code & (ERR_ABORT|ERR_FATAL)) {
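
A side note on the haproxy.c hunk above: the SSL-specific branch of the removed block computes the same result as its #else counterpart, because ssl_cache_bytes is subtracted and then added back before the megabyte round-up, so both branches simply yield rlimit_memmax_all. A minimal check of that arithmetic with made-up sample values (the numbers are hypothetical, purely illustrative):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	/* hypothetical figures: 256 MB limit, 20000 cached SSL sessions */
	int64_t rlimit_memmax_all = 256;
	int64_t ssl_cache_bytes = 20000 * 200LL;

	/* same expression as the removed block: the cache term cancels out
	 * and rounding up an exact multiple of 1048576 changes nothing */
	int64_t rlimit_memmax =
		(((rlimit_memmax_all * 1048576LL) - ssl_cache_bytes) +
		 ssl_cache_bytes + 1048575LL) / 1048576LL;

	assert(rlimit_memmax == rlimit_memmax_all);
	return 0;
}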

src/shctx.c

@@ -292,9 +292,6 @@ int shctx_init(struct shared_context **orig_shctx, int maxblocks, int blocksize,
int i;
struct shared_context *shctx;
int ret;
#ifdef USE_PTHREAD_PSHARED
pthread_mutexattr_t attr;
#endif
void *cur;
int maptype = MAP_PRIVATE;
@@ -305,8 +302,10 @@ int shctx_init(struct shared_context **orig_shctx, int maxblocks, int blocksize,
blocksize = (blocksize + sizeof(void *) - 1) & -sizeof(void *);
extra = (extra + sizeof(void *) - 1) & -sizeof(void *);
if (shared)
if (shared) {
maptype = MAP_SHARED;
use_shared_mem = 1;
}
shctx = (struct shared_context *)mmap(NULL, sizeof(struct shared_context) + extra + (maxblocks * (sizeof(struct shared_block) + blocksize)),
PROT_READ | PROT_WRITE, maptype | MAP_ANON, -1, 0);
@@ -316,42 +315,9 @@ int shctx_init(struct shared_context **orig_shctx, int maxblocks, int blocksize,
goto err;
}
HA_SPIN_INIT(&shctx->lock);
shctx->nbav = 0;
if (maptype == MAP_SHARED) {
#ifndef USE_PRIVATE_CACHE
#ifdef USE_PTHREAD_PSHARED
if (pthread_mutexattr_init(&attr)) {
munmap(shctx, sizeof(struct shared_context) + extra + (maxblocks * (sizeof(struct shared_block) + blocksize)));
shctx = NULL;
ret = SHCTX_E_INIT_LOCK;
goto err;
}
if (pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)) {
pthread_mutexattr_destroy(&attr);
munmap(shctx, sizeof(struct shared_context) + extra + (maxblocks * (sizeof(struct shared_block) + blocksize)));
shctx = NULL;
ret = SHCTX_E_INIT_LOCK;
goto err;
}
if (pthread_mutex_init(&shctx->mutex, &attr)) {
pthread_mutexattr_destroy(&attr);
munmap(shctx, sizeof(struct shared_context) + extra + (maxblocks * (sizeof(struct shared_block) + blocksize)));
shctx = NULL;
ret = SHCTX_E_INIT_LOCK;
goto err;
}
#else
shctx->waiters = 0;
#endif
#else
HA_SPIN_INIT(&shctx->lock);
#endif
use_shared_mem = 1;
}
LIST_INIT(&shctx->avail);
LIST_INIT(&shctx->hot);