CLEANUP: shctx: remove the different inter-process locking techniques

With a single process, we no longer need USE_PRIVATE_CACHE, USE_FUTEX
nor USE_PTHREAD_PSHARED. Let's only keep the basic spinlock used to
lock between threads.
Willy Tarreau 2021-06-15 16:11:33 +02:00
parent b54ca70e7c
commit 6fd0450b47
6 changed files with 12 additions and 268 deletions
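
Before the per-file hunks, here is a condensed sketch of the locking model that remains once this commit is applied. It is reassembled from the kept lines of the shctx-t.h and shctx.h hunks below, so it is illustrative rather than a verbatim copy of the resulting files; __decl_thread() and the HA_SPIN_* macros are HAProxy's existing thread primitives.

#include <haproxy/api-t.h>
#include <haproxy/thread-t.h>
#include <haproxy/thread.h>

struct shared_context {
	__decl_thread(HA_SPINLOCK_T lock);   /* single inter-thread spinlock, no inter-process flavor */
	/* ... avail/hot lists, nbav counter, etc. ... */
};

/* The lock is only taken when the cache is really shared between threads;
 * shctx_init() sets use_shared_mem when it is called with shared != 0. */
extern int use_shared_mem;

#define shctx_lock(shctx)   if (use_shared_mem) HA_SPIN_LOCK(SHCTX_LOCK, &shctx->lock)
#define shctx_unlock(shctx) if (use_shared_mem) HA_SPIN_UNLOCK(SHCTX_LOCK, &shctx->lock)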

INSTALL

@@ -379,11 +379,8 @@ target. Common issues may include:
- clock_gettime() not found
=> your system needs USE_RT=1
- __sync_sub_and_fetch undefined in cache.o
=> your system needs either USE_PTHREAD_PSHARED=1 or USE_PRIVATE_CACHE=1
- many __sync_<something> errors in many files
=> your gcc is too old, build without threads and with private cache.
=> your gcc is too old, build without threads.
- many openssl errors
=> your OpenSSL version really is too old, do not enable OpenSSL
@@ -505,44 +502,6 @@ to the compiler so that any build warning will trigger an error. This is the
recommended way to build when developing, and it is expected that contributed
patches were tested with ERR=1.
The SSL stack supports session cache synchronization between all running
processes. This involves some atomic operations and synchronization operations
which come in multiple flavors depending on the system and architecture :
Atomic operations :
- internal assembler versions for x86/x86_64 architectures
- gcc builtins for other architectures. Some architectures might not
be fully supported or might require a more recent version of gcc.
If your architecture is not supported, you will have to either use
pthreads if supported, or disable the shared cache.
- pthread (posix threads). Pthreads are very common but inter-process
support is not that common, and some older operating systems did not
report an error when enabling multi-process mode, so they used to
silently fail, possibly causing crashes. Linux's implementation is
fine. OpenBSD doesn't support them and doesn't build. FreeBSD 9 builds
and reports an error at runtime, while certain older versions might
silently fail. Pthreads are enabled using USE_PTHREAD_PSHARED=1.
Synchronization operations :
- internal spinlock : this mode is OS-independent, light but will not
scale well to many processes. However, accesses to the session cache
are rare enough that this mode could certainly always be used. This
is the default mode.
- Futexes, which are Linux-specific highly scalable light weight mutexes
implemented in user-space with some limited assistance from the kernel.
This is the default on Linux 2.6 and above and is enabled by passing
USE_FUTEX=1
- pthread (posix threads). See above.
If none of these mechanisms is supported by your platform, you may need to
build with USE_PRIVATE_CACHE=1 to totally disable SSL cache sharing. Then it
is better not to run SSL on multiple processes. Note that you don't need these
features if you only intend to use multi-threading and never multi-process.
If you need to pass other defines, includes, libraries, etc... then please
check the Makefile to see which ones will be available in your case, and
use/override the USE_* variables from the Makefile.
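
The removed paragraphs above describe how build options used to select an inter-process locking mechanism. Condensed from the shctx.h hunk further down in this commit, that selection looked roughly like the sketch below; it is illustrative rather than the verbatim removed code (USE_SYSCALL_FUTEX is the internal define set by the Makefile's USE_FUTEX option).

/* Rough sketch of the build-time selection this commit removes. */
extern int use_shared_mem;

#if defined(USE_PRIVATE_CACHE)
/* cache never shared between processes: plain inter-thread spinlock */
#define shctx_lock(shctx)   if (use_shared_mem) HA_SPIN_LOCK(SHCTX_LOCK, &shctx->lock)
#define shctx_unlock(shctx) if (use_shared_mem) HA_SPIN_UNLOCK(SHCTX_LOCK, &shctx->lock)
#elif defined(USE_PTHREAD_PSHARED)
/* process-shared pthread mutex (PTHREAD_PROCESS_SHARED attribute) */
#define shctx_lock(shctx)   if (use_shared_mem) pthread_mutex_lock(&shctx->mutex)
#define shctx_unlock(shctx) if (use_shared_mem) pthread_mutex_unlock(&shctx->mutex)
#else
/* on contention, _shctx_lock()/_shctx_unlock() either sleep on a futex
 * (USE_SYSCALL_FUTEX) or spin with exponential backoff */
#define shctx_lock(shctx)   if (use_shared_mem) _shctx_lock(shctx)
#define shctx_unlock(shctx) if (use_shared_mem) _shctx_unlock(shctx)
#endif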

Makefile

@@ -22,9 +22,7 @@
# USE_PCRE2 : enable use of libpcre2 for regex.
# USE_PCRE2_JIT : enable JIT for faster regex on libpcre2
# USE_POLL : enable poll(). Automatic.
# USE_PRIVATE_CACHE : disable shared memory cache of ssl sessions.
# USE_THREAD : enable threads support.
# USE_PTHREAD_PSHARED : enable pthread process shared mutex on sslcache.
# USE_STATIC_PCRE : enable static libpcre. Recommended.
# USE_STATIC_PCRE2 : enable static libpcre2.
# USE_TPROXY : enable transparent proxy. Automatic.
@@ -35,7 +33,6 @@
# USE_GETADDRINFO : use getaddrinfo() to resolve IPv6 host names.
# USE_OPENSSL : enable use of OpenSSL. Recommended, but see below.
# USE_LUA : enable Lua support.
# USE_FUTEX : enable use of futex on kernel 2.6. Automatic.
# USE_ACCEPT4 : enable use of accept4() on linux. Automatic.
# USE_CLOSEFROM : enable use of closefrom() on *bsd, solaris. Automatic.
# USE_PRCTL : enable use of prctl(). Automatic.
@@ -308,10 +305,10 @@ LDFLAGS = $(ARCH_FLAGS) -g
# the reported build options.
use_opts = USE_EPOLL USE_KQUEUE USE_NETFILTER \
USE_PCRE USE_PCRE_JIT USE_PCRE2 USE_PCRE2_JIT USE_POLL \
USE_PRIVATE_CACHE USE_THREAD USE_PTHREAD_PSHARED USE_BACKTRACE \
USE_THREAD USE_BACKTRACE \
USE_STATIC_PCRE USE_STATIC_PCRE2 USE_TPROXY USE_LINUX_TPROXY \
USE_LINUX_SPLICE USE_LIBCRYPT USE_CRYPT_H \
USE_GETADDRINFO USE_OPENSSL USE_LUA USE_FUTEX USE_ACCEPT4 \
USE_GETADDRINFO USE_OPENSSL USE_LUA USE_ACCEPT4 \
USE_CLOSEFROM USE_ZLIB USE_SLZ USE_CPU_AFFINITY USE_TFO USE_NS \
USE_DL USE_RT USE_DEVICEATLAS USE_51DEGREES USE_WURFL USE_SYSTEMD \
USE_OBSOLETE_LINKER USE_PRCTL USE_THREAD_DUMP USE_EVPORTS USE_OT \
@@ -353,7 +350,7 @@ endif
ifeq ($(TARGET),linux-glibc)
set_target_defaults = $(call default_opts, \
USE_POLL USE_TPROXY USE_LIBCRYPT USE_DL USE_RT USE_CRYPT_H USE_NETFILTER \
USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_FUTEX USE_LINUX_TPROXY \
USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_LINUX_TPROXY \
USE_ACCEPT4 USE_LINUX_SPLICE USE_PRCTL USE_THREAD_DUMP USE_NS USE_TFO \
USE_GETADDRINFO USE_BACKTRACE)
ifneq ($(shell echo __arm__/__aarch64__ | $(CC) -E -xc - | grep '^[^\#]'),__arm__/__aarch64__)
@@ -365,7 +362,7 @@ endif
ifeq ($(TARGET),linux-glibc-legacy)
set_target_defaults = $(call default_opts, \
USE_POLL USE_TPROXY USE_LIBCRYPT USE_DL USE_RT USE_CRYPT_H USE_NETFILTER \
USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_FUTEX USE_LINUX_TPROXY \
USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_LINUX_TPROXY \
USE_ACCEPT4 USE_LINUX_SPLICE USE_PRCTL USE_THREAD_DUMP USE_GETADDRINFO)
endif
@@ -373,7 +370,7 @@ endif
ifeq ($(TARGET),linux-musl)
set_target_defaults = $(call default_opts, \
USE_POLL USE_TPROXY USE_LIBCRYPT USE_DL USE_RT USE_CRYPT_H USE_NETFILTER \
USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_FUTEX USE_LINUX_TPROXY \
USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_LINUX_TPROXY \
USE_ACCEPT4 USE_LINUX_SPLICE USE_PRCTL USE_THREAD_DUMP USE_NS USE_TFO \
USE_GETADDRINFO)
ifneq ($(shell echo __arm__/__aarch64__ | $(CC) -E -xc - | grep '^[^\#]'),__arm__/__aarch64__)
@@ -429,7 +426,7 @@ endif
# AIX 5.1 only
ifeq ($(TARGET),aix51)
set_target_defaults = $(call default_opts, \
USE_POLL USE_LIBCRYPT USE_OBSOLETE_LINKER USE_PRIVATE_CACHE)
USE_POLL USE_LIBCRYPT USE_OBSOLETE_LINKER)
TARGET_CFLAGS = -Dss_family=__ss_family -Dip6_hdr=ip6hdr -DSTEVENS_API -D_LINUX_SOURCE_COMPAT -Dunsetenv=my_unsetenv
DEBUG_CFLAGS =
endif
@@ -593,13 +590,6 @@ OPTIONS_OBJS += src/quic_sock.o src/proto_quic.o src/xprt_quic.o src/quic_tls.o
src/quic_frame.o src/quic_cc.o src/quic_cc_newreno.o
endif
# The private cache option affects the way the shctx is built
ifeq ($(USE_PRIVATE_CACHE),)
ifneq ($(USE_PTHREAD_PSHARED),)
OPTIONS_LDFLAGS += -lpthread
endif
endif
ifneq ($(USE_LUA),)
check_lua_lib = $(shell echo "int main(){}" | $(CC) -o /dev/null -x c - $(2) -l$(1) 2>/dev/null && echo $(1))
check_lua_inc = $(shell if [ -d $(2)$(1) ]; then echo $(2)$(1); fi;)

include/haproxy/shctx-t.h

@@ -14,9 +14,6 @@
#ifndef __HAPROXY_SHCTX_T_H
#define __HAPROXY_SHCTX_T_H
#if !defined (USE_PRIVATE_CACHE) && defined(USE_PTHREAD_PSHARED)
#include <pthread.h>
#endif
#include <haproxy/api-t.h>
#include <haproxy/thread-t.h>
@@ -49,15 +46,7 @@ struct shared_block {
};
struct shared_context {
#ifndef USE_PRIVATE_CACHE
#ifdef USE_PTHREAD_PSHARED
pthread_mutex_t mutex;
#else
unsigned int waiters;
#endif
#else
__decl_thread(HA_SPINLOCK_T lock); // used when USE_PRIVATE_CACHE=1
#endif
__decl_thread(HA_SPINLOCK_T lock);
struct list avail; /* list for active and free blocks */
struct list hot; /* list for locked blocks */
unsigned int nbav; /* number of available blocks */

include/haproxy/shctx.h

@@ -17,20 +17,7 @@
#include <haproxy/api.h>
#include <haproxy/list.h>
#include <haproxy/shctx-t.h>
#ifndef USE_PRIVATE_CACHE
#ifdef USE_PTHREAD_PSHARED
#include <pthread.h>
#else
#ifdef USE_SYSCALL_FUTEX
#include <unistd.h>
#include <linux/futex.h>
#include <sys/syscall.h>
#endif
#endif
#else
#include <haproxy/thread.h>
#endif
int shctx_init(struct shared_context **orig_shctx,
int maxblocks, int blocksize, unsigned int maxobjsz,
@@ -48,143 +35,11 @@ int shctx_row_data_get(struct shared_context *shctx, struct shared_block *first,
/* Lock functions */
#if defined (USE_PRIVATE_CACHE)
extern int use_shared_mem;
#define shctx_lock(shctx) if (use_shared_mem) HA_SPIN_LOCK(SHCTX_LOCK, &shctx->lock)
#define shctx_unlock(shctx) if (use_shared_mem) HA_SPIN_UNLOCK(SHCTX_LOCK, &shctx->lock)
#elif defined (USE_PTHREAD_PSHARED)
extern int use_shared_mem;
#define shctx_lock(shctx) if (use_shared_mem) pthread_mutex_lock(&shctx->mutex)
#define shctx_unlock(shctx) if (use_shared_mem) pthread_mutex_unlock(&shctx->mutex)
#else
extern int use_shared_mem;
#ifdef USE_SYSCALL_FUTEX
static inline void _shctx_wait4lock(unsigned int *count, unsigned int *uaddr, int value)
{
syscall(SYS_futex, uaddr, FUTEX_WAIT, value, NULL, 0, 0);
}
static inline void _shctx_awakelocker(unsigned int *uaddr)
{
syscall(SYS_futex, uaddr, FUTEX_WAKE, 1, NULL, 0, 0);
}
#else /* internal spin lock */
#if defined (__i486__) || defined (__i586__) || defined (__i686__) || defined (__x86_64__)
static inline void relax()
{
__asm volatile("rep;nop\n" ::: "memory");
}
#else /* if no x86_64 or i586 arch: use less optimized but generic asm */
static inline void relax()
{
__asm volatile("" ::: "memory");
}
#endif
static inline void _shctx_wait4lock(unsigned int *count, unsigned int *uaddr, int value)
{
int i;
for (i = 0; i < *count; i++) {
relax();
relax();
if (*uaddr != value)
return;
}
*count = (unsigned char)((*count << 1) + 1);
}
#define _shctx_awakelocker(a)
#endif
#if defined (__i486__) || defined (__i586__) || defined (__i686__) || defined (__x86_64__)
static inline unsigned int xchg(unsigned int *ptr, unsigned int x)
{
__asm volatile("lock xchgl %0,%1"
: "=r" (x), "+m" (*ptr)
: "0" (x)
: "memory");
return x;
}
static inline unsigned int cmpxchg(unsigned int *ptr, unsigned int old, unsigned int new)
{
unsigned int ret;
__asm volatile("lock cmpxchgl %2,%1"
: "=a" (ret), "+m" (*ptr)
: "r" (new), "0" (old)
: "memory");
return ret;
}
static inline unsigned char atomic_dec(unsigned int *ptr)
{
unsigned char ret;
__asm volatile("lock decl %0\n"
"setne %1\n"
: "+m" (*ptr), "=qm" (ret)
:
: "memory");
return ret;
}
#else /* if no x86_64 or i586 arch: use less optimized gcc >= 4.1 built-ins */
static inline unsigned int xchg(unsigned int *ptr, unsigned int x)
{
return __sync_lock_test_and_set(ptr, x);
}
static inline unsigned int cmpxchg(unsigned int *ptr, unsigned int old, unsigned int new)
{
return __sync_val_compare_and_swap(ptr, old, new);
}
static inline unsigned char atomic_dec(unsigned int *ptr)
{
return __sync_sub_and_fetch(ptr, 1) ? 1 : 0;
}
#endif
static inline void _shctx_lock(struct shared_context *shctx)
{
unsigned int x;
unsigned int count = 3;
x = cmpxchg(&shctx->waiters, 0, 1);
if (x) {
if (x != 2)
x = xchg(&shctx->waiters, 2);
while (x) {
_shctx_wait4lock(&count, &shctx->waiters, 2);
x = xchg(&shctx->waiters, 2);
}
}
}
static inline void _shctx_unlock(struct shared_context *shctx)
{
if (atomic_dec(&shctx->waiters)) {
shctx->waiters = 0;
_shctx_awakelocker(&shctx->waiters);
}
}
#define shctx_lock(shctx) if (use_shared_mem) _shctx_lock(shctx)
#define shctx_unlock(shctx) if (use_shared_mem) _shctx_unlock(shctx)
#endif
/* List Macros */
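
The removed _shctx_lock()/_shctx_unlock() pair above implements a classic three-state lock: 0 means unlocked, 1 means locked with no waiter, and 2 means locked with possible waiters, so the unlocker only issues a wake-up when someone may actually be sleeping. Below is a minimal sketch of the same state machine using C11 atomics instead of the removed hand-rolled assembly; the demo_* names and the main() driver are hypothetical and only serve as illustration.

#include <stdatomic.h>

/* waiters: 0 = unlocked, 1 = locked without waiters, 2 = locked with
 * possible waiters (same protocol as the removed code above). */
static void demo_lock(atomic_uint *waiters)
{
	unsigned int x = 0;

	/* fast path: 0 -> 1 acquires the lock uncontended */
	if (atomic_compare_exchange_strong(waiters, &x, 1))
		return;

	/* slow path: mark the lock as contended (2), then retry until the
	 * value observed before the exchange drops back to 0 */
	if (x != 2)
		x = atomic_exchange(waiters, 2);
	while (x) {
		/* the removed code either spun with exponential backoff or
		 * called futex(FUTEX_WAIT) here while *waiters stayed at 2 */
		x = atomic_exchange(waiters, 2);
	}
}

static void demo_unlock(atomic_uint *waiters)
{
	/* previous value 1: nobody waited; previous value 2: reset to 0
	 * and wake one waiter (futex(FUTEX_WAKE, 1) in the removed code) */
	if (atomic_fetch_sub(waiters, 1) != 1)
		atomic_store(waiters, 0);
}

int main(void)
{
	atomic_uint waiters = 0;

	demo_lock(&waiters);    /* uncontended: waiters goes 0 -> 1 */
	demo_unlock(&waiters);  /* back to 0, no wake-up needed */
	return 0;
}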

src/haproxy.c

@@ -1998,21 +1998,6 @@ static void init(int argc, char **argv)
exit(1);
}
/* recompute the amount of per-process memory depending on
* the shared SSL cache size
*/
if (global.rlimit_memmax_all) {
#if defined (USE_OPENSSL) && !defined(USE_PRIVATE_CACHE)
int64_t ssl_cache_bytes = global.tune.sslcachesize * 200LL;
global.rlimit_memmax =
((((int64_t)global.rlimit_memmax_all * 1048576LL) - ssl_cache_bytes) +
ssl_cache_bytes + 1048575LL) / 1048576LL;
#else
global.rlimit_memmax = global.rlimit_memmax_all;
#endif
}
#ifdef USE_NS
err_code |= netns_init();
if (err_code & (ERR_ABORT|ERR_FATAL)) {
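
A side note on the haproxy.c hunk above: the SSL-specific branch of the removed block computes the same result as its #else counterpart, because ssl_cache_bytes is subtracted and then added back before the megabyte round-up, so both branches simply yield rlimit_memmax_all. A minimal check of that arithmetic with made-up sample values (the numbers are hypothetical, purely illustrative):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	/* hypothetical figures: 256 MB limit, 20000 cached SSL sessions */
	int64_t rlimit_memmax_all = 256;
	int64_t ssl_cache_bytes = 20000 * 200LL;

	/* same expression as the removed block: the cache term cancels out
	 * and rounding up an exact multiple of 1048576 changes nothing */
	int64_t rlimit_memmax =
		(((rlimit_memmax_all * 1048576LL) - ssl_cache_bytes) +
		 ssl_cache_bytes + 1048575LL) / 1048576LL;

	assert(rlimit_memmax == rlimit_memmax_all);
	return 0;
}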

src/shctx.c

@@ -292,9 +292,6 @@ int shctx_init(struct shared_context **orig_shctx, int maxblocks, int blocksize,
int i;
struct shared_context *shctx;
int ret;
#ifdef USE_PTHREAD_PSHARED
pthread_mutexattr_t attr;
#endif
void *cur;
int maptype = MAP_PRIVATE;
@@ -305,8 +302,10 @@ int shctx_init(struct shared_context **orig_shctx, int maxblocks, int blocksize,
blocksize = (blocksize + sizeof(void *) - 1) & -sizeof(void *);
extra = (extra + sizeof(void *) - 1) & -sizeof(void *);
if (shared)
if (shared) {
maptype = MAP_SHARED;
use_shared_mem = 1;
}
shctx = (struct shared_context *)mmap(NULL, sizeof(struct shared_context) + extra + (maxblocks * (sizeof(struct shared_block) + blocksize)),
PROT_READ | PROT_WRITE, maptype | MAP_ANON, -1, 0);
@@ -316,42 +315,9 @@ int shctx_init(struct shared_context **orig_shctx, int maxblocks, int blocksize,
goto err;
}
HA_SPIN_INIT(&shctx->lock);
shctx->nbav = 0;
if (maptype == MAP_SHARED) {
#ifndef USE_PRIVATE_CACHE
#ifdef USE_PTHREAD_PSHARED
if (pthread_mutexattr_init(&attr)) {
munmap(shctx, sizeof(struct shared_context) + extra + (maxblocks * (sizeof(struct shared_block) + blocksize)));
shctx = NULL;
ret = SHCTX_E_INIT_LOCK;
goto err;
}
if (pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)) {
pthread_mutexattr_destroy(&attr);
munmap(shctx, sizeof(struct shared_context) + extra + (maxblocks * (sizeof(struct shared_block) + blocksize)));
shctx = NULL;
ret = SHCTX_E_INIT_LOCK;
goto err;
}
if (pthread_mutex_init(&shctx->mutex, &attr)) {
pthread_mutexattr_destroy(&attr);
munmap(shctx, sizeof(struct shared_context) + extra + (maxblocks * (sizeof(struct shared_block) + blocksize)));
shctx = NULL;
ret = SHCTX_E_INIT_LOCK;
goto err;
}
#else
shctx->waiters = 0;
#endif
#else
HA_SPIN_INIT(&shctx->lock);
#endif
use_shared_mem = 1;
}
LIST_INIT(&shctx->avail);
LIST_INIT(&shctx->hot);