From 06885aaea72bc8b70c6faea5b3a81de1c8f4225a Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sat, 12 Aug 2023 11:22:27 +0200 Subject: [PATCH] MINOR: pools: introduce the use of multiple buckets On many threads and without the shared cache, there can be extreme contention on the ->allocated counter, the ->free_list pointer, and the ->used counter. It's possible to limit this contention by spreading the counters a little bit over multiple entries, that are summed up when a consultation is needed. The criterion used to spread the values cannot be related to the thread ID due to migrations, since we need to keep consistent stats (allocated vs used). Instead we'll just hash the pointer, it provides an index that does the job and that is consistent for the object. When having just a few entries (16 here as it showed almost identical performance between global and non-global pools) even iterations should be short enough during measurements to not be a problem. A pair of functions designed to ease pointer hash bucket calculation were added, with one of them doing it for thread IDs because allocation failures will be associated with a thread and not a pointer. For now this patch only brings in the relevant parts of the infrastructure, the CONFIG_HAP_POOL_BUCKETS_BITS macro that defaults to 6 bits when 512 threads or more are supported, 5 bits when 128 or more are supported, 4 bits when 16 or more are supported, otherwise 3 bits for small setups. The array in the pool_head and the two utility functions are already added. It should have no measurable impact beyond inflating the pool_head structure. 
--- include/haproxy/defaults.h | 17 +++++++++++++++++ include/haproxy/pool-t.h | 10 ++++++++++ src/pool.c | 18 ++++++++++++++++++ 3 files changed, 45 insertions(+) diff --git a/include/haproxy/defaults.h b/include/haproxy/defaults.h index 898ebd15b..64b8cbf12 100644 --- a/include/haproxy/defaults.h +++ b/include/haproxy/defaults.h @@ -454,6 +454,23 @@ #define CONFIG_HAP_POOL_CLUSTER_SIZE 8 #endif +/* number of bits to encode the per-pool buckets for large setups */ +#ifndef CONFIG_HAP_POOL_BUCKETS_BITS +# if defined(USE_THREAD) && MAX_THREADS >= 512 +# define CONFIG_HAP_POOL_BUCKETS_BITS 6 +# elif defined(USE_THREAD) && MAX_THREADS >= 128 +# define CONFIG_HAP_POOL_BUCKETS_BITS 5 +# elif defined(USE_THREAD) && MAX_THREADS >= 16 +# define CONFIG_HAP_POOL_BUCKETS_BITS 4 +# elif defined(USE_THREAD) +# define CONFIG_HAP_POOL_BUCKETS_BITS 3 +# else +# define CONFIG_HAP_POOL_BUCKETS_BITS 0 +# endif +#endif + +#define CONFIG_HAP_POOL_BUCKETS (1UL << (CONFIG_HAP_POOL_BUCKETS_BITS)) + /* Number of samples used to compute the times reported in stats. A power of * two is highly recommended, and this value multiplied by the largest response * time must not overflow and unsigned int. See freq_ctr.h for more information. diff --git a/include/haproxy/pool-t.h b/include/haproxy/pool-t.h index ff6773ca9..f3aa0ed0b 100644 --- a/include/haproxy/pool-t.h +++ b/include/haproxy/pool-t.h @@ -120,11 +120,21 @@ struct pool_head { /* heavily read-write part */ THREAD_ALIGN(64); + struct pool_item *free_list; /* list of free shared objects */ unsigned int used; /* how many chunks are currently in use */ unsigned int needed_avg;/* floating indicator between used and allocated */ unsigned int allocated; /* how many chunks have been allocated */ unsigned int failed; /* failed allocations */ + + /* these entries depend on the pointer value, they're used to reduce + * the contention on fast-changing values. The alignment here is + * important since the purpose is to lower the thread contention. 
+ */ + struct { + THREAD_ALIGN(64); + } buckets[CONFIG_HAP_POOL_BUCKETS]; + struct pool_cache_head cache[MAX_THREADS] THREAD_ALIGNED(64); /* pool caches */ } __attribute__((aligned(64))); diff --git a/src/pool.c b/src/pool.c index 47c07c96c..4f49be929 100644 --- a/src/pool.c +++ b/src/pool.c @@ -110,6 +110,24 @@ static int disable_trim __read_mostly = 0; static int(*my_mallctl)(const char *, void *, size_t *, void *, size_t) = NULL; static int(*_malloc_trim)(size_t) = NULL; +/* returns the pool hash bucket an object should use based on its pointer. + * Objects will need consistent bucket assignment so that they may be + * allocated on one thread and released on another one. Thus only the + * pointer is usable. + */ +static inline forceinline unsigned int pool_pbucket(const void *ptr) +{ + return ptr_hash(ptr, CONFIG_HAP_POOL_BUCKETS_BITS); +} + +/* returns the pool hash bucket to use for the current thread. This should only + * be used when no pointer is available (e.g. count alloc failures). + */ +static inline forceinline unsigned int pool_tbucket(void) +{ + return tid % CONFIG_HAP_POOL_BUCKETS; +} + /* ask the allocator to trim memory pools. * This must run under thread isolation so that competing threads trying to * allocate or release memory do not prevent the allocator from completing