MINOR: pools: introduce the use of multiple buckets

With many threads and without the shared cache, there can be extreme
contention on the ->allocated counter, the ->free_list pointer, and
the ->used counter. It's possible to limit this contention by spreading
each counter a little bit over multiple entries, which are summed up
whenever the value needs to be consulted. The criterion used to spread
the values cannot be related to the thread ID because of thread
migrations, since we need to keep consistent stats (allocated vs used).
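
As a rough illustration of the idea (this is not HAProxy code; the
example_ names and the fixed bucket count are invented for the sketch),
each bucket holds a share of the counter, an update touches a single
share, and a consultation simply sums them:

    #include <stdatomic.h>
    #include <stddef.h>

    #define EXAMPLE_BUCKETS 16 /* hypothetical bucket count, for the sketch only */

    /* the counter is split into several slots so that concurrent updates from
     * different threads usually land on different memory locations
     */
    struct example_counter {
            _Atomic unsigned int slot[EXAMPLE_BUCKETS];
    };

    /* an update only touches the slot chosen by the caller */
    static inline void example_counter_add(struct example_counter *c,
                                           unsigned int bucket, unsigned int v)
    {
            atomic_fetch_add_explicit(&c->slot[bucket], v, memory_order_relaxed);
    }

    /* a consultation sums all slots; with only a few of them the loop stays cheap */
    static inline unsigned int example_counter_read(struct example_counter *c)
    {
            unsigned int total = 0;
            size_t i;

            for (i = 0; i < EXAMPLE_BUCKETS; i++)
                    total += atomic_load_explicit(&c->slot[i], memory_order_relaxed);
            return total;
    }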

Instead of the thread ID, we'll just hash the pointer; it provides an
index that does the job and that remains consistent for a given object.
With only a few entries (16 here, as that showed almost identical
performance between global and non-global pools), even iterating over
all of them to sum the values should remain short enough not to be a
problem during measurements.
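
The patch below relies on an existing ptr_hash() helper whose
implementation is not part of this diff; purely as a hedged sketch of
the principle (the example_ name is invented), the pointer can be folded
through a multiplicative hash and truncated to a few bits, so the
freeing thread computes exactly the same bucket as the allocating one:

    #include <stdint.h>

    /* sketch of a pointer-to-bucket mapping (not HAProxy's actual ptr_hash()):
     * multiply by a 64-bit odd constant and keep the top 'bits' bits. The
     * result depends only on the pointer value, never on the calling thread.
     */
    static inline unsigned int example_ptr_bucket(const void *ptr, unsigned int bits)
    {
            uint64_t x = (uintptr_t)ptr;

            x *= 0x9e3779b97f4a7c15ULL; /* 2^64 divided by the golden ratio */
            return bits ? (unsigned int)(x >> (64 - bits)) : 0;
    }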

A pair of functions designed to ease the bucket calculation was added:
one hashes an object's pointer, the other derives the bucket from the
thread ID, because allocation failures will be associated with a thread
and not with a pointer.

For now this patch only brings in the relevant parts of the infrastructure:
the CONFIG_HAP_POOL_BUCKETS_BITS macro, which defaults to 6 bits when 512
threads or more are supported, 5 bits when 128 or more are supported, 4
bits when 16 or more are supported, and otherwise 3 bits for smaller
threaded setups (and 0 bits without threads). The buckets array in
pool_head and the two utility functions are added as well. It should have
no measurable impact beyond inflating the pool_head structure.
Author: Willy Tarreau
Date:   2023-08-12 11:22:27 +02:00
parent 29ad61fb00
commit 06885aaea7
3 changed files with 45 additions and 0 deletions

include/haproxy/defaults.h

@@ -454,6 +454,23 @@
#define CONFIG_HAP_POOL_CLUSTER_SIZE 8
#endif
/* number of bits to encode the per-pool buckets for large setups */
#ifndef CONFIG_HAP_POOL_BUCKETS_BITS
# if defined(USE_THREAD) && MAX_THREADS >= 512
# define CONFIG_HAP_POOL_BUCKETS_BITS 6
# elif defined(USE_THREAD) && MAX_THREADS >= 128
# define CONFIG_HAP_POOL_BUCKETS_BITS 5
# elif defined(USE_THREAD) && MAX_THREADS >= 16
# define CONFIG_HAP_POOL_BUCKETS_BITS 4
# elif defined(USE_THREAD)
# define CONFIG_HAP_POOL_BUCKETS_BITS 3
# else
# define CONFIG_HAP_POOL_BUCKETS_BITS 0
# endif
#endif
#define CONFIG_HAP_POOL_BUCKETS (1UL << (CONFIG_HAP_POOL_BUCKETS_BITS))
/* Number of samples used to compute the times reported in stats. A power of
 * two is highly recommended, and this value multiplied by the largest response
 * time must not overflow an unsigned int. See freq_ctr.h for more information.

include/haproxy/pool-t.h

@@ -120,11 +120,21 @@ struct pool_head {
/* heavily read-write part */
THREAD_ALIGN(64);
struct pool_item *free_list; /* list of free shared objects */
unsigned int used;      /* how many chunks are currently in use */
unsigned int needed_avg;/* floating indicator between used and allocated */
unsigned int allocated; /* how many chunks have been allocated */
unsigned int failed;    /* failed allocations */
/* these entries depend on the pointer value, they're used to reduce
* the contention on fast-changing values. The alignment here is
* important since the purpose is to lower the thread contention.
*/
struct {
THREAD_ALIGN(64);
} buckets[CONFIG_HAP_POOL_BUCKETS];
struct pool_cache_head cache[MAX_THREADS] THREAD_ALIGNED(64); /* pool caches */
} __attribute__((aligned(64)));
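
Side note on the per-bucket THREAD_ALIGN(64) above (illustration only,
using standard C11 alignas() rather than HAProxy's macro): padding every
bucket to an assumed 64-byte cache line means that two threads updating
counters in different buckets do not keep invalidating each other's line:

    #include <stdalign.h>
    #include <stdatomic.h>

    /* hypothetical per-bucket payload: the alignment pads each array element
     * to a full line, so buckets[i] and buckets[i + 1] never share one
     */
    struct example_aligned_bucket {
            alignas(64) _Atomic unsigned int allocated;
    };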

src/pool.c

@@ -110,6 +110,24 @@ static int disable_trim __read_mostly = 0;
static int (*my_mallctl)(const char *, void *, size_t *, void *, size_t) = NULL;
static int (*_malloc_trim)(size_t) = NULL;
/* returns the pool hash bucket an object should use based on its pointer.
 * Objects will need a consistent bucket assignment so that they may be
 * allocated on one thread and released on another one. Thus only the
 * pointer is usable.
 */
static inline forceinline unsigned int pool_pbucket(const void *ptr)
{
return ptr_hash(ptr, CONFIG_HAP_POOL_BUCKETS_BITS);
}
/* returns the pool hash bucket to use for the current thread. This should only
* be used when no pointer is available (e.g. count alloc failures).
*/
static inline forceinline unsigned int pool_tbucket(void)
{
return tid % CONFIG_HAP_POOL_BUCKETS;
}
/* ask the allocator to trim memory pools.
 * This must run under thread isolation so that competing threads trying to
 * allocate or release memory do not prevent the allocator from completing
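
To make the intended use of the two helpers concrete: the buckets[] array
is still empty in this commit, so the per-bucket counters below are
hypothetical, as are all the example_ names; the point is only that the
pointer-based bucket is used whenever an object exists, while the
thread-based one serves when an allocation fails and there is no pointer
to hash:

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdlib.h>

    #define EXAMPLE_BUCKETS 16 /* stands in for CONFIG_HAP_POOL_BUCKETS */

    /* hypothetical future per-bucket payload */
    struct example_bucket {
            _Atomic unsigned int allocated; /* chunks accounted to this bucket */
            _Atomic unsigned int failed;    /* allocation failures counted here */
    };

    struct example_pool {
            struct example_bucket buckets[EXAMPLE_BUCKETS];
            size_t alloc_sz;
    };

    static _Thread_local unsigned int example_tid; /* stand-in for HAProxy's tid */

    /* pointer-based bucket, same role as pool_pbucket() */
    static unsigned int example_pbucket(const void *ptr)
    {
            return (unsigned int)(((uintptr_t)ptr * 0x9e3779b97f4a7c15ULL) >> 60);
    }

    /* thread-based bucket, same role as pool_tbucket() */
    static unsigned int example_tbucket(void)
    {
            return example_tid % EXAMPLE_BUCKETS;
    }

    static void *example_pool_alloc(struct example_pool *pool)
    {
            void *ptr = malloc(pool->alloc_sz);

            if (ptr)  /* the object's own pointer selects the bucket */
                    atomic_fetch_add(&pool->buckets[example_pbucket(ptr)].allocated, 1);
            else      /* no pointer to hash: fall back to the thread's bucket */
                    atomic_fetch_add(&pool->buckets[example_tbucket()].failed, 1);
            return ptr;
    }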