mirror of http://git.haproxy.org/git/haproxy.git/
MEDIUM: pools: add CONFIG_HAP_NO_GLOBAL_POOLS and CONFIG_HAP_GLOBAL_POOLS
We've reached a point where the global pools represent a significant bottleneck with threads. On a 64-core machine, the performance was divided by 8 between 32 and 64 H2 connections, only because there were not enough entries in the local caches to avoid picking from the global pools, and the contention on the list there was very high. It becomes obvious that we need an array of lists, but that will require more changes.

In parallel, standard memory allocators have improved, with tcmalloc and jemalloc finding their way into mainstream systems, and glibc having upgraded to a thread-aware ptmalloc variant. Keeping this level of contention here isn't justified anymore now that we have both local per-thread pool caches and a fast process-wide allocator.

For these reasons, this patch introduces a new compile-time setting, CONFIG_HAP_NO_GLOBAL_POOLS, which is set by default when threads are enabled together with thread-local pool caches and we know we have a fast thread-aware memory allocator (currently set for glibc >= 2.26). In this case we entirely bypass the global pool and use the standard memory allocator directly when objects are missing from the local pools. It is also possible to force it at compile time when a good allocator is used with another setup.

It is still possible to re-enable the global pools using CONFIG_HAP_GLOBAL_POOLS, if a corner case is discovered regarding the operating system's default allocator, or when building with a recent libc but a different allocator which provides other benefits but does not scale well with threads.
commit 0bae075928
parent 566cebc1fc
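To make the effect concrete, here is a rough sketch of the allocation path the patch leaves us with. Illustration only: take_from_thread_local_cache() and take_from_global_pool() are invented placeholder names, not HAProxy's actual API.

/* sketch of the post-patch allocation path (placeholder helper names,
 * not HAProxy code) */
static void *pool_take_sketch(struct pool_head *pool)
{
	void *p = take_from_thread_local_cache(pool); /* lock-free fast path */

	if (!p) {
#if defined(CONFIG_HAP_NO_GLOBAL_POOLS)
		/* no shared free list: ask the thread-aware allocator directly */
		p = malloc(pool->size);
#else
		/* legacy path: shared list protected by a lock or a lockless scheme */
		p = take_from_global_pool(pool);
#endif
	}
	return p;
}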
@@ -249,6 +249,12 @@ typedef struct { } empty_t;
 #define HA_HAVE_MALLOC_TRIM
 #endif
 
+/* glibc 2.26 includes a thread-local cache which makes it fast enough in threads */
+#if (defined(__GNU_LIBRARY__) && (__GLIBC__ > 2 || __GLIBC__ == 2 && __GLIBC_MINOR__ >= 26))
+#include <malloc.h>
+#define HA_HAVE_FAST_MALLOC
+#endif
+
 /* Max number of file descriptors we send in one sendmsg(). Linux seems to be
  * able to send 253 fds per sendmsg(), not sure about the other OSes.
  */
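The commit message mentions that the fast-malloc assumption can also be forced when a good allocator is used with another setup. A minimal sketch of such an override, where USE_JEMALLOC is a hypothetical build define invented for this example:

/* hypothetical override, not part of this patch: trust jemalloc to be
 * thread-aware even on a libc older than 2.26 (USE_JEMALLOC is an
 * invented example define) */
#if defined(USE_JEMALLOC) && !defined(HA_HAVE_FAST_MALLOC)
#define HA_HAVE_FAST_MALLOC
#endif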
@@ -40,6 +40,16 @@
 #define CONFIG_HAP_LOCAL_POOLS
 #endif
 
+/* On modern architectures with many threads, a fast memory allocator, and
+ * local pools, the global pools with their single list can be way slower than
+ * the standard allocator which already has its own per-thread arenas. In this
+ * case we disable global pools. The global pools may still be enforced
+ * using CONFIG_HAP_GLOBAL_POOLS though.
+ */
+#if defined(USE_THREAD) && defined(HA_HAVE_FAST_MALLOC) && defined(CONFIG_HAP_LOCAL_POOLS) && !defined(CONFIG_HAP_GLOBAL_POOLS)
+#define CONFIG_HAP_NO_GLOBAL_POOLS
+#endif
+
 /* Pools of very similar size are shared by default, unless macro
  * DEBUG_DONT_SHARE_POOLS is set.
  */
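Both macros are plain preprocessor defines, so they can be toggled from the build command line. Assuming HAProxy's usual DEFINE make variable for passing extra compiler flags, re-enabling the global pools would look something like:

    $ make TARGET=linux-glibc USE_THREAD=1 DEFINE="-DCONFIG_HAP_GLOBAL_POOLS"

and conversely, passing -DCONFIG_HAP_NO_GLOBAL_POOLS should force the global pools off on a setup the glibc check above does not detect.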
@@ -150,7 +150,25 @@ static inline void pool_put_to_cache(struct pool_head *pool, void *ptr, ssize_t
 #endif // CONFIG_HAP_LOCAL_POOLS
 
 
-#ifdef CONFIG_HAP_LOCKLESS_POOLS
+#if defined(CONFIG_HAP_NO_GLOBAL_POOLS)
+
+/* this is essentially used with local caches and a fast malloc library,
+ * which may sometimes be faster than the local shared pools because it
+ * will maintain its own per-thread arenas.
+ */
+static inline void *__pool_get_first(struct pool_head *pool)
+{
+	return NULL;
+}
+
+static inline void __pool_free(struct pool_head *pool, void *ptr)
+{
+	_HA_ATOMIC_SUB(&pool->used, 1);
+	_HA_ATOMIC_SUB(&pool->allocated, 1);
+	pool_free_area(ptr, pool->size + POOL_EXTRA);
+}
+
+#elif defined(CONFIG_HAP_LOCKLESS_POOLS)
 
 /****************** Lockless pools implementation ******************/
 
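The always-NULL __pool_get_first() stub is what keeps a single code path in the callers: as the next two hunks show, they treat NULL as a miss, so under CONFIG_HAP_NO_GLOBAL_POOLS the compiler can fold the constant away and the slow path degenerates into a direct allocation. Roughly, as a sketch of the folded result (not actual generated code, and with the local-cache fast path omitted):

/* what pool_alloc_dirty() effectively reduces to under
 * CONFIG_HAP_NO_GLOBAL_POOLS once the constant-NULL __pool_get_first()
 * is inlined (local-cache path omitted) */
static inline void *pool_alloc_dirty_sketch(struct pool_head *pool)
{
	/* no spin lock, no list walk: __pool_refill_alloc() is now a thin
	 * wrapper around the standard allocator */
	return __pool_refill_alloc(pool, 0);
}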
@@ -274,11 +292,11 @@ static inline void *pool_get_first(struct pool_head *pool)
 	return p;
 #endif
 
-#ifndef CONFIG_HAP_LOCKLESS_POOLS
+#if !defined(CONFIG_HAP_LOCKLESS_POOLS) && !defined(CONFIG_HAP_NO_GLOBAL_POOLS)
 	HA_SPIN_LOCK(POOL_LOCK, &pool->lock);
 #endif
 	p = __pool_get_first(pool);
-#ifndef CONFIG_HAP_LOCKLESS_POOLS
+#if !defined(CONFIG_HAP_LOCKLESS_POOLS) && !defined(CONFIG_HAP_NO_GLOBAL_POOLS)
 	HA_SPIN_UNLOCK(POOL_LOCK, &pool->lock);
 #endif
 	return p;
@@ -298,12 +316,12 @@ static inline void *pool_alloc_dirty(struct pool_head *pool)
 	return p;
 #endif
 
-#ifndef CONFIG_HAP_LOCKLESS_POOLS
+#if !defined(CONFIG_HAP_LOCKLESS_POOLS) && !defined(CONFIG_HAP_NO_GLOBAL_POOLS)
 	HA_SPIN_LOCK(POOL_LOCK, &pool->lock);
 #endif
 	if ((p = __pool_get_first(pool)) == NULL)
 		p = __pool_refill_alloc(pool, 0);
-#ifndef CONFIG_HAP_LOCKLESS_POOLS
+#if !defined(CONFIG_HAP_LOCKLESS_POOLS) && !defined(CONFIG_HAP_NO_GLOBAL_POOLS)
 	HA_SPIN_UNLOCK(POOL_LOCK, &pool->lock);
 #endif
 	return p;
src/pool.c (58 changed lines):
@@ -170,7 +170,62 @@ void pool_evict_from_cache()
 }
 #endif
 
-#ifdef CONFIG_HAP_LOCKLESS_POOLS
+#if defined(CONFIG_HAP_NO_GLOBAL_POOLS)
+
+/* simply fall back on the default OS' allocator */
+
+void *__pool_refill_alloc(struct pool_head *pool, unsigned int avail)
+{
+	int allocated = pool->allocated;
+	int limit = pool->limit;
+	void *ptr = NULL;
+
+	if (limit && allocated >= limit) {
+		activity[tid].pool_fail++;
+		return NULL;
+	}
+
+	ptr = pool_alloc_area(pool->size + POOL_EXTRA);
+	if (!ptr) {
+		_HA_ATOMIC_ADD(&pool->failed, 1);
+		activity[tid].pool_fail++;
+		return NULL;
+	}
+
+	_HA_ATOMIC_ADD(&pool->allocated, 1);
+	_HA_ATOMIC_ADD(&pool->used, 1);
+
+#ifdef DEBUG_MEMORY_POOLS
+	/* keep track of where the element was allocated from */
+	*POOL_LINK(pool, ptr) = (void *)pool;
+#endif
+	return ptr;
+}
+
+/* legacy stuff */
+void *pool_refill_alloc(struct pool_head *pool, unsigned int avail)
+{
+	void *ptr;
+
+	ptr = __pool_refill_alloc(pool, avail);
+	return ptr;
+}
+
+/* legacy stuff */
+void pool_flush(struct pool_head *pool)
+{
+}
+
+/* This function might ask the malloc library to trim its buffers. */
+void pool_gc(struct pool_head *pool_ctx)
+{
+#if defined(HA_HAVE_MALLOC_TRIM)
+	malloc_trim(0);
+#endif
+}
+
+#elif defined(CONFIG_HAP_LOCKLESS_POOLS)
+
 /* Allocates new entries for pool <pool> until there are at least <avail> + 1
  * available, then returns the last one for immediate use, so that at least
  * <avail> are left available in the pool upon return. NULL is returned if the
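To experiment with the strategy outside HAProxy, here is a small self-contained toy model: a per-thread cache backed directly by malloc(), with no shared global free list. Everything in it (names, sizes) is invented for illustration; none of it is HAProxy code.

#include <stdio.h>
#include <stdlib.h>

#define CACHE_SLOTS 8

struct toy_pool {
	size_t size;                        /* object size for this pool */
};

/* per-thread cache: a small stack of free objects, no locking needed */
static __thread void *cache[CACHE_SLOTS];
static __thread int cached;

static void *toy_alloc(struct toy_pool *p)
{
	if (cached > 0)
		return cache[--cached];     /* fast path: thread-local, lock-free */
	return malloc(p->size);             /* miss: straight to the allocator,
	                                     * which keeps its own per-thread arenas */
}

static void toy_free(struct toy_pool *p, void *ptr)
{
	(void)p;                            /* size class is implied by the pool */
	if (cached < CACHE_SLOTS)
		cache[cached++] = ptr;      /* keep the object hot for this thread */
	else
		free(ptr);                  /* cache full: hand it back to malloc */
}

int main(void)
{
	struct toy_pool p = { .size = 64 };
	void *a = toy_alloc(&p);

	toy_free(&p, a);                    /* goes into the local cache */
	void *b = toy_alloc(&p);            /* comes back from the cache */
	printf("recycled: %s\n", a == b ? "yes" : "no");
	free(b);
	return 0;
}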