MEDIUM: pools: implement a thread-local cache for pool entries

Each thread now keeps the last ~512 kB of freed objects in a local
cache. There are some heuristics involved so that a specific pool cannot
use more than 1/8 of the total cache in number of objects. Tests have
shown that 512 kB is an optimal size on a 24-thread test running on a
dual-socket machine, resulting in an overall 7.5% performance increase
and a cache miss ratio reducing from 19.2 to 17.7%. Anyway it seems
pointless to keep more than an L2 cache's worth of objects, which probably
explains why sizes between 256 and 512 kB are optimal.

Cached objects appear in two lists, one per pool and one LRU to help
with fair eviction. Currently there is no way to check each thread's
cache state nor to flush it. This cache cannot be disabled and is
enabled as soon as the lockless pools are enabled (i.e.: threads are
enabled, no pool debugging is in use and the CPU supports a double word
CAS).
This commit is contained in:
Willy Tarreau 2018-10-16 10:28:54 +02:00
parent 146794dc4f
commit e18db9e984
4 changed files with 126 additions and 2 deletions

View File

@ -141,10 +141,17 @@ static inline void b_free(struct buffer *buf)
static inline struct buffer *b_alloc_margin(struct buffer *buf, int margin)
{
char *area;
ssize_t idx;
unsigned int cached;
if (buf->size)
return buf;
cached = 0;
idx = pool_get_index(pool_head_buffer);
if (idx >= 0)
cached = pool_cache[idx].count;
*buf = BUF_WANTED;
#ifndef CONFIG_HAP_LOCKLESS_POOLS
@ -152,7 +159,7 @@ static inline struct buffer *b_alloc_margin(struct buffer *buf, int margin)
#endif
/* fast path */
if ((pool_head_buffer->allocated - pool_head_buffer->used) > margin) {
if ((pool_head_buffer->allocated - pool_head_buffer->used + cached) > margin) {
area = __pool_get_first(pool_head_buffer);
if (likely(area)) {
#ifndef CONFIG_HAP_LOCKLESS_POOLS

View File

@ -60,6 +60,9 @@
*/
#if defined(USE_THREAD) && defined(HA_HAVE_CAS_DW) && !defined(DEBUG_NO_LOCKLESS_POOLS) && !defined(DEBUG_UAF)
#define CONFIG_HAP_LOCKLESS_POOLS
/* Upper bound, in bytes, of each thread's local pool cache (524288 = 512 kB).
 * A default is only provided when lockless pools are enabled, and it may be
 * overridden by defining the macro before this point.
 */
#ifndef CONFIG_HAP_POOL_CACHE_SIZE
#define CONFIG_HAP_POOL_CACHE_SIZE 524288
#endif
#endif /* lockless pools */
/* CONFIG_HAP_INLINE_FD_SET

View File

@ -50,6 +50,22 @@
#define MAX_BASE_POOLS 32
/* Descriptor of one pool inside a thread's local cache; pool_cache[] holds
 * one of these per pool index. A NULL <list.n> means the head has not been
 * initialized yet, and <size> is copied from the pool when it first is.
 */
struct pool_cache_head {
struct list list; /* head of objects in this pool */
size_t size; /* size of an object */
unsigned int count; /* number of objects in this pool */
};
/* Linkage laid over a freed object while it sits in the local cache:
 * __pool_put_to_cache() casts the object's own pointer to this type, so the
 * object itself provides the storage for both list elements.
 */
struct pool_cache_item {
struct list by_pool; /* link to objects in this pool */
struct list by_lru; /* link to objects by LRU order */
};
/* Local cache state. All of it is THREAD_LOCAL, hence each thread works on
 * its own copy and the counters below only describe the calling thread.
 */
extern THREAD_LOCAL struct pool_cache_head pool_cache[MAX_BASE_POOLS];
extern THREAD_LOCAL struct list pool_lru_head; /* oldest objects */
extern THREAD_LOCAL size_t pool_cache_bytes; /* total cache size */
extern THREAD_LOCAL size_t pool_cache_count; /* #cache objects */
#ifdef CONFIG_HAP_LOCKLESS_POOLS
struct pool_free_list {
void **free_list;
@ -141,6 +157,32 @@ static inline ssize_t pool_get_index(const struct pool_head *pool)
}
#ifdef CONFIG_HAP_LOCKLESS_POOLS
/* Tries to retrieve an object from the local pool cache corresponding to pool
* <pool>. Returns NULL if none is available.
*/
static inline void *__pool_get_from_cache(struct pool_head *pool)
{
ssize_t idx = pool_get_index(pool);
struct pool_cache_item *item;
/* pool not in cache */
if (idx < 0)
return NULL;
/* never allocated or empty */
if (pool_cache[idx].list.n == NULL || LIST_ISEMPTY(&pool_cache[idx].list))
return NULL;
item = LIST_NEXT(&pool_cache[idx].list, typeof(item), by_pool);
pool_cache[idx].count--;
pool_cache_bytes -= pool_cache[idx].size;
pool_cache_count--;
LIST_DEL(&item->by_pool);
LIST_DEL(&item->by_lru);
return item;
}
/*
* Returns a pointer to type <type> taken from the pool <pool_type> if
* available, otherwise returns NULL. No malloc() is attempted, and poisoning
@ -149,6 +191,10 @@ static inline ssize_t pool_get_index(const struct pool_head *pool)
static inline void *__pool_get_first(struct pool_head *pool)
{
struct pool_free_list cmp, new;
void *ret = __pool_get_from_cache(pool);
if (ret)
return ret;
cmp.seq = pool->seq;
__ha_barrier_load();
@ -230,6 +276,27 @@ static inline void __pool_free(struct pool_head *pool, void *ptr)
HA_ATOMIC_SUB(&pool->used, 1);
}
/* Frees an object to the local cache, possibly pushing oldest objects to the
 * global pool. <idx> is the pool's index in pool_cache[]; pool_put_to_cache()
 * passes the result of pool_get_index() after checking it is not negative.
 */
void __pool_put_to_cache(struct pool_head *pool, void *ptr, ssize_t idx);
static inline void pool_put_to_cache(struct pool_head *pool, void *ptr)
{
ssize_t idx = pool_get_index(pool);
/* pool not in cache or too many objects for this pool (more than
* half of the cache is used and this pool uses more than 1/8 of
* the cache size).
*/
if (idx < 0 ||
(pool_cache_bytes > CONFIG_HAP_POOL_CACHE_SIZE * 3 / 4 &&
pool_cache[idx].count >= 16 + pool_cache_count / 8)) {
__pool_free(pool, ptr);
return;
}
__pool_put_to_cache(pool, ptr, idx);
}
/*
* Puts a memory area back to the corresponding pool.
* Items are chained directly through a pointer that
@ -247,7 +314,7 @@ static inline void pool_free(struct pool_head *pool, void *ptr)
if (*POOL_LINK(pool, ptr) != (void *)pool)
*(volatile int *)0 = 0;
#endif
__pool_free(pool, ptr);
pool_put_to_cache(pool, ptr);
}
}

View File

@ -17,6 +17,7 @@
#include <common/config.h>
#include <common/debug.h>
#include <common/hathreads.h>
#include <common/memory.h>
#include <common/mini-clist.h>
#include <common/standard.h>
@ -34,6 +35,11 @@
struct pool_head pool_base_start[MAX_BASE_POOLS] = { };
unsigned int pool_base_count = 0;
/* Per-thread cache state. Everything starts zeroed: a NULL <n> pointer in a
 * list head is how the cache code recognizes a head that still has to go
 * through LIST_INIT().
 */
THREAD_LOCAL struct pool_cache_head pool_cache[MAX_BASE_POOLS] = { };
THREAD_LOCAL struct list pool_lru_head = { }; /* oldest objects */
THREAD_LOCAL size_t pool_cache_bytes = 0; /* total cache size */
THREAD_LOCAL size_t pool_cache_count = 0; /* #cache objects */
static struct list pools = LIST_HEAD_INIT(pools);
int mem_poison_byte = -1;
@ -242,6 +248,47 @@ void pool_gc(struct pool_head *pool_ctx)
HA_ATOMIC_STORE(&recurse, 0);
}
/* frees an object to the local cache, possibly pushing oldest objects to the
* global pool. Must not be called directly.
*/
void __pool_put_to_cache(struct pool_head *pool, void *ptr, ssize_t idx)
{
struct pool_cache_item *item = (struct pool_cache_item *)ptr;
struct pool_cache_head *ph = &pool_cache[idx];
/* never allocated or empty */
if (unlikely(ph->list.n == NULL)) {
LIST_INIT(&ph->list);
ph->size = pool->size;
if (pool_lru_head.n == NULL)
LIST_INIT(&pool_lru_head);
}
LIST_ADD(&ph->list, &item->by_pool);
LIST_ADD(&pool_lru_head, &item->by_lru);
ph->count++;
pool_cache_count++;
pool_cache_bytes += ph->size;
if (pool_cache_bytes <= CONFIG_HAP_POOL_CACHE_SIZE)
return;
do {
item = LIST_PREV(&pool_lru_head, struct pool_cache_item *, by_lru);
/* note: by definition we remove oldest objects so they also are the
* oldest in their own pools, thus their next is the pool's head.
*/
ph = LIST_NEXT(&item->by_pool, struct pool_cache_head *, list);
LIST_DEL(&item->by_pool);
LIST_DEL(&item->by_lru);
ph->count--;
pool_cache_count--;
pool_cache_bytes -= ph->size;
__pool_free(pool_base_start + (ph - pool_cache), item);
} while (pool_cache_bytes > CONFIG_HAP_POOL_CACHE_SIZE * 7 / 8);
}
#else /* CONFIG_HAP_LOCKLESS_POOLS */
/* Allocates new entries for pool <pool> until there are at least <avail> + 1