OPTIM: pool: improve needed_avg cache line access pattern
On a 3rd-generation AMD EPYC, 20% of the CPU is spent computing the number of pool objects needed when using QUIC, because pool allocations and releases are very frequent there and inter-CCX communication is extremely slow.

Still, between 0.5 and 1% of CPU can be saved by using fetch-add and sub-fetch operations, which are compiled to XADD, so that the counter value they return is fed directly into the swrate_add argument without having to re-read the shared memory area. That's what this patch does.
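To illustrate the point, here is a minimal standalone sketch (simplified names, not HAProxy code; the _HA_ATOMIC_* macros are assumed to wrap the GCC/Clang __atomic builtins shown here). The old pattern touches the contended counter twice, once to read a sample for the average and once to update it; the new pattern issues a single fetch-add, which x86 compiles to XADD and which returns the previous value as a by-product:

/* Hypothetical stand-in for pool->buckets[bucket].used: a counter
 * whose cache line bounces between CCXs under load. */
static unsigned int used;

/* Old pattern: one load to sample the counter for the average, plus
 * one atomic increment -- two accesses to the contended cache line. */
unsigned int sample_then_inc(void)
{
        unsigned int sample = __atomic_load_n(&used, __ATOMIC_RELAXED);

        __atomic_add_fetch(&used, 1, __ATOMIC_SEQ_CST);
        return sample;
}

/* New pattern: a single fetch-add; on x86 this is one XADD whose
 * result (the pre-increment value) feeds the average directly. */
unsigned int inc_and_sample(void)
{
        return __atomic_fetch_add(&used, 1, __ATOMIC_SEQ_CST);
}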
parent 9797a7718c
commit 87d269707b

src/pool.c | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
--- a/src/pool.c
+++ b/src/pool.c
@@ -424,15 +424,17 @@ void *pool_alloc_nocache(struct pool_head *pool, const void *caller)
 {
         void *ptr = NULL;
         uint bucket;
+        uint used;
 
         ptr = pool_get_from_os_noinc(pool);
         if (!ptr)
                 return NULL;
 
         bucket = pool_pbucket(ptr);
-        swrate_add_scaled_opportunistic(&pool->buckets[bucket].needed_avg, POOL_AVG_SAMPLES, pool->buckets[bucket].used, POOL_AVG_SAMPLES/4);
+
         _HA_ATOMIC_INC(&pool->buckets[bucket].allocated);
-        _HA_ATOMIC_INC(&pool->buckets[bucket].used);
+        used = _HA_ATOMIC_FETCH_ADD(&pool->buckets[bucket].used, 1);
+        swrate_add_scaled_opportunistic(&pool->buckets[bucket].needed_avg, POOL_AVG_SAMPLES, used, POOL_AVG_SAMPLES/4);
 
         /* keep track of where the element was allocated from */
         POOL_DEBUG_SET_MARK(pool, ptr);
@@ -447,10 +449,11 @@ void *pool_alloc_nocache(struct pool_head *pool, const void *caller)
 void pool_free_nocache(struct pool_head *pool, void *ptr)
 {
         uint bucket = pool_pbucket(ptr);
+        uint used;
 
-        _HA_ATOMIC_DEC(&pool->buckets[bucket].used);
+        used = _HA_ATOMIC_SUB_FETCH(&pool->buckets[bucket].used, 1);
         _HA_ATOMIC_DEC(&pool->buckets[bucket].allocated);
-        swrate_add_opportunistic(&pool->buckets[bucket].needed_avg, POOL_AVG_SAMPLES, pool->buckets[bucket].used);
+        swrate_add_opportunistic(&pool->buckets[bucket].needed_avg, POOL_AVG_SAMPLES, used);
 
         pool_put_to_os_nodec(pool, ptr);
 }
@@ -519,6 +522,7 @@ static void pool_evict_last_items(struct pool_head *pool, struct pool_cache_head
         uint cluster = 0;
         uint to_free_max;
         uint bucket;
+        uint used;
 
         BUG_ON(pool_debugging & POOL_DBG_NO_CACHE);
 
@@ -534,8 +538,8 @@ static void pool_evict_last_items(struct pool_head *pool, struct pool_cache_head
                 LIST_DELETE(&item->by_lru);
 
                 bucket = pool_pbucket(item);
-                _HA_ATOMIC_DEC(&pool->buckets[bucket].used);
-                swrate_add_opportunistic(&pool->buckets[bucket].needed_avg, POOL_AVG_SAMPLES, pool->buckets[bucket].used);
+                used = _HA_ATOMIC_SUB_FETCH(&pool->buckets[bucket].used, 1);
+                swrate_add_opportunistic(&pool->buckets[bucket].needed_avg, POOL_AVG_SAMPLES, used);
 
                 if (to_free_max > released || cluster) {
                         /* will never match when global pools are disabled */
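Note the deliberate asymmetry in the patch, which keeps the values fed to the averaging code identical to before: the allocation path uses FETCH_ADD (the old code sampled .used before incrementing it, and fetch-add returns the pre-increment value), while both release paths use SUB_FETCH (the old code sampled .used after decrementing it, and sub-fetch returns the post-decrement value). A quick self-contained check of those return-value semantics, assuming the _HA_ATOMIC_* macros behave like the underlying GCC/Clang builtins:

#include <assert.h>

int main(void)
{
        unsigned int used = 5;

        /* fetch-add returns the value *before* the increment, like the
         * old alloc path that read .used before _HA_ATOMIC_INC() */
        assert(__atomic_fetch_add(&used, 1, __ATOMIC_SEQ_CST) == 5);
        assert(used == 6);

        /* sub-fetch returns the value *after* the decrement, like the
         * old release paths that read .used after _HA_ATOMIC_DEC() */
        assert(__atomic_sub_fetch(&used, 1, __ATOMIC_SEQ_CST) == 5);
        assert(used == 5);
        return 0;
}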