MEDIUM: stick-tables: defer adding updates to a tasklet

There is a lot of contention when trying to add updates to the tree. So
instead of adding updates to the tree right away, just add them to an
mt-list (with one mt-list per thread group, so that the mt-list itself
does not become the new point of contention), and create a tasklet
dedicated to adding updates to the tree in batches, to avoid holding
the update lock for too long.
This helps stick tables perform better under heavy load.
Olivier Houchard 2025-05-02 11:46:54 +00:00 committed by Willy Tarreau
parent b3ad7b6371
commit 388539faa3
3 changed files with 104 additions and 70 deletions
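For readers not familiar with the pattern, the sketch below models the idea in plain, self-contained C: producers append to a per-thread-group pending list (cheap, rarely contended), and a single worker later moves entries into the shared tree in bounded batches while holding the big update lock. It is not HAProxy code: the lock-free mt-lists are replaced by mutex-protected lists, the tasklet by an ordinary function, and names such as `queue_update()` and `drain_pending()` are made up for illustration.

```c
/* Simplified, standalone model of "defer stick-table updates to a worker,
 * one pending list per thread group". Producers only touch their own
 * group's list; a single worker drains all lists round-robin, in bounded
 * batches, while holding the lock that protects the shared tree. */
#include <pthread.h>
#include <stddef.h>

#define NB_GROUPS           4    /* stand-in for global.nbtgroups */
#define MAX_UPDATES_PER_RUN 100  /* stand-in for STKTABLE_MAX_UPDATES_AT_ONCE */

struct pending {                 /* stand-in for stksess->pend_updts */
	struct pending *next;
	int is_local;
};

struct group {                   /* one pending list per thread group */
	pthread_mutex_t lock;
	struct pending *head;
};

static struct group groups[NB_GROUPS];
static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER; /* the updt_lock */

static void init_groups(void)
{
	for (int i = 0; i < NB_GROUPS; i++) {
		pthread_mutex_init(&groups[i].lock, NULL);
		groups[i].head = NULL;
	}
}

/* producer side: roughly what the touch path now does (the real code
 * appends in order and only wakes the worker on a real append) */
static void queue_update(int grp, struct pending *p, int is_local)
{
	p->is_local = is_local;
	pthread_mutex_lock(&groups[grp].lock);
	p->next = groups[grp].head;
	groups[grp].head = p;
	pthread_mutex_unlock(&groups[grp].lock);
	/* ... then wake the worker (tasklet_wakeup() in HAProxy) ... */
}

/* consumer side: roughly what the new tasklet does, bounded per run so
 * the tree lock is never held for too long */
static int drain_pending(void (*insert_in_tree)(struct pending *))
{
	int done = 0, grp = 0, empty = 0;

	pthread_mutex_lock(&tree_lock);
	while (done < MAX_UPDATES_PER_RUN && empty < NB_GROUPS) {
		struct pending *p;

		pthread_mutex_lock(&groups[grp].lock);
		p = groups[grp].head;
		if (p)
			groups[grp].head = p->next;
		pthread_mutex_unlock(&groups[grp].lock);

		if (!p) {
			empty++;            /* this group had nothing */
		} else {
			empty = 0;
			insert_in_tree(p);  /* eb32_insert() into t->updates in HAProxy */
			done++;
		}
		grp = (grp + 1) % NB_GROUPS;
	}
	pthread_mutex_unlock(&tree_lock);
	return done;  /* the real tasklet reschedules itself if lists are not all empty */
}
```

The per-run cap in this model is what the STKTABLE_MAX_UPDATES_AT_ONCE default in the first hunk below provides.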

include/haproxy/defaults.h

@@ -654,4 +654,8 @@
#define QUIC_MAX_TX_MEM 0
#endif
#ifndef STKTABLE_MAX_UPDATES_AT_ONCE
#define STKTABLE_MAX_UPDATES_AT_ONCE 100
#endif /* STKTABLE_MAX_UPDATES_AT_ONCE */
#endif /* _HAPROXY_DEFAULTS_H */
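Since the definition sits behind an #ifndef guard, 100 is only a compile-time default; a build that wants a different batch size can predefine the macro before this header is seen, for instance (hypothetical, not part of this commit):

```c
/* hypothetical build-time override, e.g. passed in the build's extra
 * defines/CFLAGS, raising how many pending updates the tasklet may move
 * into the tree per run */
#define STKTABLE_MAX_UPDATES_AT_ONCE 256
```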

include/haproxy/stick_table-t.h

@@ -151,6 +151,8 @@ struct stksess {
int seen; /* 0 only when no peer has seen this entry yet */
struct eb32_node exp; /* ebtree node used to hold the session in expiration tree */
struct eb32_node upd; /* ebtree node used to hold the update sequence tree */
struct mt_list pend_updts;/* list of entries to be inserted/moved in the update sequence tree */
int updt_is_local; /* is the update a local one ? */
struct ebmb_node key; /* ebtree node used to hold the session in table */
/* WARNING! do not put anything after <keys>, it's used by the key */
};
@@ -220,9 +222,11 @@ struct stktable {
THREAD_ALIGN(64);
struct eb_root updates; /* head of sticky updates sequence tree, uses updt_lock */
struct mt_list *pend_updts; /* list of updates to be added to the update sequence tree, one per thread-group */
unsigned int update; /* uses updt_lock */
unsigned int localupdate; /* uses updt_lock */
unsigned int commitupdate;/* used to identify the latest local updates pending for sync, uses updt_lock */
struct tasklet *updt_task;/* tasklet responsible for pushing the pending updates into the tree */
THREAD_ALIGN(64);
/* this lock is heavily used and must be on its own cache line */

src/stick_table.c

@@ -144,12 +144,13 @@ int __stksess_kill(struct stktable *t, struct stksess *ts)
if (HA_ATOMIC_LOAD(&ts->ref_cnt))
return 0;
if (ts->upd.node.leaf_p) {
if (ts->upd.node.leaf_p || !MT_LIST_ISEMPTY(&ts->pend_updts)) {
updt_locked = 1;
HA_RWLOCK_WRLOCK(STK_TABLE_UPDT_LOCK, &t->updt_lock);
if (HA_ATOMIC_LOAD(&ts->ref_cnt))
goto out_unlock;
}
MT_LIST_DELETE(&ts->pend_updts);
eb32_delete(&ts->exp);
eb32_delete(&ts->upd);
ebmb_delete(&ts->key);
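Because an entry can now be queued in pend_updts without being in the update tree yet, the kill path (and, in the hunks further down, stktable_trash_oldest() and process_table_expire()) must also unlink it from the pending list; calling MT_LIST_DELETE on an entry that is not queued is harmless, which is why it can be done unconditionally once the lock is held. A standalone, single-threaded model of that "is it queued?" logic (HAProxy's mt_list does the same thing lock-free; names here are illustrative):

```c
/* An intrusive node is initialized self-linked, so "empty" really means
 * "not currently in any list", and deleting a node that is not queued
 * just relinks it to itself (a no-op). */
struct node {
	struct node *next, *prev;
};

static void node_init(struct node *n)            { n->next = n->prev = n; }
static int  node_is_queued(const struct node *n) { return n->next != n; }

static void node_delete(struct node *n)
{
	n->prev->next = n->next;   /* unlink (no effect when self-linked) */
	n->next->prev = n->prev;
	node_init(n);              /* back to the "not queued" state */
}
```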
@@ -271,6 +272,7 @@ static struct stksess *__stksess_init(struct stktable *t, struct stksess * ts)
ts->key.node.leaf_p = NULL;
ts->exp.node.leaf_p = NULL;
ts->upd.node.leaf_p = NULL;
MT_LIST_INIT(&ts->pend_updts);
ts->expire = tick_add(now_ms, MS_TO_TICKS(t->expire));
HA_RWLOCK_INIT(&ts->lock);
return ts;
@@ -362,20 +364,19 @@ int stktable_trash_oldest(struct stktable *t, int to_batch)
* with that lock held, will grab a ref_cnt before releasing the
* lock. So we must take this lock as well and check the ref_cnt.
*/
if (ts->upd.node.leaf_p) {
if (!updt_locked) {
updt_locked = 1;
HA_RWLOCK_WRLOCK(STK_TABLE_UPDT_LOCK, &t->updt_lock);
}
/* now we're locked, new peers can't grab it anymore,
* existing ones already have the ref_cnt.
*/
if (HA_ATOMIC_LOAD(&ts->ref_cnt))
continue;
if (!updt_locked) {
updt_locked = 1;
HA_RWLOCK_WRLOCK(STK_TABLE_UPDT_LOCK, &t->updt_lock);
}
/* now we're locked, new peers can't grab it anymore,
* existing ones already have the ref_cnt.
*/
if (HA_ATOMIC_LOAD(&ts->ref_cnt))
continue;
/* session expired, trash it */
ebmb_delete(&ts->key);
MT_LIST_DELETE(&ts->pend_updts);
eb32_delete(&ts->upd);
__stksess_free(t, ts);
batched++;
@@ -585,9 +586,7 @@ struct stksess *stktable_lookup(struct stktable *t, struct stksess *ts)
*/
void stktable_touch_with_exp(struct stktable *t, struct stksess *ts, int local, int expire, int decrefcnt)
{
struct eb32_node * eb;
int use_wrlock = 0;
int do_wakeup = 0;
int did_append = 0;
if (expire != HA_ATOMIC_LOAD(&ts->expire)) {
/* we'll need to set the expiration and to wake up the expiration timer .*/
@@ -602,63 +601,24 @@ void stktable_touch_with_exp(struct stktable *t, struct stksess *ts, int local,
* scheduled for at least one peer.
*/
if (!ts->upd.node.leaf_p || _HA_ATOMIC_LOAD(&ts->seen)) {
/* Time to upgrade the read lock to write lock */
HA_RWLOCK_WRLOCK(STK_TABLE_UPDT_LOCK, &t->updt_lock);
use_wrlock = 1;
/* here we're write-locked */
ts->seen = 0;
ts->upd.key = ++t->update;
t->localupdate = t->update;
eb32_delete(&ts->upd);
eb = eb32_insert(&t->updates, &ts->upd);
if (eb != &ts->upd) {
eb32_delete(eb);
eb32_insert(&t->updates, &ts->upd);
}
_HA_ATOMIC_STORE(&ts->updt_is_local, 1);
did_append = MT_LIST_TRY_APPEND(&t->pend_updts[tgid - 1], &ts->pend_updts);
}
do_wakeup = 1;
}
else {
/* Note: we land here when learning new entries from
* remote peers. We hold one ref_cnt so the entry
* cannot vanish under us, however if two peers create
* the same key at the exact same time, we must be
* careful not to perform two parallel inserts! Hence
* we need to first check leaf_p to know if the entry
* is new, then lock the tree and check the entry again
* (since another thread could have created it in the
* mean time).
*/
if (!ts->upd.node.leaf_p) {
/* Time to upgrade the read lock to write lock if needed */
HA_RWLOCK_WRLOCK(STK_TABLE_UPDT_LOCK, &t->updt_lock);
use_wrlock = 1;
/* here we're write-locked */
if (!ts->upd.node.leaf_p) {
ts->seen = 0;
ts->upd.key= (++t->update)+(2147483648U);
eb = eb32_insert(&t->updates, &ts->upd);
if (eb != &ts->upd) {
eb32_delete(eb);
eb32_insert(&t->updates, &ts->upd);
}
}
_HA_ATOMIC_STORE(&ts->updt_is_local, 0);
did_append = MT_LIST_TRY_APPEND(&t->pend_updts[tgid - 1], &ts->pend_updts);
}
}
/* drop the lock now */
if (use_wrlock)
HA_RWLOCK_WRUNLOCK(STK_TABLE_UPDT_LOCK, &t->updt_lock);
}
if (did_append)
tasklet_wakeup(t->updt_task);
if (decrefcnt)
HA_ATOMIC_DEC(&ts->ref_cnt);
if (do_wakeup)
task_wakeup(t->sync_task, TASK_WOKEN_MSG);
}
/* Update the expiration timer for <ts> but do not touch its expiration node.
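The net effect on the touch path is that the write lock and the eb32 insertion disappear from the fast path: the entry is merely tagged local or remote, appended to the current thread group's pending list with MT_LIST_TRY_APPEND (which does nothing if the entry is already queued, so repeated touches of the same session only queue it once), and the tasklet is woken only when an append really happened. Waking the peers' sync_task also moves into the tasklet (see the next hunk). A simplified, single-threaded model of that logic, with made-up callback names standing in for the mt-list append and the tasklet wakeup:

```c
#include <stdbool.h>

struct entry {
	bool queued;     /* models "already linked into pend_updts" */
	bool is_local;   /* models ts->updt_is_local */
};

/* returns true when the worker actually needs to be woken */
static bool touch_entry(struct entry *e, bool local,
                        void (*append_pending)(struct entry *),  /* MT_LIST_TRY_APPEND */
                        void (*wake_worker)(void))               /* tasklet_wakeup     */
{
	e->is_local = local;        /* the worker reads this flag later    */
	if (e->queued)
		return false;       /* already pending: nothing more to do */
	e->queued = true;
	append_pending(e);
	wake_worker();
	return true;
}
```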
@@ -809,6 +769,60 @@ struct stksess *stktable_get_entry(struct stktable *table, struct stktable_key *
return ts;
}
static struct task *stktable_add_pend_updates(struct task *t, void *ctx, unsigned int state)
{
struct stktable *table = ctx;
struct eb32_node *eb;
int i, is_local, cur_tgid = tgid - 1, empty_tgid = 0;
HA_RWLOCK_WRLOCK(STK_TABLE_UPDT_LOCK, &table->updt_lock);
for (i = 0; i < STKTABLE_MAX_UPDATES_AT_ONCE; i++) {
struct stksess *stksess = MT_LIST_POP(&table->pend_updts[cur_tgid], typeof(stksess), pend_updts);
if (!stksess) {
empty_tgid++;
cur_tgid++;
if (cur_tgid == global.nbtgroups)
cur_tgid = 0;
if (empty_tgid == global.nbtgroups)
break;
continue;
}
cur_tgid++;
empty_tgid = 0;
if (cur_tgid == global.nbtgroups)
cur_tgid = 0;
is_local = stksess->updt_is_local;
stksess->seen = 0;
if (is_local) {
stksess->upd.key = ++table->update;
table->localupdate = table->update;
eb32_delete(&stksess->upd);
} else {
stksess->upd.key = (++table->update) + (2147483648U);
}
eb = eb32_insert(&table->updates, &stksess->upd);
if (eb != &stksess->upd) {
BUG_ON(1);
eb32_delete(eb);
eb32_insert(&table->updates, &stksess->upd);
}
}
HA_RWLOCK_WRUNLOCK(STK_TABLE_UPDT_LOCK, &table->updt_lock);
/* There's more to do, let's schedule another session */
if (empty_tgid < global.nbtgroups)
tasklet_wakeup(table->updt_task);
if (i > 0) {
/* We did at least one update, let's wake the sync task */
task_wakeup(table->sync_task, TASK_WOKEN_MSG);
}
return t;
}
/* Lookup for an entry with the same key and store the submitted
* stksess if not found. This function locks the table either shared or
* exclusively, and the refcount of the entry is increased.
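The tasklet drains the per-group lists round-robin, moving at most STKTABLE_MAX_UPDATES_AT_ONCE entries per run under the write lock, rescheduling itself while any list still has entries, and waking the sync task once it has inserted something. The update keys it assigns are the same as in the code it replaces: a local update takes the next value of t->update (and advances localupdate), while a peer-learned one takes that value offset by 2^31, i.e. half of the 32-bit key space away from the local sequence in the eb32 tree. A small sketch of that key assignment (illustrative names, not HAProxy code):

```c
#include <stdint.h>

/* Local updates take the raw sequence number; updates learned from peers
 * take the same number shifted by 2^31, half a wrap away from it. */
static uint32_t next_update_key(uint32_t *update_seq, int is_local)
{
	uint32_t key = ++(*update_seq);

	return is_local ? key : key + 2147483648U;
}
```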
@@ -938,20 +952,19 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int
* with that lock held, will grab a ref_cnt before releasing the
* lock. So we must take this lock as well and check the ref_cnt.
*/
if (ts->upd.node.leaf_p) {
if (!updt_locked) {
updt_locked = 1;
HA_RWLOCK_WRLOCK(STK_TABLE_UPDT_LOCK, &t->updt_lock);
}
/* now we're locked, new peers can't grab it anymore,
* existing ones already have the ref_cnt.
*/
if (HA_ATOMIC_LOAD(&ts->ref_cnt))
continue;
if (!updt_locked) {
updt_locked = 1;
HA_RWLOCK_WRLOCK(STK_TABLE_UPDT_LOCK, &t->updt_lock);
}
/* now we're locked, new peers can't grab it anymore,
* existing ones already have the ref_cnt.
*/
if (HA_ATOMIC_LOAD(&ts->ref_cnt))
continue;
/* session expired, trash it */
ebmb_delete(&ts->key);
MT_LIST_DELETE(&ts->pend_updts);
eb32_delete(&ts->upd);
__stksess_free(t, ts);
}
@@ -988,6 +1001,7 @@ int stktable_init(struct stktable *t, char **err_msg)
{
int peers_retval = 0;
int shard;
int i;
t->hash_seed = XXH64(t->id, t->idlen, 0);
@@ -1047,6 +1061,16 @@ int stktable_init(struct stktable *t, char **err_msg)
t->write_to.t = table;
}
t->pend_updts = calloc(global.nbtgroups, sizeof(*t->pend_updts));
if (!t->pend_updts)
goto mem_error;
for (i = 0; i < global.nbtgroups; i++)
MT_LIST_INIT(&t->pend_updts[i]);
t->updt_task = tasklet_new();
if (!t->updt_task)
goto mem_error;
t->updt_task->context = t;
t->updt_task->process = stktable_add_pend_updates;
return 1;
mem_error:
@@ -1065,6 +1089,8 @@ void stktable_deinit(struct stktable *t)
if (!t)
return;
task_destroy(t->exp_task);
tasklet_free(t->updt_task);
ha_free(&t->pend_updts);
pool_destroy(t->pool);
}