From 7968fe38895ba36ac1d1382555abaa1b64b81a5a Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sat, 27 May 2023 16:55:48 +0000 Subject: [PATCH] MEDIUM: stick-table: change the ref_cnt atomically Due to the ts->ref_cnt being manipulated and checked inside wrlocks, we continue to have it updated under plenty of read locks, which have an important cost on many-thread machines. This patch turns them all to atomic ops and carefully moves them outside of locks every time this is possible: - the ref_cnt is incremented before write-unlocking on creation otherwise the element could vanish before we can do it - the ref_cnt is decremented after write-locking on release - for all other cases it's updated out of locks since it's guaranteed by the sequence that it cannot vanish - checks are done before locking every time it's used to decide whether we're going to release the element (saves several write locks) - expiration tests are just done using atomic loads, since there's no particular ordering constraint there, we just want consistent values. For Lua, the loop that is used to dump stick-tables could switch to read locks only, but this was not done. For peers, the loop that builds updates in peer_send_teachmsgs is extremely expensive in write locks and it doesn't seem this is really needed since the only updated variables are last_pushed and commitupdate, the first one being on the shared table (thus not used by other threads) and the commitupdate could likely be changed using a CAS. Thus all of this could theoretically move under a read lock, but that was not done here. On an 80-thread machine with a peers section enabled, the request rate increased from 415k to 520k rps. 
--- include/haproxy/stick_table.h | 13 ++------ src/hlua_fcn.c | 12 ++++---- src/peers.c | 13 ++++---- src/stick_table.c | 56 ++++++++++++++++------------------- 4 files changed, 38 insertions(+), 56 deletions(-) diff --git a/include/haproxy/stick_table.h b/include/haproxy/stick_table.h index 90ab6f3fe..c8e66bab4 100644 --- a/include/haproxy/stick_table.h +++ b/include/haproxy/stick_table.h @@ -202,21 +202,14 @@ static inline int __stksess_kill_if_expired(struct stktable *t, struct stksess * static inline void stksess_kill_if_expired(struct stktable *t, struct stksess *ts, int decrefcnt) { + if (decrefcnt && HA_ATOMIC_SUB_FETCH(&ts->ref_cnt, 1) != 0) + return; + if (t->expire != TICK_ETERNITY && tick_is_expired(ts->expire, now_ms)) { HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock); - if (decrefcnt) - ts->ref_cnt--; - __stksess_kill_if_expired(t, ts); HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock); } - else { - if (decrefcnt) { - HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->lock); - HA_ATOMIC_DEC(&ts->ref_cnt); - HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->lock); - } - } } /* sets the stick counter's entry pointer */ diff --git a/src/hlua_fcn.c b/src/hlua_fcn.c index 06805feef..091a23e6f 100644 --- a/src/hlua_fcn.c +++ b/src/hlua_fcn.c @@ -924,7 +924,7 @@ int hlua_stktable_lookup(lua_State *L) lua_newtable(L); lua_pushstring(L, "use"); - lua_pushinteger(L, ts->ref_cnt - 1); + lua_pushinteger(L, HA_ATOMIC_LOAD(&ts->ref_cnt) - 1); lua_settable(L, -3); lua_pushstring(L, "expire"); @@ -932,9 +932,7 @@ int hlua_stktable_lookup(lua_State *L) lua_settable(L, -3); hlua_stktable_entry(L, t, ts); - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock); - ts->ref_cnt--; - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock); + HA_ATOMIC_DEC(&ts->ref_cnt); return 1; } @@ -1052,7 +1050,7 @@ int hlua_stktable_dump(lua_State *L) HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock); return 1; } - ts->ref_cnt++; + HA_ATOMIC_INC(&ts->ref_cnt); HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock); /* multi condition/value filter */ @@ 
-1094,7 +1092,7 @@ int hlua_stktable_dump(lua_State *L) if (skip_entry) { HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock); - ts->ref_cnt--; + HA_ATOMIC_DEC(&ts->ref_cnt); continue; } @@ -1118,7 +1116,7 @@ int hlua_stktable_dump(lua_State *L) hlua_stktable_entry(L, t, ts); lua_settable(L, -3); HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock); - ts->ref_cnt--; + HA_ATOMIC_DEC(&ts->ref_cnt); } HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock); diff --git a/src/peers.c b/src/peers.c index 5bf85406b..9055760d5 100644 --- a/src/peers.c +++ b/src/peers.c @@ -1597,18 +1597,15 @@ static inline int peer_send_teachmsgs(struct appctx *appctx, struct peer *p, continue; } - ts->ref_cnt++; + HA_ATOMIC_INC(&ts->ref_cnt); HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &st->table->lock); ret = peer_send_updatemsg(st, appctx, ts, updateid, new_pushed, use_timed); - if (ret <= 0) { - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &st->table->lock); - ts->ref_cnt--; - break; - } - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &st->table->lock); - ts->ref_cnt--; + HA_ATOMIC_DEC(&ts->ref_cnt); + if (ret <= 0) + break; + st->last_pushed = updateid; if (peer_stksess_lookup == peer_teach_process_stksess_lookup && diff --git a/src/stick_table.c b/src/stick_table.c index ae6b79f60..6ba26ffa7 100644 --- a/src/stick_table.c +++ b/src/stick_table.c @@ -110,11 +110,12 @@ void stksess_free(struct stktable *t, struct stksess *ts) } /* - * Kill an stksess (only if its ref_cnt is zero). + * Kill an stksess (only if its ref_cnt is zero). This must be called under the + * write lock. Returns zero if it could not be deleted, non-zero otherwise. */ int __stksess_kill(struct stktable *t, struct stksess *ts) { - if (ts->ref_cnt) + if (HA_ATOMIC_LOAD(&ts->ref_cnt)) return 0; eb32_delete(&ts->exp); @@ -125,17 +126,18 @@ int __stksess_kill(struct stktable *t, struct stksess *ts) } /* - * Decrease the refcount if decrefcnt is not 0. - * and try to kill the stksess 
+ * Returns non-zero if deleted, zero otherwise. * This function locks the table */ int stksess_kill(struct stktable *t, struct stksess *ts, int decrefcnt) { int ret; + if (decrefcnt && HA_ATOMIC_SUB_FETCH(&ts->ref_cnt, 1) != 0) + return 0; + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock); - if (decrefcnt) - ts->ref_cnt--; ret = __stksess_kill(t, ts); HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock); @@ -245,7 +247,7 @@ int __stktable_trash_oldest(struct stktable *t, int to_batch) eb = eb32_next(eb); /* don't delete an entry which is currently referenced */ - if (ts->ref_cnt) + if (HA_ATOMIC_LOAD(&ts->ref_cnt) != 0) continue; eb32_delete(&ts->exp); @@ -471,19 +473,12 @@ void stktable_touch_with_exp(struct stktable *t, struct stksess *ts, int local, } } - if (decrefcnt) { - if (locked) - ts->ref_cnt--; - else { - HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->lock); - HA_ATOMIC_DEC(&ts->ref_cnt); - HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->lock); - } - } - if (locked) HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock); + if (decrefcnt) + HA_ATOMIC_DEC(&ts->ref_cnt); + if (do_wakeup) task_wakeup(t->sync_task, TASK_WOKEN_MSG); } @@ -520,9 +515,7 @@ static void stktable_release(struct stktable *t, struct stksess *ts) { if (!ts) return; - HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->lock); HA_ATOMIC_DEC(&ts->ref_cnt); - HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->lock); } /* Insert new sticky session in the table. It is assumed that it does not @@ -609,6 +602,10 @@ struct stksess *stktable_get_entry(struct stktable *table, struct stktable_key * HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &table->lock); ts2 = __stktable_store(table, ts); + + HA_ATOMIC_INC(&ts2->ref_cnt); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &table->lock); + if (unlikely(ts2 != ts)) { /* another entry was added in the mean time, let's * switch to it. 
@@ -617,9 +614,6 @@ struct stksess *stktable_get_entry(struct stktable *table, struct stktable_key * ts = ts2; } - HA_ATOMIC_INC(&ts->ref_cnt); - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &table->lock); - stktable_requeue_exp(table, ts); return ts; } @@ -702,7 +696,7 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int eb = eb32_next(eb); /* don't delete an entry which is currently referenced */ - if (ts->ref_cnt) + if (HA_ATOMIC_LOAD(&ts->ref_cnt) != 0) continue; eb32_delete(&ts->exp); @@ -2374,7 +2368,7 @@ static int sample_conv_table_trackers(const struct arg *arg_p, struct sample *sm if (!ts) return 1; - smp->data.u.sint = ts->ref_cnt; + smp->data.u.sint = HA_ATOMIC_LOAD(&ts->ref_cnt); stktable_release(t, ts); return 1; @@ -4581,11 +4575,11 @@ smp_fetch_sc_trackers(const struct arg *args, struct sample *smp, const char *kw smp->flags = SMP_F_VOL_TEST; smp->data.type = SMP_T_SINT; if (stkctr == &tmpstkctr) { - smp->data.u.sint = stkctr_entry(stkctr) ? (stkctr_entry(stkctr)->ref_cnt-1) : 0; + smp->data.u.sint = stkctr_entry(stkctr) ? (HA_ATOMIC_LOAD(&stkctr_entry(stkctr)->ref_cnt) - 1) : 0; stktable_release(stkctr->table, stkctr_entry(stkctr)); } else { - smp->data.u.sint = stkctr_entry(stkctr) ? stkctr_entry(stkctr)->ref_cnt : 0; + smp->data.u.sint = stkctr_entry(stkctr) ? 
HA_ATOMIC_LOAD(&stkctr_entry(stkctr)->ref_cnt) : 0; } return 1; @@ -4663,7 +4657,7 @@ static int table_dump_entry_to_buffer(struct buffer *msg, dump_binary(msg, (const char *)entry->key.key, t->key_size); } - chunk_appendf(msg, " use=%d exp=%d shard=%d", entry->ref_cnt - 1, tick_remain(now_ms, entry->expire), entry->shard); + chunk_appendf(msg, " use=%d exp=%d shard=%d", HA_ATOMIC_LOAD(&entry->ref_cnt) - 1, tick_remain(now_ms, entry->expire), entry->shard); for (dt = 0; dt < STKTABLE_DATA_TYPES; dt++) { void *ptr; @@ -5082,7 +5076,7 @@ static int cli_io_handler_table(struct appctx *appctx) eb = ebmb_first(&ctx->t->keys); if (eb) { ctx->entry = ebmb_entry(eb, struct stksess, key); - ctx->entry->ref_cnt++; + HA_ATOMIC_INC(&ctx->entry->ref_cnt); ctx->state = STATE_DUMP; HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->lock); break; @@ -5156,7 +5150,7 @@ static int cli_io_handler_table(struct appctx *appctx) HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &ctx->entry->lock); HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &ctx->t->lock); - ctx->entry->ref_cnt--; + HA_ATOMIC_DEC(&ctx->entry->ref_cnt); eb = ebmb_next(&ctx->entry->key); if (eb) { @@ -5166,7 +5160,7 @@ static int cli_io_handler_table(struct appctx *appctx) __stksess_kill_if_expired(ctx->t, old); else if (!skip_entry && !ctx->entry->ref_cnt) __stksess_kill(ctx->t, old); - ctx->entry->ref_cnt++; + HA_ATOMIC_INC(&ctx->entry->ref_cnt); HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->lock); break; } @@ -5174,7 +5168,7 @@ static int cli_io_handler_table(struct appctx *appctx) if (show) __stksess_kill_if_expired(ctx->t, ctx->entry); - else if (!skip_entry && !ctx->entry->ref_cnt) + else if (!skip_entry && !HA_ATOMIC_LOAD(&ctx->entry->ref_cnt)) __stksess_kill(ctx->t, ctx->entry); HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->lock);