From c35bcfcc2116b46e89cad5b34656ed3eb5378d3a Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 29 Jun 2020 14:43:16 +0200 Subject: [PATCH] BUG/MINOR: server: start cleaning idle connections from various points There's a minor glitch with the way idle connections start to be evicted. The lookup always goes from thread 0 to thread N-1. This causes depletion of connections on the first threads and abundance on the last ones. This is visible with the takeover() stats below: $ socat - /tmp/sock1 <<< "show activity"|grep ^fd ; \ sleep 10 ; \ socat - /tmp/sock1 <<< "show activity"|grep ^fd fd_takeover: 300144 [ 91887 84029 66254 57974 ] fd_takeover: 359631 [ 111369 99699 79145 69418 ] There are respectively 19k, 15k, 13k and 11k takeovers for only 4 threads, indicating that the first thread needs a foreign FD twice more often than the 4th one. This patch changes this so that all threads are scanned in round robin starting with the current one. The takeovers now happen in a much more distributed way (about 4 times 9k) : fd_takeover: 1420081 [ 359562 359453 346586 354480 ] fd_takeover: 1457044 [ 368779 368429 355990 363846 ] There is no need to backport this, as this happened along a few patches that were merged during 2.2 development. --- src/backend.c | 5 +---- src/server.c | 12 ++++++++++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/backend.c b/src/backend.c index 70487870c..a3ec782e2 100644 --- a/src/backend.c +++ b/src/backend.c @@ -1274,10 +1274,7 @@ int connect_server(struct stream *s) else { int i; - for (i = 0; i < global.nbthread; i++) { - if (i == tid) - continue; - + for (i = tid; (i = ((i + 1 == global.nbthread) ? 
0 : i + 1)) != tid;) { // just silence stupid gcc which reports an absurd // out-of-bounds warning for which is always // exactly zero without threads, but it seems to diff --git a/src/server.c b/src/server.c index 1a00363e8..8cc5bbff6 100644 --- a/src/server.c +++ b/src/server.c @@ -5187,8 +5187,9 @@ static void srv_cleanup_connections(struct server *srv) int i; int j; + /* check all threads starting with ours */ HA_SPIN_LOCK(OTHER_LOCK, &idle_conn_srv_lock); - for (i = 0; i < global.nbthread; i++) { + for (i = tid;;) { did_remove = 0; HA_SPIN_LOCK(OTHER_LOCK, &idle_conns[i].toremove_lock); for (j = 0; j < srv->curr_idle_conns; j++) { @@ -5204,6 +5205,9 @@ static void srv_cleanup_connections(struct server *srv) HA_SPIN_UNLOCK(OTHER_LOCK, &idle_conns[i].toremove_lock); if (did_remove) task_wakeup(idle_conns[i].cleanup_task, TASK_WOKEN_OTHER); + + if ((i = ((i + 1 == global.nbthread) ? 0 : i + 1)) == tid) + break; } HA_SPIN_UNLOCK(OTHER_LOCK, &idle_conn_srv_lock); } @@ -5255,7 +5259,8 @@ struct task *srv_cleanup_idle_connections(struct task *task, void *context, unsi exceed_conns = to_kill = exceed_conns / 2 + (exceed_conns & 1); srv->max_used_conns = srv->curr_used_conns; - for (i = 0; i < global.nbthread && to_kill > 0; i++) { + /* check all threads starting with ours */ + for (i = tid;;) { int max_conn; int j; int did_remove = 0; @@ -5278,6 +5283,9 @@ struct task *srv_cleanup_idle_connections(struct task *task, void *context, unsi srv_is_empty = 0; if (did_remove) task_wakeup(idle_conns[i].cleanup_task, TASK_WOKEN_OTHER); + + if ((i = ((i + 1 == global.nbthread) ? 0 : i + 1)) == tid) + break; } remove: eb32_delete(&srv->idle_node);