BUG/MEDIUM: server: make it possible to kill last idle connections

In issue #933, @jaroslawr provided a report indicating that when using many threads and many servers, it's very difficult to terminate the last idle connections on each server. The issue has two causes in fact. The first one is that during the calculation of the estimate of needed connections, we round the computation up while in previous round it was already rounded up, so we end up adding 1 to 1 which once divided by 2 remains 1. The second issue is that servers are not woken up anymore for purging their connections if they don't have activity. The only reason that was there to wake them up again was in case insufficient connections were purged. And even then the purge task itself was not woken up. But that is not enough for getting rid of the long tail of old connections nor updating est_need_conns. This patch makes sure to properly wake up as long as at least one idle connection remains, and not to round up the needed connections anymore. Prior to this patch, a test involving many connections which suddenly stopped would keep many idle connections, now they're effectively halved every pool-purge-delay. This needs to be backported to 2.2.
2024-12-18 09:24:31 +00:00 · 2020-11-05 09:12:20 +01:00 · 2020-11-05 09:12:20 +01:00 · 21b9ff59b2
commit 21b9ff59b2
parent f9bc6e4968
1 changed files with 7 additions and 14 deletions
--- a/src/server.c
+++ b/src/server.c
@ -5244,11 +5244,10 @@ struct task *srv_cleanup_idle_connections(struct task *task, void *context, unsi
 	struct eb32_node *eb;
 	int i;
 	unsigned int next_wakeup;
-	int need_wakeup = 0;

+	next_wakeup = TICK_ETERNITY;
 	HA_SPIN_LOCK(OTHER_LOCK, &idle_conn_srv_lock);
 	while (1) {
-		int srv_is_empty = 1;
 		int exceed_conns;
 		int to_kill;
 		int curr_idle;
@ -5267,7 +5266,6 @@ struct task *srv_cleanup_idle_connections(struct task *task, void *context, unsi
 		if (tick_is_lt(now_ms, eb->key)) {
 			/* timer not expired yet, revisit it later */
 			next_wakeup = eb->key;
-			need_wakeup = 1;
 			break;
 		}
 		srv = eb32_entry(eb, struct server, idle_node);
@ -5283,7 +5281,7 @@ struct task *srv_cleanup_idle_connections(struct task *task, void *context, unsi
 		exceed_conns = srv->curr_used_conns + curr_idle - MAX(srv->max_used_conns, srv->est_need_conns);
 		exceed_conns = to_kill = exceed_conns / 2 + (exceed_conns & 1);

-		srv->est_need_conns = (srv->est_need_conns + srv->max_used_conns + 1) / 2;
+		srv->est_need_conns = (srv->est_need_conns + srv->max_used_conns) / 2;
 		if (srv->est_need_conns < srv->max_used_conns)
 			srv->est_need_conns = srv->max_used_conns;

@ -5308,8 +5306,6 @@ struct task *srv_cleanup_idle_connections(struct task *task, void *context, unsi
 			    srv_migrate_conns_to_remove(&srv->safe_conns[i], &idle_conns[i].toremove_conns, max_conn - j) > 0)
 				did_remove = 1;

-			if (did_remove && max_conn < srv->curr_idle_thr[i])
-				srv_is_empty = 0;
 			if (did_remove)
 				task_wakeup(idle_conns[i].cleanup_task, TASK_WOKEN_OTHER);

@ -5318,22 +5314,19 @@ struct task *srv_cleanup_idle_connections(struct task *task, void *context, unsi
 		}
 remove:
 		eb32_delete(&srv->idle_node);
-		if (!srv_is_empty) {
+
+		if (srv->curr_idle_conns) {
 			/* There are still more idle connections, add the
 			 * server back in the tree.
 			 */
-			srv->idle_node.key = tick_add(srv->pool_purge_delay,
-			    now_ms);
+			srv->idle_node.key = tick_add(srv->pool_purge_delay, now_ms);
 			eb32_insert(&idle_conn_srv, &srv->idle_node);
+			next_wakeup = tick_first(next_wakeup, srv->idle_node.key);
 		}
 	}
 	HA_SPIN_UNLOCK(OTHER_LOCK, &idle_conn_srv_lock);

-	if (need_wakeup)
-		task->expire = next_wakeup;
-	else
-		task->expire = TICK_ETERNITY;
-
+	task->expire = next_wakeup;
 	return task;
 }