From 8f100427c4bf56acff967e313cd9cb8d42da035d Mon Sep 17 00:00:00 2001 From: Christopher Faulet Date: Mon, 18 Jan 2021 15:47:03 +0100 Subject: [PATCH] BUG/MEDIUM: tcpcheck: Don't destroy connection in the wake callback context When a tcpcheck ruleset uses multiple connections, the existing one must be closed and destroyed before opening the new one. This part is handled in the tcpcheck_main() function, when called from the wake callback function (wake_srv_chk). But it is indeed a problem, because this function may be called from the mux layer. This means a mux may call the wake callback function of the data layer, which may release the connection and the mux. It is easy to see how it is hazardous. And actually, depending on the scheduling, it leads to crashes. Thus, we must avoid releasing the connection in the wake callback context, and move this part into the check's process function instead. To do so, we rely on the CHK_ST_CLOSE_CONN flag. When a connection must be replaced by a new one, this flag is set on the check, in the tcpcheck_main() function, and the check's task is woken up. Then, the connection is really closed in the process_chk_conn() function. This patch must be backported as far as 2.2, with some adaptations however because the code is not exactly the same. 
--- include/haproxy/check-t.h | 1 + src/check.c | 20 +++++++++++++++++--- src/tcpcheck.c | 24 ++++++++++-------------- 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/include/haproxy/check-t.h b/include/haproxy/check-t.h index 274e162b5..b78a9f9f5 100644 --- a/include/haproxy/check-t.h +++ b/include/haproxy/check-t.h @@ -52,6 +52,7 @@ enum chk_result { #define CHK_ST_PORT_MISS 0x0020 /* check can't be send because no port is configured to run it */ #define CHK_ST_IN_ALLOC 0x0040 /* check blocked waiting for input buffer allocation */ #define CHK_ST_OUT_ALLOC 0x0080 /* check blocked waiting for output buffer allocation */ +#define CHK_ST_CLOSE_CONN 0x0100 /* check is waiting that the connection gets closed */ /* check status */ enum healthcheck_status { diff --git a/src/check.c b/src/check.c index 55470d551..2c0a0aceb 100644 --- a/src/check.c +++ b/src/check.c @@ -793,12 +793,16 @@ static int wake_srv_chk(struct conn_stream *cs) task_wakeup(check->task, TASK_WOKEN_IO); } - if (check->result != CHK_RES_UNKNOWN) { + if (check->result != CHK_RES_UNKNOWN || ret == -1) { /* Check complete or aborted. If connection not yet closed do it * now and wake the check task up to be sure the result is * handled ASAP. 
*/ cs_drain_and_close(cs); ret = -1; + + if (check->wait_list.events) + cs->conn->mux->unsubscribe(cs, check->wait_list.events, &check->wait_list); + /* We may have been scheduled to run, and the * I/O handler expects to have a cs, so remove * the tasklet @@ -878,8 +882,18 @@ static struct task *process_chk_conn(struct task *t, void *context, unsigned sho if ((conn->flags & CO_FL_ERROR) || cs->flags & CS_FL_ERROR || expired) { chk_report_conn_err(check, 0, expired); } - else - goto out_unlock; /* timeout not reached, wait again */ + else { + if (check->state & CHK_ST_CLOSE_CONN) { + cs_destroy(cs); + cs = NULL; + conn = NULL; + check->cs = NULL; + check->state &= ~CHK_ST_CLOSE_CONN; + tcpcheck_main(check); + } + if (check->result == CHK_RES_UNKNOWN) + goto out_unlock; /* timeout not reached, wait again */ + } } /* check complete or aborted */ diff --git a/src/tcpcheck.c b/src/tcpcheck.c index 40c745091..2a83dd196 100644 --- a/src/tcpcheck.c +++ b/src/tcpcheck.c @@ -2013,24 +2013,20 @@ int tcpcheck_main(struct check *check) check->code = 0; switch (rule->action) { case TCPCHK_ACT_CONNECT: - /* release the previous connection (from a previous connect rule) */ + /* Not the first connection, release it first */ if (cs && check->current_step != rule) { - cs_close(cs); - if (check->wait_list.events) - cs->conn->mux->unsubscribe(cs, check->wait_list.events, &check->wait_list); - - /* We may have been scheduled to run, and the I/O handler - * expects to have a cs, so remove the tasklet - */ - tasklet_remove_from_tasklet_list(check->wait_list.tasklet); - cs_destroy(cs); - cs = NULL; - conn = NULL; - check->cs = NULL; - retcode = -1; /* do not reuse the fd in the caller! 
*/ + check->state |= CHK_ST_CLOSE_CONN; + retcode = -1; } check->current_step = rule; + + /* We are still waiting the connection gets closed */ + if (cs && (check->state & CHK_ST_CLOSE_CONN)) { + eval_ret = TCPCHK_EVAL_WAIT; + break; + } + eval_ret = tcpcheck_eval_connect(check, rule); /* Refresh conn-stream and connection */