BUG/MEDIUM: tcpcheck: Don't destroy connection in the wake callback context

When a tcpcheck ruleset uses multiple connections, the existing one must be
closed and destroyed before opening the new one. This part is handled in
the tcpcheck_main() function, when called from the wake callback function
(wake_srv_chk). However, this is a problem, because this function may be
called from the mux layer. This means a mux may call the wake callback
function of the data layer, which may release the connection and the very
mux that called it. It is easy to see how this is hazardous. And actually,
depending on the scheduling, it leads to crashes.

Thus, we must avoid releasing the connection in the wake callback context,
and move this part into the check's process function instead. To do so, we
rely on the CHK_ST_CLOSE_CONN flag. When a connection must be replaced by a
new one, this flag is set on the check in the tcpcheck_main() function, and
the check's task is woken up. Then, the connection is actually closed in the
process_chk_conn() function.

This patch must be backported as far as 2.2, with some adaptations however
because the code is not exactly the same.
This commit is contained in:
Christopher Faulet 2021-01-18 15:47:03 +01:00
parent 25439de181
commit 8f100427c4
3 changed files with 28 additions and 17 deletions

View File

@ -52,6 +52,7 @@ enum chk_result {
#define CHK_ST_PORT_MISS 0x0020 /* check can't be sent because no port is configured to run it */
#define CHK_ST_IN_ALLOC 0x0040 /* check blocked waiting for input buffer allocation */
#define CHK_ST_OUT_ALLOC 0x0080 /* check blocked waiting for output buffer allocation */
#define CHK_ST_CLOSE_CONN 0x0100 /* check is waiting for its connection to be closed: set by tcpcheck_main(), cleared by process_chk_conn() once the conn-stream is destroyed */
/* check status */
enum healthcheck_status {

View File

@ -793,12 +793,16 @@ static int wake_srv_chk(struct conn_stream *cs)
task_wakeup(check->task, TASK_WOKEN_IO);
}
if (check->result != CHK_RES_UNKNOWN) {
if (check->result != CHK_RES_UNKNOWN || ret == -1) {
/* Check complete or aborted. If connection not yet closed do it
* now and wake the check task up to be sure the result is
* handled ASAP. */
cs_drain_and_close(cs);
ret = -1;
if (check->wait_list.events)
cs->conn->mux->unsubscribe(cs, check->wait_list.events, &check->wait_list);
/* We may have been scheduled to run, and the
* I/O handler expects to have a cs, so remove
* the tasklet
@ -878,8 +882,18 @@ static struct task *process_chk_conn(struct task *t, void *context, unsigned sho
if ((conn->flags & CO_FL_ERROR) || cs->flags & CS_FL_ERROR || expired) {
chk_report_conn_err(check, 0, expired);
}
else
goto out_unlock; /* timeout not reached, wait again */
else {
if (check->state & CHK_ST_CLOSE_CONN) {
cs_destroy(cs);
cs = NULL;
conn = NULL;
check->cs = NULL;
check->state &= ~CHK_ST_CLOSE_CONN;
tcpcheck_main(check);
}
if (check->result == CHK_RES_UNKNOWN)
goto out_unlock; /* timeout not reached, wait again */
}
}
/* check complete or aborted */

View File

@ -2013,24 +2013,20 @@ int tcpcheck_main(struct check *check)
check->code = 0;
switch (rule->action) {
case TCPCHK_ACT_CONNECT:
/* release the previous connection (from a previous connect rule) */
/* Not the first connection, release it first */
if (cs && check->current_step != rule) {
cs_close(cs);
if (check->wait_list.events)
cs->conn->mux->unsubscribe(cs, check->wait_list.events, &check->wait_list);
/* We may have been scheduled to run, and the I/O handler
* expects to have a cs, so remove the tasklet
*/
tasklet_remove_from_tasklet_list(check->wait_list.tasklet);
cs_destroy(cs);
cs = NULL;
conn = NULL;
check->cs = NULL;
retcode = -1; /* do not reuse the fd in the caller! */
check->state |= CHK_ST_CLOSE_CONN;
retcode = -1;
}
check->current_step = rule;
/* We are still waiting the connection gets closed */
if (cs && (check->state & CHK_ST_CLOSE_CONN)) {
eval_ret = TCPCHK_EVAL_WAIT;
break;
}
eval_ret = tcpcheck_eval_connect(check, rule);
/* Refresh conn-stream and connection */