From b381a505c1010bb11abbe7b31e8d2307c4dab541 Mon Sep 17 00:00:00 2001 From: Christopher Faulet Date: Wed, 25 Nov 2020 13:47:00 +0100 Subject: [PATCH] BUG/MAJOR: tcpcheck: Allocate input and output buffers from the buffer pool Historically, the input and output buffers of a check are allocated by hand during the startup, with a specific size (not necessarily the same than other buffers). But since the recent refactoring of the checks to rely exclusively on the tcp-checks and to use the underlying mux layer, this part is totally buggy. Indeed, because these buffers are now passed to a mux, they maybe be swapped if a zero-copy is possible. In fact, for now it is only possible in h2_rcv_buf(). Thus the bug concretely only exists if a h2 health-check is performed. But, it is a latent bug for other muxes. Another problem is the size of these buffers. because it may differ for the other buffer size, it might be source of bugs. Finally, for configurations with hundreds of thousands of servers, having 2 buffers per check always allocated may be an issue. To fix the bug, we now allocate these buffers when required using the buffer pool. Thus not-running checks don't waste memory and muxes may swap them if possible. The only drawback is the check buffers have now always the same size than buffers used by the streams. This deprecates indirectly the "tune.chksize" global option. In addition, the http-check regtest have been update to perform some h2 health-checks. Many thanks to @VigneshSP94 for its help on this bug. This patch should solve the issue #936. It relies on the commit "MINOR: tcpcheck: Don't handle anymore in-progress send rules in tcpcheck_main". Both must be backport as far as 2.2. bla --- include/haproxy/check-t.h | 4 ++ include/haproxy/check.h | 3 ++ reg-tests/checks/http-check.vtc | 19 +++++++++ src/check.c | 74 +++++++++++++++++++++++++++------ src/tcpcheck.c | 36 ++++++++++++++-- 5 files changed, 120 insertions(+), 16 deletions(-) diff --git a/include/haproxy/check-t.h b/include/haproxy/check-t.h index 727f837bb..274e162b5 100644 --- a/include/haproxy/check-t.h +++ b/include/haproxy/check-t.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -49,6 +50,8 @@ enum chk_result { #define CHK_ST_PAUSED 0x0008 /* checks are paused because of maintenance (health only) */ #define CHK_ST_AGENT 0x0010 /* check is an agent check (otherwise it's a health check) */ #define CHK_ST_PORT_MISS 0x0020 /* check can't be send because no port is configured to run it */ +#define CHK_ST_IN_ALLOC 0x0040 /* check blocked waiting for input buffer allocation */ +#define CHK_ST_OUT_ALLOC 0x0080 /* check blocked waiting for output buffer allocation */ /* check status */ enum healthcheck_status { @@ -145,6 +148,7 @@ struct check { struct xprt_ops *xprt; /* transport layer operations for health checks */ struct conn_stream *cs; /* conn_stream state for health checks */ struct buffer bi, bo; /* input and output buffers to send/recv check */ + struct buffer_wait buf_wait; /* Wait list for buffer allocation */ struct task *task; /* the task associated to the health check processing, NULL if disabled */ struct timeval start; /* last health check start time */ long duration; /* time in ms took to finish last health check */ diff --git a/include/haproxy/check.h b/include/haproxy/check.h index a1bdb012d..5e55da112 100644 --- a/include/haproxy/check.h +++ b/include/haproxy/check.h @@ -40,6 +40,9 @@ void check_notify_stopping(struct check *check); void check_notify_success(struct check *check); struct task *process_chk(struct task *t, void *context, unsigned short state); +int check_buf_available(void *target); +struct buffer *check_get_buf(struct check *check, struct buffer *bptr); +void check_release_buf(struct check *check, struct buffer *bptr); const char *init_check(struct check *check, int type); void free_check(struct check *check); diff --git a/reg-tests/checks/http-check.vtc b/reg-tests/checks/http-check.vtc index 1180c3de6..9ece54b29 100644 --- a/reg-tests/checks/http-check.vtc +++ b/reg-tests/checks/http-check.vtc @@ -82,6 +82,10 @@ syslog S1 -level notice { expect ~ "[^:\\[ ]\\[${h1_pid}\\]: Health check for server be[0-9]/srv succeeded.*code: 200" recv expect ~ "[^:\\[ ]\\[${h1_pid}\\]: Health check for server be[0-9]/srv succeeded.*code: 200" + recv + expect ~ "[^:\\[ ]\\[${h1_pid}\\]: Health check for server be[0-9]/srv succeeded.*code: 200" + recv + expect ~ "[^:\\[ ]\\[${h1_pid}\\]: Health check for server be[0-9]/srv succeeded.*code: 200" } -start haproxy h1 -conf { @@ -133,6 +137,21 @@ haproxy h1 -conf { ## implicit expect rule server srv ${s1_addr}:${s1_port} check inter 100ms rise 1 fall 1 + backend be5 + log ${S1_addr}:${S1_port} len 2048 local0 + option httpchk + server srv ${h1_li1_addr}:${h1_li1_port} proto h2 check inter 100ms rise 1 fall 1 + + backend be6 + log ${S1_addr}:${S1_port} len 2048 local0 + option httpchk GET /status HTTP/1.1 + server srv ${h1_li1_addr}:${h1_li1_port} check check-proto h2 inter 100ms rise 1 fall 1 + + listen li1 + mode http + bind "fd@${li1}" proto h2 + http-request return status 200 + } -start syslog S1 -wait diff --git a/src/check.c b/src/check.c index 03966d858..5008fce84 100644 --- a/src/check.c +++ b/src/check.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -866,8 +867,6 @@ static struct task *process_chk_conn(struct task *t, void *context, unsigned sho set_server_check_status(check, HCHK_STATUS_START, NULL); check->state |= CHK_ST_INPROGRESS; - b_reset(&check->bi); - b_reset(&check->bo); task_set_affinity(t, tid_bit); @@ -936,7 +935,9 @@ static struct task *process_chk_conn(struct task *t, void *context, unsigned sho } } task_set_affinity(t, MAX_THREADS_MASK); - check->state &= ~CHK_ST_INPROGRESS; + check_release_buf(check, &check->bi); + check_release_buf(check, &check->bo); + check->state &= ~(CHK_ST_INPROGRESS|CHK_ST_IN_ALLOC|CHK_ST_OUT_ALLOC); if (check->server) { rv = 0; @@ -961,18 +962,65 @@ static struct task *process_chk_conn(struct task *t, void *context, unsigned sho /**************************************************************************/ /************************** Init/deinit checks ****************************/ /**************************************************************************/ +/* + * Tries to grab a buffer and to re-enables processing on check . The + * check flags are used to figure what buffer was requested. It returns 1 if the + * allocation succeeds, in which case the I/O tasklet is woken up, or 0 if it's + * impossible to wake up and we prefer to be woken up later. + */ +int check_buf_available(void *target) +{ + struct check *check = target; + + if ((check->state & CHK_ST_IN_ALLOC) && b_alloc_margin(&check->bi, 0)) { + check->state &= ~CHK_ST_IN_ALLOC; + tasklet_wakeup(check->wait_list.tasklet); + return 1; + } + if ((check->state & CHK_ST_OUT_ALLOC) && b_alloc_margin(&check->bo, 0)) { + check->state &= ~CHK_ST_OUT_ALLOC; + tasklet_wakeup(check->wait_list.tasklet); + return 1; + } + + return 0; +} + +/* + * Allocate a buffer. If if fails, it adds the check in buffer wait queue. + */ +struct buffer *check_get_buf(struct check *check, struct buffer *bptr) +{ + struct buffer *buf = NULL; + + if (likely(!MT_LIST_ADDED(&check->buf_wait.list)) && + unlikely((buf = b_alloc_margin(bptr, 0)) == NULL)) { + check->buf_wait.target = check; + check->buf_wait.wakeup_cb = check_buf_available; + MT_LIST_ADDQ(&buffer_wq, &check->buf_wait.list); + } + return buf; +} + +/* + * Release a buffer, if any, and try to wake up entities waiting in the buffer + * wait queue. + */ +void check_release_buf(struct check *check, struct buffer *bptr) +{ + if (bptr->size) { + b_free(bptr); + offer_buffers(check->buf_wait.target, tasks_run_queue); + } +} + const char *init_check(struct check *check, int type) { check->type = type; - b_reset(&check->bi); check->bi.size = global.tune.chksize; - b_reset(&check->bo); check->bo.size = global.tune.chksize; - - check->bi.area = calloc(check->bi.size, sizeof(*check->bi.area)); - check->bo.area = calloc(check->bo.size, sizeof(*check->bo.area)); - - if (!check->bi.area || !check->bo.area) - return "out of memory while allocating check buffer"; + check->bi = BUF_NULL; + check->bo = BUF_NULL; + MT_LIST_INIT(&check->buf_wait.list); check->wait_list.tasklet = tasklet_new(); if (!check->wait_list.tasklet) @@ -989,8 +1037,8 @@ void free_check(struct check *check) if (check->wait_list.tasklet) tasklet_free(check->wait_list.tasklet); - free(check->bi.area); - free(check->bo.area); + check_release_buf(check, &check->bi); + check_release_buf(check, &check->bo); if (check->cs) { free(check->cs->conn); check->cs->conn = NULL; diff --git a/src/tcpcheck.c b/src/tcpcheck.c index e87b9f3da..3cde23f8b 100644 --- a/src/tcpcheck.c +++ b/src/tcpcheck.c @@ -993,6 +993,10 @@ enum tcpcheck_eval_ret tcpcheck_eval_connect(struct check *check, struct tcpchec * 3: release and replace the old one on success */ + /* Always release input and output buffer when a new connect is evaluated */ + check_release_buf(check, &check->bi); + check_release_buf(check, &check->bo); + /* 2- prepare new connection */ cs = cs_new(NULL, (s ? &s->obj_type : &proxy->obj_type)); if (!cs) { @@ -1222,13 +1226,23 @@ enum tcpcheck_eval_ret tcpcheck_eval_send(struct check *check, struct tcpcheck_r struct buffer *tmp = NULL; struct htx *htx = NULL; + if (check->state & CHK_ST_OUT_ALLOC) { + ret = TCPCHK_EVAL_WAIT; + goto out; + } + + if (!check_get_buf(check, &check->bo)) { + check->state |= CHK_ST_OUT_ALLOC; + ret = TCPCHK_EVAL_WAIT; + goto out; + } + /* Data already pending in the output buffer, send them now */ if (b_data(&check->bo)) goto do_send; - /* reset the read & write buffer */ - b_reset(&check->bi); - b_reset(&check->bo); + /* Always release input buffer when a new send is evaluated */ + check_release_buf(check, &check->bi); switch (send->type) { case TCPCHK_SEND_STRING: @@ -1372,6 +1386,8 @@ enum tcpcheck_eval_ret tcpcheck_eval_send(struct check *check, struct tcpcheck_r out: free_trash_chunk(tmp); + if (!b_data(&check->bo) || ret == TCPCHK_EVAL_STOP) + check_release_buf(check, &check->bo); return ret; error_htx: @@ -1414,6 +1430,14 @@ enum tcpcheck_eval_ret tcpcheck_eval_recv(struct check *check, struct tcpcheck_r if (cs->flags & CS_FL_EOS) goto end_recv; + if (check->state & CHK_ST_IN_ALLOC) + goto wait_more_data; + + if (!check_get_buf(check, &check->bi)) { + check->state |= CHK_ST_IN_ALLOC; + goto wait_more_data; + } + /* errors on the connection and the conn-stream were already checked */ /* prepare to detect if the mux needs more room */ @@ -1461,6 +1485,8 @@ enum tcpcheck_eval_ret tcpcheck_eval_recv(struct check *check, struct tcpcheck_r } out: + if (!b_data(&check->bi) || ret == TCPCHK_EVAL_STOP) + check_release_buf(check, &check->bi); return ret; stop: @@ -2115,6 +2141,10 @@ int tcpcheck_main(struct check *check) if ((conn && conn->flags & CO_FL_ERROR) || (cs && cs->flags & CS_FL_ERROR)) chk_report_conn_err(check, errno, 0); + /* the tcpcheck is finished, release in/out buffer now */ + check_release_buf(check, &check->bi); + check_release_buf(check, &check->bo); + out: return retcode; }