BUG/MAJOR: tcpcheck: Allocate input and output buffers from the buffer pool

Historically, the input and output buffers of a check are allocated by hand
during the startup, with a specific size (not necessarily the same than
other buffers). But since the recent refactoring of the checks to rely
exclusively on the tcp-checks and to use the underlying mux layer, this part
is totally buggy. Indeed, because these buffers are now passed to a mux,
they maybe be swapped if a zero-copy is possible. In fact, for now it is
only possible in h2_rcv_buf(). Thus the bug concretely only exists if a h2
health-check is performed. But, it is a latent bug for other muxes.

Another problem is the size of these buffers. because it may differ for the
other buffer size, it might be source of bugs.

Finally, for configurations with hundreds of thousands of servers, having 2
buffers per check always allocated may be an issue.

To fix the bug, we now allocate these buffers when required using the buffer
pool. Thus not-running checks don't waste memory and muxes may swap them if
possible. The only drawback is the check buffers have now always the same
size than buffers used by the streams. This deprecates indirectly the
"tune.chksize" global option.

In addition, the http-check regtest have been update to perform some h2
health-checks.

Many thanks to @VigneshSP94 for its help on this bug.

This patch should solve the issue #936. It relies on the commit "MINOR:
tcpcheck: Don't handle anymore in-progress send rules in tcpcheck_main".
Both must be backport as far as 2.2.

bla
This commit is contained in:
Christopher Faulet 2020-11-25 13:47:00 +01:00
parent 39066c2738
commit b381a505c1
5 changed files with 120 additions and 16 deletions

View File

@ -20,6 +20,7 @@
#include <haproxy/api-t.h> #include <haproxy/api-t.h>
#include <haproxy/buf-t.h> #include <haproxy/buf-t.h>
#include <haproxy/connection-t.h> #include <haproxy/connection-t.h>
#include <haproxy/dynbuf-t.h>
#include <haproxy/obj_type-t.h> #include <haproxy/obj_type-t.h>
#include <haproxy/vars-t.h> #include <haproxy/vars-t.h>
@ -49,6 +50,8 @@ enum chk_result {
#define CHK_ST_PAUSED 0x0008 /* checks are paused because of maintenance (health only) */ #define CHK_ST_PAUSED 0x0008 /* checks are paused because of maintenance (health only) */
#define CHK_ST_AGENT 0x0010 /* check is an agent check (otherwise it's a health check) */ #define CHK_ST_AGENT 0x0010 /* check is an agent check (otherwise it's a health check) */
#define CHK_ST_PORT_MISS 0x0020 /* check can't be send because no port is configured to run it */ #define CHK_ST_PORT_MISS 0x0020 /* check can't be send because no port is configured to run it */
#define CHK_ST_IN_ALLOC 0x0040 /* check blocked waiting for input buffer allocation */
#define CHK_ST_OUT_ALLOC 0x0080 /* check blocked waiting for output buffer allocation */
/* check status */ /* check status */
enum healthcheck_status { enum healthcheck_status {
@ -145,6 +148,7 @@ struct check {
struct xprt_ops *xprt; /* transport layer operations for health checks */ struct xprt_ops *xprt; /* transport layer operations for health checks */
struct conn_stream *cs; /* conn_stream state for health checks */ struct conn_stream *cs; /* conn_stream state for health checks */
struct buffer bi, bo; /* input and output buffers to send/recv check */ struct buffer bi, bo; /* input and output buffers to send/recv check */
struct buffer_wait buf_wait; /* Wait list for buffer allocation */
struct task *task; /* the task associated to the health check processing, NULL if disabled */ struct task *task; /* the task associated to the health check processing, NULL if disabled */
struct timeval start; /* last health check start time */ struct timeval start; /* last health check start time */
long duration; /* time in ms took to finish last health check */ long duration; /* time in ms took to finish last health check */

View File

@ -40,6 +40,9 @@ void check_notify_stopping(struct check *check);
void check_notify_success(struct check *check); void check_notify_success(struct check *check);
struct task *process_chk(struct task *t, void *context, unsigned short state); struct task *process_chk(struct task *t, void *context, unsigned short state);
int check_buf_available(void *target);
struct buffer *check_get_buf(struct check *check, struct buffer *bptr);
void check_release_buf(struct check *check, struct buffer *bptr);
const char *init_check(struct check *check, int type); const char *init_check(struct check *check, int type);
void free_check(struct check *check); void free_check(struct check *check);

View File

@ -82,6 +82,10 @@ syslog S1 -level notice {
expect ~ "[^:\\[ ]\\[${h1_pid}\\]: Health check for server be[0-9]/srv succeeded.*code: 200" expect ~ "[^:\\[ ]\\[${h1_pid}\\]: Health check for server be[0-9]/srv succeeded.*code: 200"
recv recv
expect ~ "[^:\\[ ]\\[${h1_pid}\\]: Health check for server be[0-9]/srv succeeded.*code: 200" expect ~ "[^:\\[ ]\\[${h1_pid}\\]: Health check for server be[0-9]/srv succeeded.*code: 200"
recv
expect ~ "[^:\\[ ]\\[${h1_pid}\\]: Health check for server be[0-9]/srv succeeded.*code: 200"
recv
expect ~ "[^:\\[ ]\\[${h1_pid}\\]: Health check for server be[0-9]/srv succeeded.*code: 200"
} -start } -start
haproxy h1 -conf { haproxy h1 -conf {
@ -133,6 +137,21 @@ haproxy h1 -conf {
## implicit expect rule ## implicit expect rule
server srv ${s1_addr}:${s1_port} check inter 100ms rise 1 fall 1 server srv ${s1_addr}:${s1_port} check inter 100ms rise 1 fall 1
backend be5
log ${S1_addr}:${S1_port} len 2048 local0
option httpchk
server srv ${h1_li1_addr}:${h1_li1_port} proto h2 check inter 100ms rise 1 fall 1
backend be6
log ${S1_addr}:${S1_port} len 2048 local0
option httpchk GET /status HTTP/1.1
server srv ${h1_li1_addr}:${h1_li1_port} check check-proto h2 inter 100ms rise 1 fall 1
listen li1
mode http
bind "fd@${li1}" proto h2
http-request return status 200
} -start } -start
syslog S1 -wait syslog S1 -wait

View File

@ -37,6 +37,7 @@
#include <haproxy/chunk.h> #include <haproxy/chunk.h>
#include <haproxy/dgram.h> #include <haproxy/dgram.h>
#include <haproxy/dns.h> #include <haproxy/dns.h>
#include <haproxy/dynbuf-t.h>
#include <haproxy/extcheck.h> #include <haproxy/extcheck.h>
#include <haproxy/fd.h> #include <haproxy/fd.h>
#include <haproxy/global.h> #include <haproxy/global.h>
@ -866,8 +867,6 @@ static struct task *process_chk_conn(struct task *t, void *context, unsigned sho
set_server_check_status(check, HCHK_STATUS_START, NULL); set_server_check_status(check, HCHK_STATUS_START, NULL);
check->state |= CHK_ST_INPROGRESS; check->state |= CHK_ST_INPROGRESS;
b_reset(&check->bi);
b_reset(&check->bo);
task_set_affinity(t, tid_bit); task_set_affinity(t, tid_bit);
@ -936,7 +935,9 @@ static struct task *process_chk_conn(struct task *t, void *context, unsigned sho
} }
} }
task_set_affinity(t, MAX_THREADS_MASK); task_set_affinity(t, MAX_THREADS_MASK);
check->state &= ~CHK_ST_INPROGRESS; check_release_buf(check, &check->bi);
check_release_buf(check, &check->bo);
check->state &= ~(CHK_ST_INPROGRESS|CHK_ST_IN_ALLOC|CHK_ST_OUT_ALLOC);
if (check->server) { if (check->server) {
rv = 0; rv = 0;
@ -961,18 +962,65 @@ static struct task *process_chk_conn(struct task *t, void *context, unsigned sho
/**************************************************************************/ /**************************************************************************/
/************************** Init/deinit checks ****************************/ /************************** Init/deinit checks ****************************/
/**************************************************************************/ /**************************************************************************/
/*
* Tries to grab a buffer and to re-enables processing on check <target>. The
* check flags are used to figure what buffer was requested. It returns 1 if the
* allocation succeeds, in which case the I/O tasklet is woken up, or 0 if it's
* impossible to wake up and we prefer to be woken up later.
*/
int check_buf_available(void *target)
{
struct check *check = target;
if ((check->state & CHK_ST_IN_ALLOC) && b_alloc_margin(&check->bi, 0)) {
check->state &= ~CHK_ST_IN_ALLOC;
tasklet_wakeup(check->wait_list.tasklet);
return 1;
}
if ((check->state & CHK_ST_OUT_ALLOC) && b_alloc_margin(&check->bo, 0)) {
check->state &= ~CHK_ST_OUT_ALLOC;
tasklet_wakeup(check->wait_list.tasklet);
return 1;
}
return 0;
}
/*
* Allocate a buffer. If if fails, it adds the check in buffer wait queue.
*/
struct buffer *check_get_buf(struct check *check, struct buffer *bptr)
{
struct buffer *buf = NULL;
if (likely(!MT_LIST_ADDED(&check->buf_wait.list)) &&
unlikely((buf = b_alloc_margin(bptr, 0)) == NULL)) {
check->buf_wait.target = check;
check->buf_wait.wakeup_cb = check_buf_available;
MT_LIST_ADDQ(&buffer_wq, &check->buf_wait.list);
}
return buf;
}
/*
* Release a buffer, if any, and try to wake up entities waiting in the buffer
* wait queue.
*/
void check_release_buf(struct check *check, struct buffer *bptr)
{
if (bptr->size) {
b_free(bptr);
offer_buffers(check->buf_wait.target, tasks_run_queue);
}
}
const char *init_check(struct check *check, int type) const char *init_check(struct check *check, int type)
{ {
check->type = type; check->type = type;
b_reset(&check->bi); check->bi.size = global.tune.chksize; check->bi = BUF_NULL;
b_reset(&check->bo); check->bo.size = global.tune.chksize; check->bo = BUF_NULL;
MT_LIST_INIT(&check->buf_wait.list);
check->bi.area = calloc(check->bi.size, sizeof(*check->bi.area));
check->bo.area = calloc(check->bo.size, sizeof(*check->bo.area));
if (!check->bi.area || !check->bo.area)
return "out of memory while allocating check buffer";
check->wait_list.tasklet = tasklet_new(); check->wait_list.tasklet = tasklet_new();
if (!check->wait_list.tasklet) if (!check->wait_list.tasklet)
@ -989,8 +1037,8 @@ void free_check(struct check *check)
if (check->wait_list.tasklet) if (check->wait_list.tasklet)
tasklet_free(check->wait_list.tasklet); tasklet_free(check->wait_list.tasklet);
free(check->bi.area); check_release_buf(check, &check->bi);
free(check->bo.area); check_release_buf(check, &check->bo);
if (check->cs) { if (check->cs) {
free(check->cs->conn); free(check->cs->conn);
check->cs->conn = NULL; check->cs->conn = NULL;

View File

@ -993,6 +993,10 @@ enum tcpcheck_eval_ret tcpcheck_eval_connect(struct check *check, struct tcpchec
* 3: release and replace the old one on success * 3: release and replace the old one on success
*/ */
/* Always release input and output buffer when a new connect is evaluated */
check_release_buf(check, &check->bi);
check_release_buf(check, &check->bo);
/* 2- prepare new connection */ /* 2- prepare new connection */
cs = cs_new(NULL, (s ? &s->obj_type : &proxy->obj_type)); cs = cs_new(NULL, (s ? &s->obj_type : &proxy->obj_type));
if (!cs) { if (!cs) {
@ -1222,13 +1226,23 @@ enum tcpcheck_eval_ret tcpcheck_eval_send(struct check *check, struct tcpcheck_r
struct buffer *tmp = NULL; struct buffer *tmp = NULL;
struct htx *htx = NULL; struct htx *htx = NULL;
if (check->state & CHK_ST_OUT_ALLOC) {
ret = TCPCHK_EVAL_WAIT;
goto out;
}
if (!check_get_buf(check, &check->bo)) {
check->state |= CHK_ST_OUT_ALLOC;
ret = TCPCHK_EVAL_WAIT;
goto out;
}
/* Data already pending in the output buffer, send them now */ /* Data already pending in the output buffer, send them now */
if (b_data(&check->bo)) if (b_data(&check->bo))
goto do_send; goto do_send;
/* reset the read & write buffer */ /* Always release input buffer when a new send is evaluated */
b_reset(&check->bi); check_release_buf(check, &check->bi);
b_reset(&check->bo);
switch (send->type) { switch (send->type) {
case TCPCHK_SEND_STRING: case TCPCHK_SEND_STRING:
@ -1372,6 +1386,8 @@ enum tcpcheck_eval_ret tcpcheck_eval_send(struct check *check, struct tcpcheck_r
out: out:
free_trash_chunk(tmp); free_trash_chunk(tmp);
if (!b_data(&check->bo) || ret == TCPCHK_EVAL_STOP)
check_release_buf(check, &check->bo);
return ret; return ret;
error_htx: error_htx:
@ -1414,6 +1430,14 @@ enum tcpcheck_eval_ret tcpcheck_eval_recv(struct check *check, struct tcpcheck_r
if (cs->flags & CS_FL_EOS) if (cs->flags & CS_FL_EOS)
goto end_recv; goto end_recv;
if (check->state & CHK_ST_IN_ALLOC)
goto wait_more_data;
if (!check_get_buf(check, &check->bi)) {
check->state |= CHK_ST_IN_ALLOC;
goto wait_more_data;
}
/* errors on the connection and the conn-stream were already checked */ /* errors on the connection and the conn-stream were already checked */
/* prepare to detect if the mux needs more room */ /* prepare to detect if the mux needs more room */
@ -1461,6 +1485,8 @@ enum tcpcheck_eval_ret tcpcheck_eval_recv(struct check *check, struct tcpcheck_r
} }
out: out:
if (!b_data(&check->bi) || ret == TCPCHK_EVAL_STOP)
check_release_buf(check, &check->bi);
return ret; return ret;
stop: stop:
@ -2115,6 +2141,10 @@ int tcpcheck_main(struct check *check)
if ((conn && conn->flags & CO_FL_ERROR) || (cs && cs->flags & CS_FL_ERROR)) if ((conn && conn->flags & CO_FL_ERROR) || (cs && cs->flags & CS_FL_ERROR))
chk_report_conn_err(check, errno, 0); chk_report_conn_err(check, errno, 0);
/* the tcpcheck is finished, release in/out buffer now */
check_release_buf(check, &check->bi);
check_release_buf(check, &check->bo);
out: out:
return retcode; return retcode;
} }