From c8dc20a825644bb4003ecb62e0eb2d20c8eaf6c8 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Fri, 27 Dec 2019 12:03:27 +0100 Subject: [PATCH] BUG/MINOR: checks: refine which errno values are really errors. Two regtest regularly fail in a random fashion depending on the machine's load (one could really wonder if it's really worth keeping such unreproducible tests) : - tcp-check_multiple_ports.vtc - 4be_1srv_smtpchk_httpchk_layer47errors.vtc It happens that one of the reason is the time it takes to connect to the local socket (hence the load-dependent aspect): if connect() on the loopback returns EINPROGRESS then this status is reported instead of a real error. Normally such a test is expected to see the error cleaned by tcp_connect_probe() but it really depends on the timing and instead we may very well send() first and see this error. The problem is that everything is collected based on errno, hoping it won't get molested in the way from the last unsuccesful syscall to wake_srv_chk(), which obviously is hard to guarantee. This patch at least makes sure that a few non-errors are reported as zero just like EAGAIN. It doesn't fix the root cause but makes it less likely to report incorrect failures. This fix could be backported as far as 1.9. --- src/checks.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/src/checks.c b/src/checks.c index aa93bab48..0cd844ccb 100644 --- a/src/checks.c +++ b/src/checks.c @@ -137,6 +137,17 @@ static const struct analyze_status analyze_statuses[HANA_STATUS_SIZE] = { /* 0: [HANA_STATUS_HTTP_BROKEN_PIPE] = { "Close from server (http)", { 0, 1 }}, }; +/* checks if is a real error for errno or one that can be ignored, and + * return 0 for these ones or for real ones. + */ +static inline int unclean_errno(int err) +{ + if (err == EAGAIN || err == EINPROGRESS || + err == EISCONN || err == EALREADY) + return 0; + return err; +} + /* * Convert check_status code to description */ @@ -548,7 +559,7 @@ static int retrieve_errno_from_socket(struct connection *conn) int skerr; socklen_t lskerr = sizeof(skerr); - if (conn->flags & CO_FL_ERROR && ((errno && errno != EAGAIN) || !conn->ctrl)) + if (conn->flags & CO_FL_ERROR && (unclean_errno(errno) || !conn->ctrl)) return 1; if (!conn_ctrl_ready(conn)) @@ -557,8 +568,7 @@ static int retrieve_errno_from_socket(struct connection *conn) if (getsockopt(conn->handle.fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr) == 0) errno = skerr; - if (errno == EAGAIN) - errno = 0; + errno = unclean_errno(errno); if (!errno) { /* we could not retrieve an error, that does not mean there is @@ -599,8 +609,8 @@ static void chk_report_conn_err(struct check *check, int errno_bck, int expired) if (check->result != CHK_RES_UNKNOWN) return; - errno = errno_bck; - if (conn && (!errno || errno == EAGAIN)) + errno = unclean_errno(errno_bck); + if (conn && errno) retrieve_errno_from_socket(conn); if (conn && !(conn->flags & CO_FL_ERROR) && @@ -644,7 +654,7 @@ static void chk_report_conn_err(struct check *check, int errno_bck, int expired) } if (conn && conn->err_code) { - if (errno && errno != EAGAIN) + if (unclean_errno(errno)) chunk_printf(&trash, "%s (%s)%s", conn_err_code_str(conn), strerror(errno), chk->area); else @@ -653,7 +663,7 @@ static void chk_report_conn_err(struct check *check, int errno_bck, int expired) err_msg = trash.area; } else { - if (errno && errno != EAGAIN) { + if (unclean_errno(errno)) { chunk_printf(&trash, "%s%s", strerror(errno), chk->area); err_msg = trash.area;