mirror of
http://git.haproxy.org/git/haproxy.git/
synced 2025-01-10 07:49:54 +00:00
BUG/MEDIUM: checks: fix a long-standing issue with reporting connection errors
In 1.5-dev14 we fixed a bug induced by the new connection system which caused handshake failures not to be reported in health checks. It was done with commit6c560da
(BUG/MEDIUM: checks: report handshake failures). This fix caused another issue which is that every check getting a TCP RST after a valid response was flagged as error. This was fixed using commitc5c61fc
(BUG/MEDIUM: checks: ignore late resets after valid responses). But because of this, we completely miss the status report. These two fixes only set the check result as failed and did not call set_server_check_status() to pass the information to upper layers. The impact is that some failed checks are reported as INI or are simply not updated if they happen fast enough (eg: TCP RST in response to connect() without data in a pure TCP check). So the server appears down but the check status says "L4OK". After commit6c560da
, the handshake failures have been correctly dealt with and every error causes process_chk() to be called with the appropriate information still present on the socket. So let's get the error code in process_chk() instead and stop mangling it in wake_srv_chk(). Now both L4 and L6 checks are correctly reported. This bug was first introduced in 1.5-dev12 so no backport is needed.
This commit is contained in:
parent
2f075e98fb
commit
02b0f58c43
24
src/checks.c
24
src/checks.c
@ -1272,9 +1272,11 @@ static int wake_srv_chk(struct connection *conn)
|
||||
struct check *check = conn->owner;
|
||||
|
||||
if (unlikely(conn->flags & CO_FL_ERROR)) {
|
||||
/* Note that we might as well have been woken up by a handshake handler */
|
||||
if (check->result == SRV_CHK_UNKNOWN)
|
||||
check->result |= SRV_CHK_FAILED;
|
||||
/* We may get error reports bypassing the I/O handlers, typically
|
||||
* the case when sending a pure TCP check which fails, then the I/O
|
||||
* handlers above are not called. This is completely handled by the
|
||||
* main processing task so let's simply wake it up.
|
||||
*/
|
||||
__conn_data_stop_both(conn);
|
||||
task_wakeup(check->task, TASK_WOKEN_IO);
|
||||
}
|
||||
@ -1465,17 +1467,29 @@ static struct task *process_chk(struct task *t)
|
||||
* which can happen on connect timeout or error.
|
||||
*/
|
||||
if (s->check.result == SRV_CHK_UNKNOWN) {
|
||||
int skerr, err = errno;
|
||||
socklen_t lskerr = sizeof(skerr);
|
||||
|
||||
if (conn->ctrl) {
|
||||
if (getsockopt(conn->t.sock.fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr) == 0 && skerr)
|
||||
err = skerr;
|
||||
}
|
||||
|
||||
/* eagain is normal on a timeout */
|
||||
if (err == EAGAIN)
|
||||
err = 0;
|
||||
|
||||
if ((conn->flags & (CO_FL_CONNECTED|CO_FL_WAIT_L4_CONN)) == CO_FL_WAIT_L4_CONN) {
|
||||
/* L4 not established (yet) */
|
||||
if (conn->flags & CO_FL_ERROR)
|
||||
set_server_check_status(check, HCHK_STATUS_L4CON, NULL);
|
||||
set_server_check_status(check, HCHK_STATUS_L4CON, err ? strerror(err) : NULL);
|
||||
else if (expired)
|
||||
set_server_check_status(check, HCHK_STATUS_L4TOUT, NULL);
|
||||
}
|
||||
else if ((conn->flags & (CO_FL_CONNECTED|CO_FL_WAIT_L6_CONN)) == CO_FL_WAIT_L6_CONN) {
|
||||
/* L6 not established (yet) */
|
||||
if (conn->flags & CO_FL_ERROR)
|
||||
set_server_check_status(check, HCHK_STATUS_L6RSP, NULL);
|
||||
set_server_check_status(check, HCHK_STATUS_L6RSP, err ? strerror(err) : NULL);
|
||||
else if (expired)
|
||||
set_server_check_status(check, HCHK_STATUS_L6TOUT, NULL);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user