From f12a20ebce8973537c42c600324cce7c47bd5359 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Wed, 4 Dec 2013 16:11:04 +0100 Subject: [PATCH] BUG/MINOR: tcp: check that no error is pending during a connect probe The tcp_connect_probe() function may be called upon I/O activity when no recv/send callbacks were called (eg: recv not possible, nothing to send). It only relies on connect() to observe the connection establishment progress but that does not work when some network errors are pending on the socket (eg: a delayed connection refused). For this reason we need to run a getsockopt() in the case where the poller reports FD_POLL_ERR on the socket. We use this opportunity to update errno so that the conn->data->wake() function has all relevant info when it sees CO_FL_ERROR. At the moment no code is impacted by this bug because recv polling is always enabled during a connect, so recvfrom() always sees the error first. But this may change with the health check cleanup. No backport is needed. --- src/proto_tcp.c | 45 ++++++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/src/proto_tcp.c b/src/proto_tcp.c index 17a100ad5..21b17715c 100644 --- a/src/proto_tcp.c +++ b/src/proto_tcp.c @@ -552,15 +552,25 @@ int tcp_drain(int fd) } /* This is the callback which is set when a connection establishment is pending - * and we have nothing to send, or if we have an init function we want to call - * once the connection is established. It updates the FD polling status. It - * returns 0 if it fails in a fatal way or needs to poll to go further, otherwise - * it returns non-zero and removes itself from the connection's flags (the bit is - * provided in by the caller). + * and we have nothing to send. It updates the FD polling status. It returns 0 + * if it fails in a fatal way or needs to poll to go further, otherwise it + * returns non-zero and removes the CO_FL_WAIT_L4_CONN flag from the connection's + * flags. In case of error, it sets CO_FL_ERROR and leaves the error code in + * errno. The error checking is done in two passes in order to limit the number + * of syscalls in the normal case : + * - if POLL_ERR was reported by the poller, we check for a pending error on + * the socket before proceeding. If found, it's assigned to errno so that + * upper layers can see it. + * - otherwise connect() is used to check the connection state again, since + * the getsockopt return cannot reliably be used to know if the connection + * is still pending or ready. This one may often return an error as well, + * since we don't always have POLL_ERR (eg: OSX or cached events). */ int tcp_connect_probe(struct connection *conn) { int fd = conn->t.sock.fd; + socklen_t lskerr; + int skerr; if (conn->flags & CO_FL_ERROR) return 0; @@ -568,15 +578,24 @@ int tcp_connect_probe(struct connection *conn) if (!(conn->flags & CO_FL_WAIT_L4_CONN)) return 1; /* strange we were called while ready */ - /* stop here if we reached the end of data */ - if ((fdtab[fd].ev & (FD_POLL_IN|FD_POLL_HUP)) == FD_POLL_HUP) - goto out_error; + /* we might be the first witness of FD_POLL_ERR. Note that FD_POLL_HUP + * without FD_POLL_IN also indicates a hangup without input data meaning + * there was no connection. + */ + if (fdtab[fd].ev & FD_POLL_ERR || + (fdtab[fd].ev & (FD_POLL_IN|FD_POLL_HUP)) == FD_POLL_HUP) { + skerr = 0; + lskerr = sizeof(skerr); + getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr); + errno = skerr; + if (errno == EAGAIN) + errno = 0; + if (errno) + goto out_error; + } - /* We have no data to send to check the connection, and - * getsockopt() will not inform us whether the connection - * is still pending. So we'll reuse connect() to check the - * state of the socket. This has the advantage of giving us - * the following info : + /* Use connect() to check the state of the socket. This has the + * advantage of giving us the following info : * - error * - connecting (EALREADY, EINPROGRESS) * - connected (EISCONN, 0)