MAJOR: fd: remove the need for the socket layer to recheck the connection
Up to now, if an outgoing connection had no data to send, the socket layer had to call connect() again to check whether the connection was established. This is not acceptable for SSL, and will cause problems with socketpair(). Some socket layers will also need an initializer before sending data (e.g. SSL). The solution consists in moving the connect() test to the protocol layer (e.g. TCP) and making it hold the fd->write callback until the connection is validated. At that point, it switches the write callback to the socket layer's write function. In fact, we need to hold both the read and write callbacks to ensure the socket layer is never called before being initialized. This intermediate callback is used only if there is a socket init function or if there is no data to send. The socket layer no longer contains any code to check for connection establishment, which makes sense.
This commit is contained in:
parent
d02394b5a1
commit
eeda90e68c
133
src/proto_tcp.c
133
src/proto_tcp.c
|
@ -60,6 +60,8 @@
|
|||
|
||||
static int tcp_bind_listeners(struct protocol *proto, char *errmsg, int errlen);
|
||||
static int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen);
|
||||
static int tcp_connect_write(int fd);
|
||||
static int tcp_connect_read(int fd);
|
||||
|
||||
/* Note: must not be declared <const> as its list will be overwritten */
|
||||
static struct protocol proto_tcpv4 = {
|
||||
|
@ -449,11 +451,22 @@ int tcp_connect_server(struct stream_interface *si)
|
|||
fdtab[fd].owner = si;
|
||||
fdtab[fd].state = FD_STCONN; /* connection in progress */
|
||||
fdtab[fd].flags = FD_FL_TCP | FD_FL_TCP_NODELAY;
|
||||
fdtab[fd].cb[DIR_RD].f = si->sock.read;
|
||||
fdtab[fd].cb[DIR_RD].b = si->ib;
|
||||
fdtab[fd].cb[DIR_WR].f = si->sock.write;
|
||||
fdtab[fd].cb[DIR_WR].b = si->ob;
|
||||
|
||||
/* If we have nothing to send or if we want to initialize the sock layer,
|
||||
* we want to confirm that the TCP connection is established before doing
|
||||
* so, so we use our own write callback then switch to the sock layer.
|
||||
*/
|
||||
if (si->sock.init || ((si->ob->flags & BF_OUT_EMPTY) && !si->send_proxy_ofs)) {
|
||||
fdtab[fd].cb[DIR_RD].f = tcp_connect_read;
|
||||
fdtab[fd].cb[DIR_WR].f = tcp_connect_write;
|
||||
}
|
||||
else {
|
||||
fdtab[fd].cb[DIR_RD].f = si->sock.read;
|
||||
fdtab[fd].cb[DIR_WR].f = si->sock.write;
|
||||
}
|
||||
|
||||
fdinfo[fd].peeraddr = (struct sockaddr *)&si->addr.to;
|
||||
fdinfo[fd].peerlen = get_addr_len(&si->addr.to);
|
||||
|
||||
|
@ -502,6 +515,122 @@ int tcp_get_dst(int fd, struct sockaddr *sa, socklen_t salen, int dir)
|
|||
return getsockname(fd, sa, &salen);
|
||||
}
|
||||
|
||||
/* This is the callback which is set when a connection establishment is pending
|
||||
* and we have nothing to send, or if we have an init function we want to call
|
||||
* once the connection is established.
|
||||
*/
|
||||
static int tcp_connect_write(int fd)
|
||||
{
|
||||
struct stream_interface *si = fdtab[fd].owner;
|
||||
struct buffer *b = si->ob;
|
||||
int retval = 1;
|
||||
|
||||
if (fdtab[fd].state == FD_STERROR)
|
||||
goto out_error;
|
||||
|
||||
if (fdtab[fd].state != FD_STCONN) {
|
||||
retval = 0;
|
||||
goto out_ignore; /* strange we were called while ready */
|
||||
}
|
||||
|
||||
/* we might have been called just after an asynchronous shutw */
|
||||
if (b->flags & BF_SHUTW)
|
||||
goto out_wakeup;
|
||||
|
||||
/* We have no data to send to check the connection, and
|
||||
* getsockopt() will not inform us whether the connection
|
||||
* is still pending. So we'll reuse connect() to check the
|
||||
* state of the socket. This has the advantage of giving us
|
||||
* the following info :
|
||||
* - error
|
||||
* - connecting (EALREADY, EINPROGRESS)
|
||||
* - connected (EISCONN, 0)
|
||||
*/
|
||||
if ((connect(fd, fdinfo[fd].peeraddr, fdinfo[fd].peerlen) == 0))
|
||||
errno = 0;
|
||||
|
||||
if (errno == EALREADY || errno == EINPROGRESS) {
|
||||
retval = 0;
|
||||
goto out_ignore;
|
||||
}
|
||||
|
||||
if (errno && errno != EISCONN)
|
||||
goto out_error;
|
||||
|
||||
/* OK we just need to indicate that we got a connection
|
||||
* and that we wrote nothing.
|
||||
*/
|
||||
b->flags |= BF_WRITE_NULL;
|
||||
|
||||
/* The FD is ready now, we can hand the handlers to the socket layer */
|
||||
fdtab[fd].cb[DIR_RD].f = si->sock.read;
|
||||
fdtab[fd].cb[DIR_WR].f = si->sock.write;
|
||||
fdtab[fd].state = FD_STREADY;
|
||||
|
||||
out_wakeup:
|
||||
task_wakeup(si->owner, TASK_WOKEN_IO);
|
||||
|
||||
out_ignore:
|
||||
fdtab[fd].ev &= ~FD_POLL_OUT;
|
||||
return retval;
|
||||
|
||||
out_error:
|
||||
/* Write error on the file descriptor. We mark the FD as STERROR so
|
||||
* that we don't use it anymore. The error is reported to the stream
|
||||
* interface which will take proper action. We must not perturbate the
|
||||
* buffer because the stream interface wants to ensure transparent
|
||||
* connection retries.
|
||||
*/
|
||||
|
||||
fdtab[fd].state = FD_STERROR;
|
||||
fdtab[fd].ev &= ~FD_POLL_STICKY;
|
||||
EV_FD_REM(fd);
|
||||
si->flags |= SI_FL_ERR;
|
||||
goto out_wakeup;
|
||||
}
|
||||
|
||||
|
||||
/* might be used on connect error */
|
||||
static int tcp_connect_read(int fd)
|
||||
{
|
||||
struct stream_interface *si = fdtab[fd].owner;
|
||||
int retval;
|
||||
|
||||
retval = 1;
|
||||
|
||||
if (fdtab[fd].state == FD_STERROR)
|
||||
goto out_error;
|
||||
|
||||
if (fdtab[fd].state != FD_STCONN) {
|
||||
retval = 0;
|
||||
goto out_ignore; /* strange we were called while ready */
|
||||
}
|
||||
|
||||
/* stop here if we reached the end of data */
|
||||
if ((fdtab[fd].ev & (FD_POLL_IN|FD_POLL_HUP)) == FD_POLL_HUP)
|
||||
goto out_error;
|
||||
|
||||
out_wakeup:
|
||||
task_wakeup(si->owner, TASK_WOKEN_IO);
|
||||
out_ignore:
|
||||
fdtab[fd].ev &= ~FD_POLL_IN;
|
||||
return retval;
|
||||
|
||||
out_error:
|
||||
/* Read error on the file descriptor. We mark the FD as STERROR so
|
||||
* that we don't use it anymore. The error is reported to the stream
|
||||
* interface which will take proper action. We must not perturbate the
|
||||
* buffer because the stream interface wants to ensure transparent
|
||||
* connection retries.
|
||||
*/
|
||||
|
||||
fdtab[fd].state = FD_STERROR;
|
||||
fdtab[fd].ev &= ~FD_POLL_STICKY;
|
||||
EV_FD_REM(fd);
|
||||
si->flags |= SI_FL_ERR;
|
||||
goto out_wakeup;
|
||||
}
|
||||
|
||||
|
||||
/* This function tries to bind a TCPv4/v6 listener. It may return a warning or
|
||||
* an error message in <err> if the message is at most <errlen> bytes long
|
||||
|
|
|
@ -714,51 +714,11 @@ static int sock_raw_write(int fd)
|
|||
if (b->flags & BF_SHUTW)
|
||||
goto out_wakeup;
|
||||
|
||||
if (likely(!(b->flags & BF_OUT_EMPTY) || si->send_proxy_ofs)) {
|
||||
/* OK there are data waiting to be sent */
|
||||
retval = sock_raw_write_loop(si, b);
|
||||
if (retval < 0)
|
||||
goto out_error;
|
||||
else if (retval == 0 && si->send_proxy_ofs)
|
||||
goto out_may_wakeup; /* we failed to send the PROXY string */
|
||||
}
|
||||
else {
|
||||
/* may be we have received a connection acknowledgement in TCP mode without data */
|
||||
if (likely(fdtab[fd].state == FD_STCONN)) {
|
||||
/* We have no data to send to check the connection, and
|
||||
* getsockopt() will not inform us whether the connection
|
||||
* is still pending. So we'll reuse connect() to check the
|
||||
* state of the socket. This has the advantage of giving us
|
||||
* the following info :
|
||||
* - error
|
||||
* - connecting (EALREADY, EINPROGRESS)
|
||||
* - connected (EISCONN, 0)
|
||||
*/
|
||||
if ((connect(fd, fdinfo[fd].peeraddr, fdinfo[fd].peerlen) == 0))
|
||||
errno = 0;
|
||||
|
||||
if (errno == EALREADY || errno == EINPROGRESS) {
|
||||
retval = 0;
|
||||
goto out_may_wakeup;
|
||||
}
|
||||
|
||||
if (errno && errno != EISCONN)
|
||||
goto out_error;
|
||||
|
||||
/* OK we just need to indicate that we got a connection
|
||||
* and that we wrote nothing.
|
||||
*/
|
||||
b->flags |= BF_WRITE_NULL;
|
||||
fdtab[fd].state = FD_STREADY;
|
||||
}
|
||||
|
||||
/* Funny, we were called to write something but there wasn't
|
||||
* anything. We can get there, for example if we were woken up
|
||||
* on a write event to finish the splice, but the ->o is 0
|
||||
* so we cannot write anything from the buffer. Let's disable
|
||||
* the write event and pretend we never came there.
|
||||
*/
|
||||
}
|
||||
retval = sock_raw_write_loop(si, b);
|
||||
if (retval < 0)
|
||||
goto out_error;
|
||||
else if (retval == 0 && si->send_proxy_ofs)
|
||||
goto out_may_wakeup; /* we failed to send the PROXY string */
|
||||
|
||||
if (b->flags & BF_OUT_EMPTY) {
|
||||
/* the connection is established but we can't write. Either the
|
||||
|
|
Loading…
Reference in New Issue