mirror of
http://git.haproxy.org/git/haproxy.git/
synced 2025-02-19 20:27:01 +00:00
[MEDIUM] stream_sock: implement tcp-cork for use during shutdowns on Linux
Setting TCP_CORK on a socket before sending the last segment enables automatic merging of this segment with the FIN from the shutdown() call. Playing with TCP_CORK is not easy, though, as we have to track the status of the TCP_NODELAY flag, since the two are mutually exclusive. Doing so saves one more packet per session and offers about 5% more performance. There is no reason not to do it, so there is no associated option.
This commit is contained in:
parent
9ea05a790f
commit
fb14edc215
@ -59,6 +59,13 @@ enum {
|
||||
#define FD_POLL_DATA (FD_POLL_IN | FD_POLL_OUT)
|
||||
#define FD_POLL_STICKY (FD_POLL_ERR | FD_POLL_HUP)
|
||||
|
||||
/* bit values for fdtab[fd].flags. Most of them are used to hold a value
|
||||
* resulting from a behaviour change, so that the current socket option
|
||||
* state can be checked without querying the socket.
*/
|
||||
#define FD_FL_TCP 0x0001 /* socket is TCP */
|
||||
#define FD_FL_TCP_NODELAY 0x0002 /* TCP_NODELAY is considered enabled on the socket */
|
||||
#define FD_FL_TCP_CORK 0x0004 /* TCP_CORK has been enabled on the socket */
|
||||
|
||||
/* info about one given fd */
|
||||
struct fdtab {
|
||||
struct {
|
||||
@ -66,6 +73,7 @@ struct fdtab {
|
||||
struct buffer *b; /* read/write buffer */
|
||||
} cb[DIR_SIZE];
|
||||
void *owner; /* the session (or proxy) associated with this fd */
|
||||
unsigned short flags; /* various flags precising the exact status of this fd */
|
||||
unsigned char state; /* the state of this fd */
|
||||
unsigned char ev; /* event seen in return of poll() : FD_POLL_* */
|
||||
struct sockaddr *peeraddr; /* pointer to peer's network address, or NULL if unset */
|
||||
|
@ -1948,6 +1948,7 @@ int connect_server(struct session *s)
|
||||
|
||||
fdtab[fd].owner = s->req->cons;
|
||||
fdtab[fd].state = FD_STCONN; /* connection in progress */
|
||||
fdtab[fd].flags = FD_FL_TCP | FD_FL_TCP_NODELAY;
|
||||
fdtab[fd].cb[DIR_RD].f = &stream_sock_read;
|
||||
fdtab[fd].cb[DIR_RD].b = s->rep;
|
||||
fdtab[fd].cb[DIR_WR].f = &stream_sock_write;
|
||||
|
@ -692,6 +692,7 @@ struct task *process_chk(struct task *t)
|
||||
fdtab[fd].peeraddr = (struct sockaddr *)&sa;
|
||||
fdtab[fd].peerlen = sizeof(sa);
|
||||
fdtab[fd].state = FD_STCONN; /* connection in progress */
|
||||
fdtab[fd].flags = FD_FL_TCP | FD_FL_TCP_NODELAY;
|
||||
EV_FD_SET(fd, DIR_WR); /* for connect status */
|
||||
#ifdef DEBUG_FULL
|
||||
assert (!EV_FD_ISSET(fd, DIR_RD));
|
||||
|
@ -417,6 +417,7 @@ int event_accept(int fd) {
|
||||
fd_insert(cfd);
|
||||
fdtab[cfd].owner = &s->si[0];
|
||||
fdtab[cfd].state = FD_STREADY;
|
||||
fdtab[cfd].flags = FD_FL_TCP | FD_FL_TCP_NODELAY;
|
||||
fdtab[cfd].cb[DIR_RD].f = l->proto->read;
|
||||
fdtab[cfd].cb[DIR_RD].b = s->req;
|
||||
fdtab[cfd].cb[DIR_WR].f = l->proto->write;
|
||||
|
@ -212,9 +212,7 @@ int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen)
|
||||
goto tcp_close_return;
|
||||
}
|
||||
|
||||
if ((fcntl(fd, F_SETFL, O_NONBLOCK) == -1) ||
|
||||
(setsockopt(fd, IPPROTO_TCP, TCP_NODELAY,
|
||||
(char *) &one, sizeof(one)) == -1)) {
|
||||
if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
|
||||
err |= ERR_FATAL | ERR_ALERT;
|
||||
msg = "cannot make socket non-blocking";
|
||||
goto tcp_close_return;
|
||||
@ -281,6 +279,7 @@ int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen)
|
||||
fdtab[fd].cb[DIR_RD].b = fdtab[fd].cb[DIR_WR].b = NULL;
|
||||
fdtab[fd].owner = listener; /* reference the listener instead of a task */
|
||||
fdtab[fd].state = FD_STLISTEN;
|
||||
fdtab[fd].flags = FD_FL_TCP;
|
||||
fdtab[fd].peeraddr = NULL;
|
||||
fdtab[fd].peerlen = 0;
|
||||
tcp_return:
|
||||
|
@ -16,6 +16,8 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <netinet/tcp.h>
|
||||
|
||||
#include <sys/socket.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
@ -569,6 +571,26 @@ static int stream_sock_write_loop(struct stream_interface *si, struct buffer *b)
|
||||
if (max > b->send_max)
|
||||
max = b->send_max;
|
||||
|
||||
|
||||
#ifdef TCP_CORK
|
||||
/*
|
||||
* Check if we want to cork output before sending. This typically occurs
|
||||
* when there are data left in the buffer, or when we reached the end of
|
||||
* buffer but we know we will close, so we try to merge the ongoing FIN
|
||||
* with the last data segment.
|
||||
*/
|
||||
if ((fdtab[si->fd].flags & (FD_FL_TCP|FD_FL_TCP_CORK)) == FD_FL_TCP) {
|
||||
if (unlikely((b->send_max == b->l &&
|
||||
(b->flags & (BF_SHUTW|BF_SHUTW_NOW|BF_HIJACK|BF_WRITE_ENA|BF_SHUTR)) ==
|
||||
(BF_WRITE_ENA|BF_SHUTR)))) {
|
||||
/* we have to unconditionally reset TCP_NODELAY for CORK */
|
||||
setsockopt(si->fd, IPPROTO_TCP, TCP_NODELAY, (char *) &zero, sizeof(zero));
|
||||
setsockopt(si->fd, SOL_TCP, TCP_CORK, (char *) &one, sizeof(one));
|
||||
fdtab[si->fd].flags = (fdtab[si->fd].flags & ~FD_FL_TCP_NODELAY) | FD_FL_TCP_CORK;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef MSG_NOSIGNAL
|
||||
{
|
||||
int skerr;
|
||||
@ -628,6 +650,21 @@ static int stream_sock_write_loop(struct stream_interface *si, struct buffer *b)
|
||||
}
|
||||
} /* while (1) */
|
||||
|
||||
/* check if we need to uncork the output, for instance when the
|
||||
* output buffer is empty but BF_SHUTR has not been set on it.
|
||||
*/
|
||||
if (unlikely((fdtab[si->fd].flags & (FD_FL_TCP|FD_FL_TCP_NODELAY)) == FD_FL_TCP && (b->flags & BF_EMPTY))) {
|
||||
if ((b->flags & (BF_SHUTW|BF_SHUTW_NOW|BF_HIJACK|BF_WRITE_ENA|BF_SHUTR)) != (BF_WRITE_ENA|BF_SHUTR)) {
|
||||
#ifdef TCP_CORK
|
||||
if (fdtab[si->fd].flags & FD_FL_TCP_CORK)
|
||||
setsockopt(si->fd, SOL_TCP, TCP_CORK, (char *) &zero, sizeof(zero));
|
||||
#endif
|
||||
setsockopt(si->fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one));
|
||||
fdtab[si->fd].flags = (fdtab[si->fd].flags & ~FD_FL_TCP_CORK) | FD_FL_TCP_NODELAY;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user