mirror of
http://git.haproxy.org/git/haproxy.git/
synced 2025-03-24 20:07:33 +00:00
fd_insert() is currently called just after setting the owner and iocb, but proceeding like this prevents the operation from being atomic and requires a lock to protect the maxfd computation in another thread from meeting an incompletely initialized FD and computing a wrong maxfd. Fortunately for now all fdtab[].owner are set before calling fd_insert(), and the first lock in fd_insert() enforces a memory barrier so the code is safe. This patch moves the initialization of the owner and iocb to fd_insert() so that the function will be able to properly arrange its operations and remain safe even when modified to become lockless. There's no other change beyond the internal API.
2101 lines
63 KiB
C
2101 lines
63 KiB
C
/*
|
|
* AF_INET/AF_INET6 SOCK_STREAM protocol layer (tcp)
|
|
*
|
|
* Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*
|
|
*/
|
|
|
|
/* this is to have tcp_info defined on systems using musl
|
|
* library, such as Alpine Linux
|
|
*/
|
|
#define _GNU_SOURCE
|
|
|
|
#include <ctype.h>
|
|
#include <errno.h>
|
|
#include <fcntl.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <time.h>
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/socket.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/types.h>
|
|
#include <sys/un.h>
|
|
|
|
#include <netinet/tcp.h>
|
|
#include <netinet/in.h>
|
|
|
|
#include <common/compat.h>
|
|
#include <common/config.h>
|
|
#include <common/debug.h>
|
|
#include <common/errors.h>
|
|
#include <common/mini-clist.h>
|
|
#include <common/standard.h>
|
|
#include <common/namespace.h>
|
|
|
|
#include <types/action.h>
|
|
#include <types/connection.h>
|
|
#include <types/global.h>
|
|
#include <types/stream.h>
|
|
|
|
#include <proto/arg.h>
|
|
#include <proto/channel.h>
|
|
#include <proto/connection.h>
|
|
#include <proto/fd.h>
|
|
#include <proto/listener.h>
|
|
#include <proto/log.h>
|
|
#include <proto/port_range.h>
|
|
#include <proto/protocol.h>
|
|
#include <proto/proto_http.h>
|
|
#include <proto/proto_tcp.h>
|
|
#include <proto/proxy.h>
|
|
#include <proto/sample.h>
|
|
#include <proto/server.h>
|
|
#include <proto/task.h>
|
|
#include <proto/tcp_rules.h>
|
|
|
|
/* Forward declarations of the static listener helpers which are referenced
 * by the protocol descriptors before being defined.
 */
static void tcpv4_add_listener(struct listener *listener, int port);
static void tcpv6_add_listener(struct listener *listener, int port);
static int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen);
static int tcp_bind_listeners(struct protocol *proto, char *errmsg, int errlen);
|
|
|
|
/* Note: must not be declared <const> as its list will be overwritten */
|
|
static struct protocol proto_tcpv4 = {
|
|
.name = "tcpv4",
|
|
.sock_domain = AF_INET,
|
|
.sock_type = SOCK_STREAM,
|
|
.sock_prot = IPPROTO_TCP,
|
|
.sock_family = AF_INET,
|
|
.sock_addrlen = sizeof(struct sockaddr_in),
|
|
.l3_addrlen = 32/8,
|
|
.accept = &listener_accept,
|
|
.connect = tcp_connect_server,
|
|
.bind = tcp_bind_listener,
|
|
.bind_all = tcp_bind_listeners,
|
|
.unbind_all = unbind_all_listeners,
|
|
.enable_all = enable_all_listeners,
|
|
.get_src = tcp_get_src,
|
|
.get_dst = tcp_get_dst,
|
|
.drain = tcp_drain,
|
|
.pause = tcp_pause_listener,
|
|
.add = tcpv4_add_listener,
|
|
.listeners = LIST_HEAD_INIT(proto_tcpv4.listeners),
|
|
.nb_listeners = 0,
|
|
};
|
|
|
|
/* Note: must not be declared <const> as its list will be overwritten */
|
|
static struct protocol proto_tcpv6 = {
|
|
.name = "tcpv6",
|
|
.sock_domain = AF_INET6,
|
|
.sock_type = SOCK_STREAM,
|
|
.sock_prot = IPPROTO_TCP,
|
|
.sock_family = AF_INET6,
|
|
.sock_addrlen = sizeof(struct sockaddr_in6),
|
|
.l3_addrlen = 128/8,
|
|
.accept = &listener_accept,
|
|
.connect = tcp_connect_server,
|
|
.bind = tcp_bind_listener,
|
|
.bind_all = tcp_bind_listeners,
|
|
.unbind_all = unbind_all_listeners,
|
|
.enable_all = enable_all_listeners,
|
|
.get_src = tcp_get_src,
|
|
.get_dst = tcp_get_dst,
|
|
.drain = tcp_drain,
|
|
.pause = tcp_pause_listener,
|
|
.add = tcpv6_add_listener,
|
|
.listeners = LIST_HEAD_INIT(proto_tcpv6.listeners),
|
|
.nb_listeners = 0,
|
|
};
|
|
|
|
/* Default TCP parameters, got by opening a temporary TCP socket. */
|
|
#ifdef TCP_MAXSEG
|
|
static THREAD_LOCAL int default_tcp_maxseg = -1;
|
|
static THREAD_LOCAL int default_tcp6_maxseg = -1;
|
|
#endif
|
|
|
|
/* Binds ipv4/ipv6 address <local> to socket <fd>, unless <flags> is set, in which
|
|
* case we try to bind <remote>. <flags> is a 2-bit field consisting of :
|
|
* - 0 : ignore remote address (may even be a NULL pointer)
|
|
* - 1 : use provided address
|
|
* - 2 : use provided port
|
|
* - 3 : use both
|
|
*
|
|
* The function supports multiple foreign binding methods :
|
|
* - linux_tproxy: we directly bind to the foreign address
|
|
* The second one can be used as a fallback for the first one.
|
|
* This function returns 0 when everything's OK, 1 if it could not bind, to the
|
|
* local address, 2 if it could not bind to the foreign address.
|
|
*/
|
|
int tcp_bind_socket(int fd, int flags, struct sockaddr_storage *local, struct sockaddr_storage *remote)
|
|
{
|
|
struct sockaddr_storage bind_addr;
|
|
int foreign_ok = 0;
|
|
int ret;
|
|
static THREAD_LOCAL int ip_transp_working = 1;
|
|
static THREAD_LOCAL int ip6_transp_working = 1;
|
|
|
|
switch (local->ss_family) {
|
|
case AF_INET:
|
|
if (flags && ip_transp_working) {
|
|
/* This deserves some explanation. Some platforms will support
|
|
* multiple combinations of certain methods, so we try the
|
|
* supported ones until one succeeds.
|
|
*/
|
|
if (0
|
|
#if defined(IP_TRANSPARENT)
|
|
|| (setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one)) == 0)
|
|
#endif
|
|
#if defined(IP_FREEBIND)
|
|
|| (setsockopt(fd, SOL_IP, IP_FREEBIND, &one, sizeof(one)) == 0)
|
|
#endif
|
|
#if defined(IP_BINDANY)
|
|
|| (setsockopt(fd, IPPROTO_IP, IP_BINDANY, &one, sizeof(one)) == 0)
|
|
#endif
|
|
#if defined(SO_BINDANY)
|
|
|| (setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0)
|
|
#endif
|
|
)
|
|
foreign_ok = 1;
|
|
else
|
|
ip_transp_working = 0;
|
|
}
|
|
break;
|
|
case AF_INET6:
|
|
if (flags && ip6_transp_working) {
|
|
if (0
|
|
#if defined(IPV6_TRANSPARENT) && defined(SOL_IPV6)
|
|
|| (setsockopt(fd, SOL_IPV6, IPV6_TRANSPARENT, &one, sizeof(one)) == 0)
|
|
#endif
|
|
#if defined(IP_FREEBIND)
|
|
|| (setsockopt(fd, SOL_IP, IP_FREEBIND, &one, sizeof(one)) == 0)
|
|
#endif
|
|
#if defined(IPV6_BINDANY)
|
|
|| (setsockopt(fd, IPPROTO_IPV6, IPV6_BINDANY, &one, sizeof(one)) == 0)
|
|
#endif
|
|
#if defined(SO_BINDANY)
|
|
|| (setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0)
|
|
#endif
|
|
)
|
|
foreign_ok = 1;
|
|
else
|
|
ip6_transp_working = 0;
|
|
}
|
|
break;
|
|
}
|
|
|
|
if (flags) {
|
|
memset(&bind_addr, 0, sizeof(bind_addr));
|
|
bind_addr.ss_family = remote->ss_family;
|
|
switch (remote->ss_family) {
|
|
case AF_INET:
|
|
if (flags & 1)
|
|
((struct sockaddr_in *)&bind_addr)->sin_addr = ((struct sockaddr_in *)remote)->sin_addr;
|
|
if (flags & 2)
|
|
((struct sockaddr_in *)&bind_addr)->sin_port = ((struct sockaddr_in *)remote)->sin_port;
|
|
break;
|
|
case AF_INET6:
|
|
if (flags & 1)
|
|
((struct sockaddr_in6 *)&bind_addr)->sin6_addr = ((struct sockaddr_in6 *)remote)->sin6_addr;
|
|
if (flags & 2)
|
|
((struct sockaddr_in6 *)&bind_addr)->sin6_port = ((struct sockaddr_in6 *)remote)->sin6_port;
|
|
break;
|
|
default:
|
|
/* we don't want to try to bind to an unknown address family */
|
|
foreign_ok = 0;
|
|
}
|
|
}
|
|
|
|
setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
|
|
if (foreign_ok) {
|
|
if (is_inet_addr(&bind_addr)) {
|
|
ret = bind(fd, (struct sockaddr *)&bind_addr, get_addr_len(&bind_addr));
|
|
if (ret < 0)
|
|
return 2;
|
|
}
|
|
}
|
|
else {
|
|
if (is_inet_addr(local)) {
|
|
ret = bind(fd, (struct sockaddr *)local, get_addr_len(local));
|
|
if (ret < 0)
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
if (!flags)
|
|
return 0;
|
|
|
|
if (!foreign_ok)
|
|
/* we could not bind to a foreign address */
|
|
return 2;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int create_server_socket(struct connection *conn)
|
|
{
|
|
const struct netns_entry *ns = NULL;
|
|
|
|
#ifdef CONFIG_HAP_NS
|
|
if (objt_server(conn->target)) {
|
|
if (__objt_server(conn->target)->flags & SRV_F_USE_NS_FROM_PP)
|
|
ns = conn->proxy_netns;
|
|
else
|
|
ns = __objt_server(conn->target)->netns;
|
|
}
|
|
#endif
|
|
return my_socketat(ns, conn->addr.to.ss_family, SOCK_STREAM, IPPROTO_TCP);
|
|
}
|
|
|
|
/*
|
|
* This function initiates a TCP connection establishment to the target assigned
|
|
* to connection <conn> using (si->{target,addr.to}). A source address may be
|
|
* pointed to by conn->addr.from in case of transparent proxying. Normal source
|
|
* bind addresses are still determined locally (due to the possible need of a
|
|
* source port). conn->target may point either to a valid server or to a backend,
|
|
* depending on conn->target. Only OBJ_TYPE_PROXY and OBJ_TYPE_SERVER are
|
|
* supported. The <data> parameter is a boolean indicating whether there are data
|
|
* waiting for being sent or not, in order to adjust data write polling and on
|
|
* some platforms, the ability to avoid an empty initial ACK. The <delack> argument
|
|
* allows the caller to force using a delayed ACK when establishing the connection :
|
|
* - 0 = no delayed ACK unless data are advertised and backend has tcp-smart-connect
|
|
* - 1 = delayed ACK if backend has tcp-smart-connect, regardless of data
|
|
* - 2 = delayed ACK regardless of backend options
|
|
*
|
|
* Note that a pending send_proxy message accounts for data.
|
|
*
|
|
* It can return one of :
|
|
* - SF_ERR_NONE if everything's OK
|
|
* - SF_ERR_SRVTO if there are no more servers
|
|
* - SF_ERR_SRVCL if the connection was refused by the server
|
|
* - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
|
|
* - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
|
|
* - SF_ERR_INTERNAL for any other purely internal errors
|
|
* Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
|
|
*
|
|
* The connection's fd is inserted only when SF_ERR_NONE is returned, otherwise
|
|
* it's invalid and the caller has nothing to do.
|
|
*/
|
|
|
|
int tcp_connect_server(struct connection *conn, int data, int delack)
|
|
{
|
|
int fd;
|
|
struct server *srv;
|
|
struct proxy *be;
|
|
struct conn_src *src;
|
|
|
|
conn->flags = CO_FL_WAIT_L4_CONN; /* connection in progress */
|
|
|
|
switch (obj_type(conn->target)) {
|
|
case OBJ_TYPE_PROXY:
|
|
be = objt_proxy(conn->target);
|
|
srv = NULL;
|
|
break;
|
|
case OBJ_TYPE_SERVER:
|
|
srv = objt_server(conn->target);
|
|
be = srv->proxy;
|
|
break;
|
|
default:
|
|
conn->flags |= CO_FL_ERROR;
|
|
return SF_ERR_INTERNAL;
|
|
}
|
|
|
|
fd = conn->handle.fd = create_server_socket(conn);
|
|
|
|
if (fd == -1) {
|
|
qfprintf(stderr, "Cannot get a server socket.\n");
|
|
|
|
if (errno == ENFILE) {
|
|
conn->err_code = CO_ER_SYS_FDLIM;
|
|
send_log(be, LOG_EMERG,
|
|
"Proxy %s reached system FD limit (maxsock=%d). Please check system tunables.\n",
|
|
be->id, global.maxsock);
|
|
}
|
|
else if (errno == EMFILE) {
|
|
conn->err_code = CO_ER_PROC_FDLIM;
|
|
send_log(be, LOG_EMERG,
|
|
"Proxy %s reached process FD limit (maxsock=%d). Please check 'ulimit-n' and restart.\n",
|
|
be->id, global.maxsock);
|
|
}
|
|
else if (errno == ENOBUFS || errno == ENOMEM) {
|
|
conn->err_code = CO_ER_SYS_MEMLIM;
|
|
send_log(be, LOG_EMERG,
|
|
"Proxy %s reached system memory limit (maxsock=%d). Please check system tunables.\n",
|
|
be->id, global.maxsock);
|
|
}
|
|
else if (errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
|
|
conn->err_code = CO_ER_NOPROTO;
|
|
}
|
|
else
|
|
conn->err_code = CO_ER_SOCK_ERR;
|
|
|
|
/* this is a resource error */
|
|
conn->flags |= CO_FL_ERROR;
|
|
return SF_ERR_RESOURCE;
|
|
}
|
|
|
|
if (fd >= global.maxsock) {
|
|
/* do not log anything there, it's a normal condition when this option
|
|
* is used to serialize connections to a server !
|
|
*/
|
|
ha_alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
|
|
close(fd);
|
|
conn->err_code = CO_ER_CONF_FDLIM;
|
|
conn->flags |= CO_FL_ERROR;
|
|
return SF_ERR_PRXCOND; /* it is a configuration limit */
|
|
}
|
|
|
|
if ((fcntl(fd, F_SETFL, O_NONBLOCK)==-1) ||
|
|
(setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one)) == -1)) {
|
|
qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
|
|
close(fd);
|
|
conn->err_code = CO_ER_SOCK_ERR;
|
|
conn->flags |= CO_FL_ERROR;
|
|
return SF_ERR_INTERNAL;
|
|
}
|
|
|
|
if (be->options & PR_O_TCP_SRV_KA)
|
|
setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one));
|
|
|
|
/* allow specific binding :
|
|
* - server-specific at first
|
|
* - proxy-specific next
|
|
*/
|
|
if (srv && srv->conn_src.opts & CO_SRC_BIND)
|
|
src = &srv->conn_src;
|
|
else if (be->conn_src.opts & CO_SRC_BIND)
|
|
src = &be->conn_src;
|
|
else
|
|
src = NULL;
|
|
|
|
if (src) {
|
|
int ret, flags = 0;
|
|
|
|
if (is_inet_addr(&conn->addr.from)) {
|
|
switch (src->opts & CO_SRC_TPROXY_MASK) {
|
|
case CO_SRC_TPROXY_CLI:
|
|
conn->flags |= CO_FL_PRIVATE;
|
|
/* fall through */
|
|
case CO_SRC_TPROXY_ADDR:
|
|
flags = 3;
|
|
break;
|
|
case CO_SRC_TPROXY_CIP:
|
|
case CO_SRC_TPROXY_DYN:
|
|
conn->flags |= CO_FL_PRIVATE;
|
|
flags = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
#ifdef SO_BINDTODEVICE
|
|
/* Note: this might fail if not CAP_NET_RAW */
|
|
if (src->iface_name)
|
|
setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, src->iface_name, src->iface_len + 1);
|
|
#endif
|
|
|
|
if (src->sport_range) {
|
|
int attempts = 10; /* should be more than enough to find a spare port */
|
|
struct sockaddr_storage sa;
|
|
|
|
ret = 1;
|
|
memcpy(&sa, &src->source_addr, sizeof(sa));
|
|
|
|
do {
|
|
/* note: in case of retry, we may have to release a previously
|
|
* allocated port, hence this loop's construct.
|
|
*/
|
|
port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
|
|
fdinfo[fd].port_range = NULL;
|
|
|
|
if (!attempts)
|
|
break;
|
|
attempts--;
|
|
|
|
fdinfo[fd].local_port = port_range_alloc_port(src->sport_range);
|
|
if (!fdinfo[fd].local_port) {
|
|
conn->err_code = CO_ER_PORT_RANGE;
|
|
break;
|
|
}
|
|
|
|
fdinfo[fd].port_range = src->sport_range;
|
|
set_host_port(&sa, fdinfo[fd].local_port);
|
|
|
|
ret = tcp_bind_socket(fd, flags, &sa, &conn->addr.from);
|
|
if (ret != 0)
|
|
conn->err_code = CO_ER_CANT_BIND;
|
|
} while (ret != 0); /* binding NOK */
|
|
}
|
|
else {
|
|
#ifdef IP_BIND_ADDRESS_NO_PORT
|
|
static THREAD_LOCAL int bind_address_no_port = 1;
|
|
setsockopt(fd, SOL_IP, IP_BIND_ADDRESS_NO_PORT, (const void *) &bind_address_no_port, sizeof(int));
|
|
#endif
|
|
ret = tcp_bind_socket(fd, flags, &src->source_addr, &conn->addr.from);
|
|
if (ret != 0)
|
|
conn->err_code = CO_ER_CANT_BIND;
|
|
}
|
|
|
|
if (unlikely(ret != 0)) {
|
|
port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
|
|
fdinfo[fd].port_range = NULL;
|
|
close(fd);
|
|
|
|
if (ret == 1) {
|
|
ha_alert("Cannot bind to source address before connect() for backend %s. Aborting.\n",
|
|
be->id);
|
|
send_log(be, LOG_EMERG,
|
|
"Cannot bind to source address before connect() for backend %s.\n",
|
|
be->id);
|
|
} else {
|
|
ha_alert("Cannot bind to tproxy source address before connect() for backend %s. Aborting.\n",
|
|
be->id);
|
|
send_log(be, LOG_EMERG,
|
|
"Cannot bind to tproxy source address before connect() for backend %s.\n",
|
|
be->id);
|
|
}
|
|
conn->flags |= CO_FL_ERROR;
|
|
return SF_ERR_RESOURCE;
|
|
}
|
|
}
|
|
|
|
#if defined(TCP_QUICKACK)
|
|
/* disabling tcp quick ack now allows the first request to leave the
|
|
* machine with the first ACK. We only do this if there are pending
|
|
* data in the buffer.
|
|
*/
|
|
if (delack == 2 || ((delack || data || conn->send_proxy_ofs) && (be->options2 & PR_O2_SMARTCON)))
|
|
setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, &zero, sizeof(zero));
|
|
#endif
|
|
|
|
#ifdef TCP_USER_TIMEOUT
|
|
/* there is not much more we can do here when it fails, it's still minor */
|
|
if (srv && srv->tcp_ut)
|
|
setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &srv->tcp_ut, sizeof(srv->tcp_ut));
|
|
#endif
|
|
if (global.tune.server_sndbuf)
|
|
setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &global.tune.server_sndbuf, sizeof(global.tune.server_sndbuf));
|
|
|
|
if (global.tune.server_rcvbuf)
|
|
setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &global.tune.server_rcvbuf, sizeof(global.tune.server_rcvbuf));
|
|
|
|
if (connect(fd, (struct sockaddr *)&conn->addr.to, get_addr_len(&conn->addr.to)) == -1) {
|
|
if (errno == EINPROGRESS || errno == EALREADY) {
|
|
/* common case, let's wait for connect status */
|
|
conn->flags |= CO_FL_WAIT_L4_CONN;
|
|
}
|
|
else if (errno == EISCONN) {
|
|
/* should normally not happen but if so, indicates that it's OK */
|
|
conn->flags &= ~CO_FL_WAIT_L4_CONN;
|
|
}
|
|
else if (errno == EAGAIN || errno == EADDRINUSE || errno == EADDRNOTAVAIL) {
|
|
char *msg;
|
|
if (errno == EAGAIN || errno == EADDRNOTAVAIL) {
|
|
msg = "no free ports";
|
|
conn->err_code = CO_ER_FREE_PORTS;
|
|
}
|
|
else {
|
|
msg = "local address already in use";
|
|
conn->err_code = CO_ER_ADDR_INUSE;
|
|
}
|
|
|
|
qfprintf(stderr,"Connect() failed for backend %s: %s.\n", be->id, msg);
|
|
port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
|
|
fdinfo[fd].port_range = NULL;
|
|
close(fd);
|
|
send_log(be, LOG_ERR, "Connect() failed for backend %s: %s.\n", be->id, msg);
|
|
conn->flags |= CO_FL_ERROR;
|
|
return SF_ERR_RESOURCE;
|
|
} else if (errno == ETIMEDOUT) {
|
|
//qfprintf(stderr,"Connect(): ETIMEDOUT");
|
|
port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
|
|
fdinfo[fd].port_range = NULL;
|
|
close(fd);
|
|
conn->err_code = CO_ER_SOCK_ERR;
|
|
conn->flags |= CO_FL_ERROR;
|
|
return SF_ERR_SRVTO;
|
|
} else {
|
|
// (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
|
|
//qfprintf(stderr,"Connect(): %d", errno);
|
|
port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
|
|
fdinfo[fd].port_range = NULL;
|
|
close(fd);
|
|
conn->err_code = CO_ER_SOCK_ERR;
|
|
conn->flags |= CO_FL_ERROR;
|
|
return SF_ERR_SRVCL;
|
|
}
|
|
}
|
|
else {
|
|
/* connect() == 0, this is great! */
|
|
conn->flags &= ~CO_FL_WAIT_L4_CONN;
|
|
}
|
|
|
|
conn->flags |= CO_FL_ADDR_TO_SET;
|
|
|
|
/* Prepare to send a few handshakes related to the on-wire protocol. */
|
|
if (conn->send_proxy_ofs)
|
|
conn->flags |= CO_FL_SEND_PROXY;
|
|
|
|
conn_ctrl_init(conn); /* registers the FD */
|
|
fdtab[fd].linger_risk = 1; /* close hard if needed */
|
|
|
|
if (conn_xprt_init(conn) < 0) {
|
|
conn_full_close(conn);
|
|
conn->flags |= CO_FL_ERROR;
|
|
return SF_ERR_RESOURCE;
|
|
}
|
|
|
|
if (conn->flags & (CO_FL_HANDSHAKE | CO_FL_WAIT_L4_CONN | CO_FL_EARLY_SSL_HS)) {
|
|
conn_sock_want_send(conn); /* for connect status, proxy protocol or SSL */
|
|
if (conn->flags & CO_FL_EARLY_SSL_HS)
|
|
conn_xprt_want_send(conn);
|
|
}
|
|
else {
|
|
/* If there's no more handshake, we need to notify the data
|
|
* layer when the connection is already OK otherwise we'll have
|
|
* no other opportunity to do it later (eg: health checks).
|
|
*/
|
|
data = 1;
|
|
}
|
|
|
|
if (data)
|
|
conn_xprt_want_send(conn); /* prepare to send data if any */
|
|
|
|
return SF_ERR_NONE; /* connection is OK */
|
|
}
|
|
|
|
|
|
/*
 * Retrieves the source address for the socket <fd> into <sa>, which is
 * <salen> bytes long. <dir> indicates whether we're a listener (=0), in which
 * case the source is the peer's address, or an initiator (!=0), in which case
 * it is the socket's own address. It returns 0 in case of success, -1 in case
 * of error.
 */
int tcp_get_src(int fd, struct sockaddr *sa, socklen_t salen, int dir)
{
	return dir ? getsockname(fd, sa, &salen)
	           : getpeername(fd, sa, &salen);
}
|
|
|
|
|
|
/*
 * Retrieves the destination address for the socket <fd> into <sa>, which is
 * <salen> bytes long. <dir> indicates whether we're a listener (=0) or an
 * initiator (!=0). For an initiator this is the peer's address. For a
 * listener this is the socket's own address, except when built with both
 * TPROXY and SO_ORIGINAL_DST, where the original IPv4 address before
 * translation is retrieved if the system can provide it. It returns 0 in case
 * of success, -1 in case of error.
 */
int tcp_get_dst(int fd, struct sockaddr *sa, socklen_t salen, int dir)
{
	int ret;

	if (dir)
		return getpeername(fd, sa, &salen);

	ret = getsockname(fd, sa, &salen);
	if (ret < 0)
		return ret;

#if defined(TPROXY) && defined(SO_ORIGINAL_DST)
	/* For TPROXY and Netfilter's NAT, we can retrieve the original
	 * IPv4 address before DNAT/REDIRECT. We must not do that with
	 * other families because v6-mapped IPv4 addresses are still
	 * reported as v4.
	 */
	if (((struct sockaddr_storage *)sa)->ss_family == AF_INET &&
	    getsockopt(fd, SOL_IP, SO_ORIGINAL_DST, sa, &salen) == 0)
		return 0;
#endif
	return ret;
}
|
|
|
|
/* Tries to drain any pending incoming data from the socket to reach the
|
|
* receive shutdown. Returns positive if the shutdown was found, negative
|
|
* if EAGAIN was hit, otherwise zero. This is useful to decide whether we
|
|
* can close a connection cleanly are we must kill it hard.
|
|
*/
|
|
int tcp_drain(int fd)
|
|
{
|
|
int turns = 2;
|
|
int len;
|
|
|
|
while (turns) {
|
|
#ifdef MSG_TRUNC_CLEARS_INPUT
|
|
len = recv(fd, NULL, INT_MAX, MSG_DONTWAIT | MSG_NOSIGNAL | MSG_TRUNC);
|
|
if (len == -1 && errno == EFAULT)
|
|
#endif
|
|
len = recv(fd, trash.str, trash.size, MSG_DONTWAIT | MSG_NOSIGNAL);
|
|
|
|
if (len == 0) {
|
|
/* cool, shutdown received */
|
|
fdtab[fd].linger_risk = 0;
|
|
return 1;
|
|
}
|
|
|
|
if (len < 0) {
|
|
if (errno == EAGAIN) {
|
|
/* connection not closed yet */
|
|
fd_cant_recv(fd);
|
|
return -1;
|
|
}
|
|
if (errno == EINTR) /* oops, try again */
|
|
continue;
|
|
/* other errors indicate a dead connection, fine. */
|
|
fdtab[fd].linger_risk = 0;
|
|
return 1;
|
|
}
|
|
/* OK we read some data, let's try again once */
|
|
turns--;
|
|
}
|
|
/* some data are still present, give up */
|
|
return 0;
|
|
}
|
|
|
|
/* This is the callback which is set when a connection establishment is pending
|
|
* and we have nothing to send. It updates the FD polling status. It returns 0
|
|
* if it fails in a fatal way or needs to poll to go further, otherwise it
|
|
* returns non-zero and removes the CO_FL_WAIT_L4_CONN flag from the connection's
|
|
* flags. In case of error, it sets CO_FL_ERROR and leaves the error code in
|
|
* errno. The error checking is done in two passes in order to limit the number
|
|
* of syscalls in the normal case :
|
|
* - if POLL_ERR was reported by the poller, we check for a pending error on
|
|
* the socket before proceeding. If found, it's assigned to errno so that
|
|
* upper layers can see it.
|
|
* - otherwise connect() is used to check the connection state again, since
|
|
* the getsockopt return cannot reliably be used to know if the connection
|
|
* is still pending or ready. This one may often return an error as well,
|
|
* since we don't always have POLL_ERR (eg: OSX or cached events).
|
|
*/
|
|
int tcp_connect_probe(struct connection *conn)
|
|
{
|
|
int fd = conn->handle.fd;
|
|
socklen_t lskerr;
|
|
int skerr;
|
|
|
|
if (conn->flags & CO_FL_ERROR)
|
|
return 0;
|
|
|
|
if (!conn_ctrl_ready(conn))
|
|
return 0;
|
|
|
|
if (!(conn->flags & CO_FL_WAIT_L4_CONN))
|
|
return 1; /* strange we were called while ready */
|
|
|
|
if (!fd_send_ready(fd))
|
|
return 0;
|
|
|
|
/* we might be the first witness of FD_POLL_ERR. Note that FD_POLL_HUP
|
|
* without FD_POLL_IN also indicates a hangup without input data meaning
|
|
* there was no connection.
|
|
*/
|
|
if (fdtab[fd].ev & FD_POLL_ERR ||
|
|
(fdtab[fd].ev & (FD_POLL_IN|FD_POLL_HUP)) == FD_POLL_HUP) {
|
|
skerr = 0;
|
|
lskerr = sizeof(skerr);
|
|
getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr);
|
|
errno = skerr;
|
|
if (errno == EAGAIN)
|
|
errno = 0;
|
|
if (errno)
|
|
goto out_error;
|
|
}
|
|
|
|
/* Use connect() to check the state of the socket. This has the
|
|
* advantage of giving us the following info :
|
|
* - error
|
|
* - connecting (EALREADY, EINPROGRESS)
|
|
* - connected (EISCONN, 0)
|
|
*/
|
|
if (connect(fd, (struct sockaddr *)&conn->addr.to, get_addr_len(&conn->addr.to)) < 0) {
|
|
if (errno == EALREADY || errno == EINPROGRESS) {
|
|
__conn_sock_stop_recv(conn);
|
|
fd_cant_send(fd);
|
|
return 0;
|
|
}
|
|
|
|
if (errno && errno != EISCONN)
|
|
goto out_error;
|
|
|
|
/* otherwise we're connected */
|
|
}
|
|
|
|
/* The FD is ready now, we'll mark the connection as complete and
|
|
* forward the event to the transport layer which will notify the
|
|
* data layer.
|
|
*/
|
|
conn->flags &= ~CO_FL_WAIT_L4_CONN;
|
|
return 1;
|
|
|
|
out_error:
|
|
/* Write error on the file descriptor. Report it to the connection
|
|
* and disable polling on this FD.
|
|
*/
|
|
fdtab[fd].linger_risk = 0;
|
|
conn->flags |= CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
|
|
__conn_sock_stop_both(conn);
|
|
return 0;
|
|
}
|
|
|
|
/* Compares the two socket addresses <a> and <b>. Returns zero if both are of
 * the same family (AF_INET or AF_INET6) and carry the same port and address.
 * Returns -1 if the families differ, if the family is neither AF_INET nor
 * AF_INET6, or if the ports differ, and memcmp()'s non-zero result if only
 * the addresses differ.
 * XXX: Should probably be elsewhere.
 */
static int compare_sockaddr(struct sockaddr_storage *a, struct sockaddr_storage *b)
{
	if (a->ss_family != b->ss_family)
		return -1;

	if (a->ss_family == AF_INET) {
		struct sockaddr_in *lhs = (void *)a;
		struct sockaddr_in *rhs = (void *)b;

		if (lhs->sin_port != rhs->sin_port)
			return -1;
		return memcmp(&lhs->sin_addr, &rhs->sin_addr, sizeof(lhs->sin_addr));
	}

	if (a->ss_family == AF_INET6) {
		struct sockaddr_in6 *lhs = (void *)a;
		struct sockaddr_in6 *rhs = (void *)b;

		if (lhs->sin6_port != rhs->sin6_port)
			return -1;
		return memcmp(&lhs->sin6_addr, &rhs->sin6_addr, sizeof(lhs->sin6_addr));
	}

	/* unsupported address family */
	return -1;
}
|
|
|
|
#define LI_MANDATORY_FLAGS (LI_O_FOREIGN | LI_O_V6ONLY | LI_O_V4V6)
|
|
/* When binding the listeners, check if a socket has been sent to us by the
|
|
* previous process that we could reuse, instead of creating a new one.
|
|
*/
|
|
static int tcp_find_compatible_fd(struct listener *l)
|
|
{
|
|
struct xfer_sock_list *xfer_sock = xfer_sock_list;
|
|
int ret = -1;
|
|
|
|
while (xfer_sock) {
|
|
if (!compare_sockaddr(&xfer_sock->addr, &l->addr)) {
|
|
if ((l->interface == NULL && xfer_sock->iface == NULL) ||
|
|
(l->interface != NULL && xfer_sock->iface != NULL &&
|
|
!strcmp(l->interface, xfer_sock->iface))) {
|
|
if ((l->options & LI_MANDATORY_FLAGS) ==
|
|
(xfer_sock->options & LI_MANDATORY_FLAGS)) {
|
|
if ((xfer_sock->namespace == NULL &&
|
|
l->netns == NULL)
|
|
#ifdef CONFIG_HAP_NS
|
|
|| (xfer_sock->namespace != NULL &&
|
|
l->netns != NULL &&
|
|
!strcmp(xfer_sock->namespace,
|
|
l->netns->node.key))
|
|
#endif
|
|
) {
|
|
break;
|
|
}
|
|
|
|
}
|
|
}
|
|
}
|
|
xfer_sock = xfer_sock->next;
|
|
}
|
|
if (xfer_sock != NULL) {
|
|
ret = xfer_sock->fd;
|
|
if (xfer_sock == xfer_sock_list)
|
|
xfer_sock_list = xfer_sock->next;
|
|
if (xfer_sock->prev)
|
|
xfer_sock->prev->next = xfer_sock->next;
|
|
if (xfer_sock->next)
|
|
xfer_sock->next->prev = xfer_sock->prev;
|
|
free(xfer_sock->iface);
|
|
free(xfer_sock->namespace);
|
|
free(xfer_sock);
|
|
}
|
|
return ret;
|
|
}
|
|
#undef L1_MANDATORY_FLAGS
|
|
|
|
/* This function tries to bind a TCPv4/v6 listener. It may return a warning or
|
|
* an error message in <errmsg> if the message is at most <errlen> bytes long
|
|
* (including '\0'). Note that <errmsg> may be NULL if <errlen> is also zero.
|
|
* The return value is composed from ERR_ABORT, ERR_WARN,
|
|
* ERR_ALERT, ERR_RETRYABLE and ERR_FATAL. ERR_NONE indicates that everything
|
|
* was alright and that no message was returned. ERR_RETRYABLE means that an
|
|
* error occurred but that it may vanish after a retry (eg: port in use), and
|
|
* ERR_FATAL indicates a non-fixable error. ERR_WARN and ERR_ALERT do not alter
|
|
* the meaning of the error, but just indicate that a message is present which
|
|
* should be displayed with the respective level. Last, ERR_ABORT indicates
|
|
* that it's pointless to try to start other listeners. No error message is
|
|
* returned if errlen is zero.
|
|
*/
|
|
int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen)
|
|
{
|
|
__label__ tcp_return, tcp_close_return;
|
|
int fd, err;
|
|
int ext, ready;
|
|
socklen_t ready_len;
|
|
const char *msg = NULL;
|
|
#ifdef TCP_MAXSEG
|
|
|
|
/* Create a temporary TCP socket to get default parameters we can't
|
|
* guess.
|
|
* */
|
|
ready_len = sizeof(default_tcp_maxseg);
|
|
if (default_tcp_maxseg == -1) {
|
|
default_tcp_maxseg = -2;
|
|
fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
|
|
if (fd < 0)
|
|
ha_warning("Failed to create a temporary socket!\n");
|
|
else {
|
|
if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &default_tcp_maxseg,
|
|
&ready_len) == -1)
|
|
ha_warning("Failed to get the default value of TCP_MAXSEG\n");
|
|
}
|
|
close(fd);
|
|
}
|
|
if (default_tcp6_maxseg == -1) {
|
|
default_tcp6_maxseg = -2;
|
|
fd = socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP);
|
|
if (fd >= 0) {
|
|
if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &default_tcp6_maxseg,
|
|
&ready_len) == -1)
|
|
ha_warning("Failed ot get the default value of TCP_MAXSEG for IPv6\n");
|
|
close(fd);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
|
|
/* ensure we never return garbage */
|
|
if (errlen)
|
|
*errmsg = 0;
|
|
|
|
if (listener->state != LI_ASSIGNED)
|
|
return ERR_NONE; /* already bound */
|
|
|
|
err = ERR_NONE;
|
|
|
|
if (listener->fd == -1)
|
|
listener->fd = tcp_find_compatible_fd(listener);
|
|
|
|
/* if the listener already has an fd assigned, then we were offered the
|
|
* fd by an external process (most likely the parent), and we don't want
|
|
* to create a new socket. However we still want to set a few flags on
|
|
* the socket.
|
|
*/
|
|
fd = listener->fd;
|
|
ext = (fd >= 0);
|
|
|
|
if (!ext) {
|
|
fd = my_socketat(listener->netns, listener->addr.ss_family, SOCK_STREAM, IPPROTO_TCP);
|
|
|
|
if (fd == -1) {
|
|
err |= ERR_RETRYABLE | ERR_ALERT;
|
|
msg = "cannot create listening socket";
|
|
goto tcp_return;
|
|
}
|
|
}
|
|
|
|
if (fd >= global.maxsock) {
|
|
err |= ERR_FATAL | ERR_ABORT | ERR_ALERT;
|
|
msg = "not enough free sockets (raise '-n' parameter)";
|
|
goto tcp_close_return;
|
|
}
|
|
|
|
if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
|
|
err |= ERR_FATAL | ERR_ALERT;
|
|
msg = "cannot make socket non-blocking";
|
|
goto tcp_close_return;
|
|
}
|
|
|
|
if (!ext && setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) == -1) {
|
|
/* not fatal but should be reported */
|
|
msg = "cannot do so_reuseaddr";
|
|
err |= ERR_ALERT;
|
|
}
|
|
|
|
if (listener->options & LI_O_NOLINGER)
|
|
setsockopt(fd, SOL_SOCKET, SO_LINGER, &nolinger, sizeof(struct linger));
|
|
else {
|
|
struct linger tmplinger;
|
|
socklen_t len = sizeof(tmplinger);
|
|
if (getsockopt(fd, SOL_SOCKET, SO_LINGER, &tmplinger, &len) == 0 &&
|
|
(tmplinger.l_onoff == 1 || tmplinger.l_linger == 0)) {
|
|
tmplinger.l_onoff = 0;
|
|
tmplinger.l_linger = 0;
|
|
setsockopt(fd, SOL_SOCKET, SO_LINGER, &tmplinger,
|
|
sizeof(tmplinger));
|
|
}
|
|
}
|
|
|
|
#ifdef SO_REUSEPORT
|
|
/* OpenBSD and Linux 3.9 support this. As it's present in old libc versions of
|
|
* Linux, it might return an error that we will silently ignore.
|
|
*/
|
|
if (!ext && (global.tune.options & GTUNE_USE_REUSEPORT))
|
|
setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
|
|
#endif
|
|
|
|
if (!ext && (listener->options & LI_O_FOREIGN)) {
|
|
switch (listener->addr.ss_family) {
|
|
case AF_INET:
|
|
if (1
|
|
#if defined(IP_TRANSPARENT)
|
|
&& (setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one)) == -1)
|
|
#endif
|
|
#if defined(IP_FREEBIND)
|
|
&& (setsockopt(fd, SOL_IP, IP_FREEBIND, &one, sizeof(one)) == -1)
|
|
#endif
|
|
#if defined(IP_BINDANY)
|
|
&& (setsockopt(fd, IPPROTO_IP, IP_BINDANY, &one, sizeof(one)) == -1)
|
|
#endif
|
|
#if defined(SO_BINDANY)
|
|
&& (setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == -1)
|
|
#endif
|
|
) {
|
|
msg = "cannot make listening socket transparent";
|
|
err |= ERR_ALERT;
|
|
}
|
|
break;
|
|
case AF_INET6:
|
|
if (1
|
|
#if defined(IPV6_TRANSPARENT) && defined(SOL_IPV6)
|
|
&& (setsockopt(fd, SOL_IPV6, IPV6_TRANSPARENT, &one, sizeof(one)) == -1)
|
|
#endif
|
|
#if defined(IP_FREEBIND)
|
|
&& (setsockopt(fd, SOL_IP, IP_FREEBIND, &one, sizeof(one)) == -1)
|
|
#endif
|
|
#if defined(IPV6_BINDANY)
|
|
&& (setsockopt(fd, IPPROTO_IPV6, IPV6_BINDANY, &one, sizeof(one)) == -1)
|
|
#endif
|
|
#if defined(SO_BINDANY)
|
|
&& (setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == -1)
|
|
#endif
|
|
) {
|
|
msg = "cannot make listening socket transparent";
|
|
err |= ERR_ALERT;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
#ifdef SO_BINDTODEVICE
|
|
/* Note: this might fail if not CAP_NET_RAW */
|
|
if (!ext && listener->interface) {
|
|
if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
|
|
listener->interface, strlen(listener->interface) + 1) == -1) {
|
|
msg = "cannot bind listener to device";
|
|
err |= ERR_WARN;
|
|
}
|
|
}
|
|
#endif
|
|
#if defined(TCP_MAXSEG)
|
|
if (listener->maxseg > 0) {
|
|
if (setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG,
|
|
&listener->maxseg, sizeof(listener->maxseg)) == -1) {
|
|
msg = "cannot set MSS";
|
|
err |= ERR_WARN;
|
|
}
|
|
} else if (ext) {
|
|
int tmpmaxseg = -1;
|
|
int defaultmss;
|
|
socklen_t len = sizeof(tmpmaxseg);
|
|
|
|
if (listener->addr.ss_family == AF_INET)
|
|
defaultmss = default_tcp_maxseg;
|
|
else
|
|
defaultmss = default_tcp6_maxseg;
|
|
|
|
getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &tmpmaxseg, &len);
|
|
if (tmpmaxseg != defaultmss && setsockopt(fd, IPPROTO_TCP,
|
|
TCP_MAXSEG, &defaultmss,
|
|
sizeof(defaultmss)) == -1) {
|
|
msg = "cannot set MSS";
|
|
err |= ERR_WARN;
|
|
}
|
|
}
|
|
#endif
|
|
#if defined(TCP_USER_TIMEOUT)
|
|
if (listener->tcp_ut) {
|
|
if (setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT,
|
|
&listener->tcp_ut, sizeof(listener->tcp_ut)) == -1) {
|
|
msg = "cannot set TCP User Timeout";
|
|
err |= ERR_WARN;
|
|
}
|
|
} else
|
|
setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &zero,
|
|
sizeof(zero));
|
|
#endif
|
|
#if defined(TCP_DEFER_ACCEPT)
|
|
if (listener->options & LI_O_DEF_ACCEPT) {
|
|
/* defer accept by up to one second */
|
|
int accept_delay = 1;
|
|
if (setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &accept_delay, sizeof(accept_delay)) == -1) {
|
|
msg = "cannot enable DEFER_ACCEPT";
|
|
err |= ERR_WARN;
|
|
}
|
|
} else
|
|
setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &zero,
|
|
sizeof(zero));
|
|
#endif
|
|
#if defined(TCP_FASTOPEN)
|
|
if (listener->options & LI_O_TCP_FO) {
|
|
/* TFO needs a queue length, let's use the configured backlog */
|
|
int qlen = listener->backlog ? listener->backlog : listener->maxconn;
|
|
if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) == -1) {
|
|
msg = "cannot enable TCP_FASTOPEN";
|
|
err |= ERR_WARN;
|
|
}
|
|
} else {
|
|
socklen_t len;
|
|
int qlen;
|
|
len = sizeof(qlen);
|
|
/* Only disable fast open if it was enabled, we don't want
|
|
* the kernel to create a fast open queue if there's none.
|
|
*/
|
|
if (getsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, &len) == 0 &&
|
|
qlen != 0) {
|
|
if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &zero,
|
|
sizeof(zero)) == -1) {
|
|
msg = "cannot disable TCP_FASTOPEN";
|
|
err |= ERR_WARN;
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
#if defined(IPV6_V6ONLY)
|
|
if (listener->options & LI_O_V6ONLY)
|
|
setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one));
|
|
else if (listener->options & LI_O_V4V6)
|
|
setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &zero, sizeof(zero));
|
|
#endif
|
|
|
|
if (!ext && bind(fd, (struct sockaddr *)&listener->addr, listener->proto->sock_addrlen) == -1) {
|
|
err |= ERR_RETRYABLE | ERR_ALERT;
|
|
msg = "cannot bind socket";
|
|
goto tcp_close_return;
|
|
}
|
|
|
|
ready = 0;
|
|
ready_len = sizeof(ready);
|
|
if (getsockopt(fd, SOL_SOCKET, SO_ACCEPTCONN, &ready, &ready_len) == -1)
|
|
ready = 0;
|
|
|
|
if (!(ext && ready) && /* only listen if not already done by external process */
|
|
listen(fd, listener->backlog ? listener->backlog : listener->maxconn) == -1) {
|
|
err |= ERR_RETRYABLE | ERR_ALERT;
|
|
msg = "cannot listen to socket";
|
|
goto tcp_close_return;
|
|
}
|
|
|
|
#if defined(TCP_QUICKACK)
|
|
if (listener->options & LI_O_NOQUICKACK)
|
|
setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, &zero, sizeof(zero));
|
|
else
|
|
setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, &one, sizeof(one));
|
|
#endif
|
|
|
|
/* the socket is ready */
|
|
listener->fd = fd;
|
|
listener->state = LI_LISTEN;
|
|
|
|
fd_insert(fd, listener, listener->proto->accept,
|
|
listener->bind_conf->bind_thread[relative_pid-1] ?
|
|
listener->bind_conf->bind_thread[relative_pid-1] : MAX_THREADS_MASK);
|
|
|
|
tcp_return:
|
|
if (msg && errlen) {
|
|
char pn[INET6_ADDRSTRLEN];
|
|
|
|
addr_to_str(&listener->addr, pn, sizeof(pn));
|
|
snprintf(errmsg, errlen, "%s [%s:%d]", msg, pn, get_host_port(&listener->addr));
|
|
}
|
|
return err;
|
|
|
|
tcp_close_return:
|
|
close(fd);
|
|
goto tcp_return;
|
|
}
|
|
|
|
/* This function creates all TCP sockets bound to the protocol entry <proto>.
|
|
* It is intended to be used as the protocol's bind_all() function.
|
|
* The sockets will be registered but not added to any fd_set, in order not to
|
|
* loose them across the fork(). A call to enable_all_listeners() is needed
|
|
* to complete initialization. The return value is composed from ERR_*.
|
|
*/
|
|
static int tcp_bind_listeners(struct protocol *proto, char *errmsg, int errlen)
|
|
{
|
|
struct listener *listener;
|
|
int err = ERR_NONE;
|
|
|
|
list_for_each_entry(listener, &proto->listeners, proto_list) {
|
|
err |= tcp_bind_listener(listener, errmsg, errlen);
|
|
if (err & ERR_ABORT)
|
|
break;
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
/* Add <listener> to the list of tcpv4 listeners, on port <port>. The
|
|
* listener's state is automatically updated from LI_INIT to LI_ASSIGNED.
|
|
* The number of listeners for the protocol is updated.
|
|
*/
|
|
static void tcpv4_add_listener(struct listener *listener, int port)
|
|
{
|
|
if (listener->state != LI_INIT)
|
|
return;
|
|
listener->state = LI_ASSIGNED;
|
|
listener->proto = &proto_tcpv4;
|
|
((struct sockaddr_in *)(&listener->addr))->sin_port = htons(port);
|
|
LIST_ADDQ(&proto_tcpv4.listeners, &listener->proto_list);
|
|
proto_tcpv4.nb_listeners++;
|
|
}
|
|
|
|
/* Add <listener> to the list of tcpv6 listeners, on port <port>. The
|
|
* listener's state is automatically updated from LI_INIT to LI_ASSIGNED.
|
|
* The number of listeners for the protocol is updated.
|
|
*/
|
|
static void tcpv6_add_listener(struct listener *listener, int port)
|
|
{
|
|
if (listener->state != LI_INIT)
|
|
return;
|
|
listener->state = LI_ASSIGNED;
|
|
listener->proto = &proto_tcpv6;
|
|
((struct sockaddr_in *)(&listener->addr))->sin_port = htons(port);
|
|
LIST_ADDQ(&proto_tcpv6.listeners, &listener->proto_list);
|
|
proto_tcpv6.nb_listeners++;
|
|
}
|
|
|
|
/* Pause a listener. Returns < 0 in case of failure, 0 if the listener
|
|
* was totally stopped, or > 0 if correctly paused.
|
|
*/
|
|
int tcp_pause_listener(struct listener *l)
|
|
{
|
|
if (shutdown(l->fd, SHUT_WR) != 0)
|
|
return -1; /* Solaris dies here */
|
|
|
|
if (listen(l->fd, l->backlog ? l->backlog : l->maxconn) != 0)
|
|
return -1; /* OpenBSD dies here */
|
|
|
|
if (shutdown(l->fd, SHUT_RD) != 0)
|
|
return -1; /* should always be OK */
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* Execute the "set-src" action. May be called from {tcp,http}request.
|
|
* It only changes the address and tries to preserve the original port. If the
|
|
* previous family was neither AF_INET nor AF_INET6, the port is set to zero.
|
|
*/
|
|
enum act_return tcp_action_req_set_src(struct act_rule *rule, struct proxy *px,
|
|
struct session *sess, struct stream *s, int flags)
|
|
{
|
|
struct connection *cli_conn;
|
|
|
|
if ((cli_conn = objt_conn(sess->origin)) && conn_ctrl_ready(cli_conn)) {
|
|
struct sample *smp;
|
|
|
|
smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_ADDR);
|
|
if (smp) {
|
|
int port = get_net_port(&cli_conn->addr.from);
|
|
|
|
if (smp->data.type == SMP_T_IPV4) {
|
|
((struct sockaddr_in *)&cli_conn->addr.from)->sin_family = AF_INET;
|
|
((struct sockaddr_in *)&cli_conn->addr.from)->sin_addr.s_addr = smp->data.u.ipv4.s_addr;
|
|
((struct sockaddr_in *)&cli_conn->addr.from)->sin_port = port;
|
|
} else if (smp->data.type == SMP_T_IPV6) {
|
|
((struct sockaddr_in6 *)&cli_conn->addr.from)->sin6_family = AF_INET6;
|
|
memcpy(&((struct sockaddr_in6 *)&cli_conn->addr.from)->sin6_addr, &smp->data.u.ipv6, sizeof(struct in6_addr));
|
|
((struct sockaddr_in6 *)&cli_conn->addr.from)->sin6_port = port;
|
|
}
|
|
}
|
|
cli_conn->flags |= CO_FL_ADDR_FROM_SET;
|
|
}
|
|
return ACT_RET_CONT;
|
|
}
|
|
|
|
/*
|
|
* Execute the "set-dst" action. May be called from {tcp,http}request.
|
|
* It only changes the address and tries to preserve the original port. If the
|
|
* previous family was neither AF_INET nor AF_INET6, the port is set to zero.
|
|
*/
|
|
enum act_return tcp_action_req_set_dst(struct act_rule *rule, struct proxy *px,
|
|
struct session *sess, struct stream *s, int flags)
|
|
{
|
|
struct connection *cli_conn;
|
|
|
|
if ((cli_conn = objt_conn(sess->origin)) && conn_ctrl_ready(cli_conn)) {
|
|
struct sample *smp;
|
|
|
|
smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_ADDR);
|
|
if (smp) {
|
|
int port = get_net_port(&cli_conn->addr.to);
|
|
|
|
if (smp->data.type == SMP_T_IPV4) {
|
|
((struct sockaddr_in *)&cli_conn->addr.to)->sin_family = AF_INET;
|
|
((struct sockaddr_in *)&cli_conn->addr.to)->sin_addr.s_addr = smp->data.u.ipv4.s_addr;
|
|
} else if (smp->data.type == SMP_T_IPV6) {
|
|
((struct sockaddr_in6 *)&cli_conn->addr.to)->sin6_family = AF_INET6;
|
|
memcpy(&((struct sockaddr_in6 *)&cli_conn->addr.to)->sin6_addr, &smp->data.u.ipv6, sizeof(struct in6_addr));
|
|
((struct sockaddr_in6 *)&cli_conn->addr.to)->sin6_port = port;
|
|
}
|
|
cli_conn->flags |= CO_FL_ADDR_TO_SET;
|
|
}
|
|
}
|
|
return ACT_RET_CONT;
|
|
}
|
|
|
|
/*
|
|
* Execute the "set-src-port" action. May be called from {tcp,http}request.
|
|
* We must test the sin_family before setting the port. If the address family
|
|
* is neither AF_INET nor AF_INET6, the address is forced to AF_INET "0.0.0.0"
|
|
* and the port is assigned.
|
|
*/
|
|
enum act_return tcp_action_req_set_src_port(struct act_rule *rule, struct proxy *px,
|
|
struct session *sess, struct stream *s, int flags)
|
|
{
|
|
struct connection *cli_conn;
|
|
|
|
if ((cli_conn = objt_conn(sess->origin)) && conn_ctrl_ready(cli_conn)) {
|
|
struct sample *smp;
|
|
|
|
conn_get_from_addr(cli_conn);
|
|
|
|
smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_SINT);
|
|
if (smp) {
|
|
if (cli_conn->addr.from.ss_family == AF_INET6) {
|
|
((struct sockaddr_in6 *)&cli_conn->addr.from)->sin6_port = htons(smp->data.u.sint);
|
|
} else {
|
|
if (cli_conn->addr.from.ss_family != AF_INET) {
|
|
cli_conn->addr.from.ss_family = AF_INET;
|
|
((struct sockaddr_in *)&cli_conn->addr.from)->sin_addr.s_addr = 0;
|
|
}
|
|
((struct sockaddr_in *)&cli_conn->addr.from)->sin_port = htons(smp->data.u.sint);
|
|
}
|
|
}
|
|
}
|
|
return ACT_RET_CONT;
|
|
}
|
|
|
|
/*
|
|
* Execute the "set-dst-port" action. May be called from {tcp,http}request.
|
|
* We must test the sin_family before setting the port. If the address family
|
|
* is neither AF_INET nor AF_INET6, the address is forced to AF_INET "0.0.0.0"
|
|
* and the port is assigned.
|
|
*/
|
|
enum act_return tcp_action_req_set_dst_port(struct act_rule *rule, struct proxy *px,
|
|
struct session *sess, struct stream *s, int flags)
|
|
{
|
|
struct connection *cli_conn;
|
|
|
|
if ((cli_conn = objt_conn(sess->origin)) && conn_ctrl_ready(cli_conn)) {
|
|
struct sample *smp;
|
|
|
|
conn_get_to_addr(cli_conn);
|
|
|
|
smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_SINT);
|
|
if (smp) {
|
|
if (cli_conn->addr.to.ss_family == AF_INET6) {
|
|
((struct sockaddr_in6 *)&cli_conn->addr.to)->sin6_port = htons(smp->data.u.sint);
|
|
} else {
|
|
if (cli_conn->addr.to.ss_family != AF_INET) {
|
|
cli_conn->addr.to.ss_family = AF_INET;
|
|
((struct sockaddr_in *)&cli_conn->addr.to)->sin_addr.s_addr = 0;
|
|
}
|
|
((struct sockaddr_in *)&cli_conn->addr.to)->sin_port = htons(smp->data.u.sint);
|
|
}
|
|
}
|
|
}
|
|
return ACT_RET_CONT;
|
|
}
|
|
|
|
/* Executes the "silent-drop" action. May be called from {tcp,http}{request,response} */
|
|
static enum act_return tcp_exec_action_silent_drop(struct act_rule *rule, struct proxy *px, struct session *sess, struct stream *strm, int flags)
|
|
{
|
|
struct connection *conn = objt_conn(sess->origin);
|
|
|
|
if (!conn)
|
|
goto out;
|
|
|
|
if (!conn_ctrl_ready(conn))
|
|
goto out;
|
|
|
|
#ifdef TCP_QUICKACK
|
|
/* drain is needed only to send the quick ACK */
|
|
conn_sock_drain(conn);
|
|
|
|
/* re-enable quickack if it was disabled to ack all data and avoid
|
|
* retransmits from the client that might trigger a real reset.
|
|
*/
|
|
setsockopt(conn->handle.fd, SOL_TCP, TCP_QUICKACK, &one, sizeof(one));
|
|
#endif
|
|
/* lingering must absolutely be disabled so that we don't send a
|
|
* shutdown(), this is critical to the TCP_REPAIR trick. When no stream
|
|
* is present, returning with ERR will cause lingering to be disabled.
|
|
*/
|
|
if (strm)
|
|
strm->si[0].flags |= SI_FL_NOLINGER;
|
|
|
|
/* We're on the client-facing side, we must force to disable lingering to
|
|
* ensure we will use an RST exclusively and kill any pending data.
|
|
*/
|
|
fdtab[conn->handle.fd].linger_risk = 1;
|
|
|
|
#ifdef TCP_REPAIR
|
|
if (setsockopt(conn->handle.fd, SOL_TCP, TCP_REPAIR, &one, sizeof(one)) == 0) {
|
|
/* socket will be quiet now */
|
|
goto out;
|
|
}
|
|
#endif
|
|
/* either TCP_REPAIR is not defined or it failed (eg: permissions).
|
|
* Let's fall back on the TTL trick, though it only works for routed
|
|
* network and has no effect on local net.
|
|
*/
|
|
#ifdef IP_TTL
|
|
setsockopt(conn->handle.fd, SOL_IP, IP_TTL, &one, sizeof(one));
|
|
#endif
|
|
out:
|
|
/* kill the stream if any */
|
|
if (strm) {
|
|
channel_abort(&strm->req);
|
|
channel_abort(&strm->res);
|
|
strm->req.analysers = 0;
|
|
strm->res.analysers = 0;
|
|
HA_ATOMIC_ADD(&strm->be->be_counters.denied_req, 1);
|
|
if (!(strm->flags & SF_ERR_MASK))
|
|
strm->flags |= SF_ERR_PRXCOND;
|
|
if (!(strm->flags & SF_FINST_MASK))
|
|
strm->flags |= SF_FINST_R;
|
|
}
|
|
|
|
HA_ATOMIC_ADD(&sess->fe->fe_counters.denied_req, 1);
|
|
if (sess->listener->counters)
|
|
HA_ATOMIC_ADD(&sess->listener->counters->denied_req, 1);
|
|
|
|
return ACT_RET_STOP;
|
|
}
|
|
|
|
/* parse "set-{src,dst}[-port]" action */
|
|
enum act_parse_ret tcp_parse_set_src_dst(const char **args, int *orig_arg, struct proxy *px, struct act_rule *rule, char **err)
|
|
{
|
|
int cur_arg;
|
|
struct sample_expr *expr;
|
|
unsigned int where;
|
|
|
|
cur_arg = *orig_arg;
|
|
expr = sample_parse_expr((char **)args, &cur_arg, px->conf.args.file, px->conf.args.line, err, &px->conf.args);
|
|
if (!expr)
|
|
return ACT_RET_PRS_ERR;
|
|
|
|
where = 0;
|
|
if (px->cap & PR_CAP_FE)
|
|
where |= SMP_VAL_FE_HRQ_HDR;
|
|
if (px->cap & PR_CAP_BE)
|
|
where |= SMP_VAL_BE_HRQ_HDR;
|
|
|
|
if (!(expr->fetch->val & where)) {
|
|
memprintf(err,
|
|
"fetch method '%s' extracts information from '%s', none of which is available here",
|
|
args[cur_arg-1], sample_src_names(expr->fetch->use));
|
|
free(expr);
|
|
return ACT_RET_PRS_ERR;
|
|
}
|
|
rule->arg.expr = expr;
|
|
rule->action = ACT_CUSTOM;
|
|
|
|
if (!strcmp(args[*orig_arg-1], "set-src")) {
|
|
rule->action_ptr = tcp_action_req_set_src;
|
|
} else if (!strcmp(args[*orig_arg-1], "set-src-port")) {
|
|
rule->action_ptr = tcp_action_req_set_src_port;
|
|
} else if (!strcmp(args[*orig_arg-1], "set-dst")) {
|
|
rule->action_ptr = tcp_action_req_set_dst;
|
|
} else if (!strcmp(args[*orig_arg-1], "set-dst-port")) {
|
|
rule->action_ptr = tcp_action_req_set_dst_port;
|
|
} else {
|
|
return ACT_RET_PRS_ERR;
|
|
}
|
|
|
|
(*orig_arg)++;
|
|
|
|
return ACT_RET_PRS_OK;
|
|
}
|
|
|
|
|
|
/* Parse a "silent-drop" action. It takes no argument. It returns ACT_RET_PRS_OK on
|
|
* success, ACT_RET_PRS_ERR on error.
|
|
*/
|
|
static enum act_parse_ret tcp_parse_silent_drop(const char **args, int *orig_arg, struct proxy *px,
|
|
struct act_rule *rule, char **err)
|
|
{
|
|
rule->action = ACT_CUSTOM;
|
|
rule->action_ptr = tcp_exec_action_silent_drop;
|
|
return ACT_RET_PRS_OK;
|
|
}
|
|
|
|
|
|
/************************************************************************/
|
|
/* All supported sample fetch functions must be declared here */
|
|
/************************************************************************/
|
|
|
|
/* fetch the connection's source IPv4/IPv6 address */
|
|
int smp_fetch_src(const struct arg *args, struct sample *smp, const char *kw, void *private)
|
|
{
|
|
struct connection *cli_conn = objt_conn(smp->sess->origin);
|
|
|
|
if (!cli_conn)
|
|
return 0;
|
|
|
|
switch (cli_conn->addr.from.ss_family) {
|
|
case AF_INET:
|
|
smp->data.u.ipv4 = ((struct sockaddr_in *)&cli_conn->addr.from)->sin_addr;
|
|
smp->data.type = SMP_T_IPV4;
|
|
break;
|
|
case AF_INET6:
|
|
smp->data.u.ipv6 = ((struct sockaddr_in6 *)&cli_conn->addr.from)->sin6_addr;
|
|
smp->data.type = SMP_T_IPV6;
|
|
break;
|
|
default:
|
|
return 0;
|
|
}
|
|
|
|
smp->flags = 0;
|
|
return 1;
|
|
}
|
|
|
|
/* set temp integer to the connection's source port */
|
|
static int
|
|
smp_fetch_sport(const struct arg *args, struct sample *smp, const char *k, void *private)
|
|
{
|
|
struct connection *cli_conn = objt_conn(smp->sess->origin);
|
|
|
|
if (!cli_conn)
|
|
return 0;
|
|
|
|
smp->data.type = SMP_T_SINT;
|
|
if (!(smp->data.u.sint = get_host_port(&cli_conn->addr.from)))
|
|
return 0;
|
|
|
|
smp->flags = 0;
|
|
return 1;
|
|
}
|
|
|
|
/* fetch the connection's destination IPv4/IPv6 address */
|
|
static int
|
|
smp_fetch_dst(const struct arg *args, struct sample *smp, const char *kw, void *private)
|
|
{
|
|
struct connection *cli_conn = objt_conn(smp->sess->origin);
|
|
|
|
if (!cli_conn)
|
|
return 0;
|
|
|
|
conn_get_to_addr(cli_conn);
|
|
|
|
switch (cli_conn->addr.to.ss_family) {
|
|
case AF_INET:
|
|
smp->data.u.ipv4 = ((struct sockaddr_in *)&cli_conn->addr.to)->sin_addr;
|
|
smp->data.type = SMP_T_IPV4;
|
|
break;
|
|
case AF_INET6:
|
|
smp->data.u.ipv6 = ((struct sockaddr_in6 *)&cli_conn->addr.to)->sin6_addr;
|
|
smp->data.type = SMP_T_IPV6;
|
|
break;
|
|
default:
|
|
return 0;
|
|
}
|
|
|
|
smp->flags = 0;
|
|
return 1;
|
|
}
|
|
|
|
/* check if the destination address of the front connection is local to the
|
|
* system or if it was intercepted.
|
|
*/
|
|
int smp_fetch_dst_is_local(const struct arg *args, struct sample *smp, const char *kw, void *private)
|
|
{
|
|
struct connection *conn = objt_conn(smp->sess->origin);
|
|
struct listener *li = smp->sess->listener;
|
|
|
|
if (!conn)
|
|
return 0;
|
|
|
|
conn_get_to_addr(conn);
|
|
if (!(conn->flags & CO_FL_ADDR_TO_SET))
|
|
return 0;
|
|
|
|
smp->data.type = SMP_T_BOOL;
|
|
smp->flags = 0;
|
|
smp->data.u.sint = addr_is_local(li->netns, &conn->addr.to);
|
|
return smp->data.u.sint >= 0;
|
|
}
|
|
|
|
/* check if the source address of the front connection is local to the system
|
|
* or not.
|
|
*/
|
|
int smp_fetch_src_is_local(const struct arg *args, struct sample *smp, const char *kw, void *private)
|
|
{
|
|
struct connection *conn = objt_conn(smp->sess->origin);
|
|
struct listener *li = smp->sess->listener;
|
|
|
|
if (!conn)
|
|
return 0;
|
|
|
|
conn_get_from_addr(conn);
|
|
if (!(conn->flags & CO_FL_ADDR_FROM_SET))
|
|
return 0;
|
|
|
|
smp->data.type = SMP_T_BOOL;
|
|
smp->flags = 0;
|
|
smp->data.u.sint = addr_is_local(li->netns, &conn->addr.from);
|
|
return smp->data.u.sint >= 0;
|
|
}
|
|
|
|
/* set temp integer to the frontend connexion's destination port */
|
|
static int
|
|
smp_fetch_dport(const struct arg *args, struct sample *smp, const char *kw, void *private)
|
|
{
|
|
struct connection *cli_conn = objt_conn(smp->sess->origin);
|
|
|
|
if (!cli_conn)
|
|
return 0;
|
|
|
|
conn_get_to_addr(cli_conn);
|
|
|
|
smp->data.type = SMP_T_SINT;
|
|
if (!(smp->data.u.sint = get_host_port(&cli_conn->addr.to)))
|
|
return 0;
|
|
|
|
smp->flags = 0;
|
|
return 1;
|
|
}
|
|
|
|
#ifdef TCP_INFO
|
|
|
|
/* Returns some tcp_info data is its avalaible. "dir" must be set to 0 if
|
|
* the client connection is require, otherwise it is set to 1. "val" represents
|
|
* the required value. Use 0 for rtt and 1 for rttavg. "unit" is the expected unit
|
|
* by default, the rtt is in us. Id "unit" is set to 0, the unit is us, if it is
|
|
* set to 1, the untis are milliseconds.
|
|
* If the function fails it returns 0, otherwise it returns 1 and "result" is filled.
|
|
*/
|
|
static inline int get_tcp_info(const struct arg *args, struct sample *smp,
|
|
int dir, int val)
|
|
{
|
|
struct connection *conn;
|
|
struct tcp_info info;
|
|
socklen_t optlen;
|
|
|
|
/* strm can be null. */
|
|
if (!smp->strm)
|
|
return 0;
|
|
|
|
/* get the object associated with the stream interface.The
|
|
* object can be other thing than a connection. For example,
|
|
* it be a appctx. */
|
|
conn = cs_conn(objt_cs(smp->strm->si[dir].end));
|
|
if (!conn)
|
|
return 0;
|
|
|
|
/* The fd may not be available for the tcp_info struct, and the
|
|
syscal can fail. */
|
|
optlen = sizeof(info);
|
|
if (getsockopt(conn->handle.fd, SOL_TCP, TCP_INFO, &info, &optlen) == -1)
|
|
return 0;
|
|
|
|
/* extract the value. */
|
|
smp->data.type = SMP_T_SINT;
|
|
switch (val) {
|
|
case 0: smp->data.u.sint = info.tcpi_rtt; break;
|
|
case 1: smp->data.u.sint = info.tcpi_rttvar; break;
|
|
#if defined(__linux__)
|
|
/* these ones are common to all Linux versions */
|
|
case 2: smp->data.u.sint = info.tcpi_unacked; break;
|
|
case 3: smp->data.u.sint = info.tcpi_sacked; break;
|
|
case 4: smp->data.u.sint = info.tcpi_lost; break;
|
|
case 5: smp->data.u.sint = info.tcpi_retrans; break;
|
|
case 6: smp->data.u.sint = info.tcpi_fackets; break;
|
|
case 7: smp->data.u.sint = info.tcpi_reordering; break;
|
|
#elif defined(__FreeBSD__) || defined(__NetBSD__)
|
|
/* the ones are found on FreeBSD and NetBSD featuring TCP_INFO */
|
|
case 2: smp->data.u.sint = info.__tcpi_unacked; break;
|
|
case 3: smp->data.u.sint = info.__tcpi_sacked; break;
|
|
case 4: smp->data.u.sint = info.__tcpi_lost; break;
|
|
case 5: smp->data.u.sint = info.__tcpi_retrans; break;
|
|
case 6: smp->data.u.sint = info.__tcpi_fackets; break;
|
|
case 7: smp->data.u.sint = info.__tcpi_reordering; break;
|
|
#endif
|
|
default: return 0;
|
|
}
|
|
|
|
/* Convert the value as expected. */
|
|
if (args) {
|
|
if (args[0].type == ARGT_STR) {
|
|
if (strcmp(args[0].data.str.str, "us") == 0) {
|
|
/* Do nothing. */
|
|
} else if (strcmp(args[0].data.str.str, "ms") == 0) {
|
|
smp->data.u.sint = (smp->data.u.sint + 500) / 1000;
|
|
} else
|
|
return 0;
|
|
} else if (args[0].type == ARGT_STOP) {
|
|
smp->data.u.sint = (smp->data.u.sint + 500) / 1000;
|
|
} else
|
|
return 0;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
/* Sample fetch for the rtt of the client connection: delegates to
 * get_tcp_info() with dir=0 (client side) and val=0 (rtt).
 * Returns 1 when <smp> was filled, 0 otherwise.
 */
static int
smp_fetch_fc_rtt(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 0) ? 1 : 0;
}
|
|
|
|
/* Sample fetch for the rtt variance of the client connection: delegates to
 * get_tcp_info() with dir=0 (client side) and val=1 (rttvar).
 * Returns 1 when <smp> was filled, 0 otherwise.
 */
static int
smp_fetch_fc_rttvar(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 1) ? 1 : 0;
}
|
|
|
|
#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__)
|
|
|
|
/* Sample fetch for the "unacked" counter of the client connection: delegates
 * to get_tcp_info() with dir=0 (client side) and val=2.
 * Returns 1 when <smp> was filled, 0 otherwise.
 */
static int
smp_fetch_fc_unacked(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 2) ? 1 : 0;
}
|
|
|
|
/* Sample fetch for the "sacked" counter of the client connection: delegates
 * to get_tcp_info() with dir=0 (client side) and val=3.
 * Returns 1 when <smp> was filled, 0 otherwise.
 */
static int
smp_fetch_fc_sacked(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 3) ? 1 : 0;
}
|
|
|
|
/* Sample fetch for the "lost" counter of the client connection: delegates to
 * get_tcp_info() with dir=0 (client side) and val=4.
 * Returns 1 when <smp> was filled, 0 otherwise.
 */
static int
smp_fetch_fc_lost(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 4) ? 1 : 0;
}
|
|
|
|
/* Sample fetch for the "retrans" counter of the client connection: delegates
 * to get_tcp_info() with dir=0 (client side) and val=5.
 * Returns 1 when <smp> was filled, 0 otherwise.
 */
static int
smp_fetch_fc_retrans(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 5) ? 1 : 0;
}
|
|
|
|
/* Sample fetch for the "fackets" counter of the client connection: delegates
 * to get_tcp_info() with dir=0 (client side) and val=6.
 * Returns 1 when <smp> was filled, 0 otherwise.
 */
static int
smp_fetch_fc_fackets(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 6) ? 1 : 0;
}
|
|
|
|
/* Sample fetch for the "reordering" counter of the client connection:
 * delegates to get_tcp_info() with dir=0 (client side) and val=7.
 * Returns 1 when <smp> was filled, 0 otherwise.
 */
static int
smp_fetch_fc_reordering(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 7) ? 1 : 0;
}
|
|
#endif // linux || freebsd || netbsd
|
|
#endif // TCP_INFO
|
|
|
|
#ifdef IPV6_V6ONLY
|
|
/* parse the "v4v6" bind keyword */
|
|
static int bind_parse_v4v6(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
|
|
{
|
|
struct listener *l;
|
|
|
|
list_for_each_entry(l, &conf->listeners, by_bind) {
|
|
if (l->addr.ss_family == AF_INET6)
|
|
l->options |= LI_O_V4V6;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* parse the "v6only" bind keyword */
|
|
static int bind_parse_v6only(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
|
|
{
|
|
struct listener *l;
|
|
|
|
list_for_each_entry(l, &conf->listeners, by_bind) {
|
|
if (l->addr.ss_family == AF_INET6)
|
|
l->options |= LI_O_V6ONLY;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
#ifdef CONFIG_HAP_TRANSPARENT
|
|
/* parse the "transparent" bind keyword */
|
|
static int bind_parse_transparent(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
|
|
{
|
|
struct listener *l;
|
|
|
|
list_for_each_entry(l, &conf->listeners, by_bind) {
|
|
if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
|
|
l->options |= LI_O_FOREIGN;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
#ifdef TCP_DEFER_ACCEPT
|
|
/* parse the "defer-accept" bind keyword */
|
|
static int bind_parse_defer_accept(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
|
|
{
|
|
struct listener *l;
|
|
|
|
list_for_each_entry(l, &conf->listeners, by_bind) {
|
|
if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
|
|
l->options |= LI_O_DEF_ACCEPT;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
#ifdef TCP_FASTOPEN
|
|
/* parse the "tfo" bind keyword */
|
|
static int bind_parse_tfo(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
|
|
{
|
|
struct listener *l;
|
|
|
|
list_for_each_entry(l, &conf->listeners, by_bind) {
|
|
if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
|
|
l->options |= LI_O_TCP_FO;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
#ifdef TCP_MAXSEG
|
|
/* parse the "mss" bind keyword */
|
|
static int bind_parse_mss(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
|
|
{
|
|
struct listener *l;
|
|
int mss;
|
|
|
|
if (!*args[cur_arg + 1]) {
|
|
memprintf(err, "'%s' : missing MSS value", args[cur_arg]);
|
|
return ERR_ALERT | ERR_FATAL;
|
|
}
|
|
|
|
mss = atoi(args[cur_arg + 1]);
|
|
if (!mss || abs(mss) > 65535) {
|
|
memprintf(err, "'%s' : expects an MSS with and absolute value between 1 and 65535", args[cur_arg]);
|
|
return ERR_ALERT | ERR_FATAL;
|
|
}
|
|
|
|
list_for_each_entry(l, &conf->listeners, by_bind) {
|
|
if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
|
|
l->maxseg = mss;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
#ifdef TCP_USER_TIMEOUT
|
|
/* parse the "tcp-ut" bind keyword */
|
|
static int bind_parse_tcp_ut(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
|
|
{
|
|
const char *ptr = NULL;
|
|
struct listener *l;
|
|
unsigned int timeout;
|
|
|
|
if (!*args[cur_arg + 1]) {
|
|
memprintf(err, "'%s' : missing TCP User Timeout value", args[cur_arg]);
|
|
return ERR_ALERT | ERR_FATAL;
|
|
}
|
|
|
|
ptr = parse_time_err(args[cur_arg + 1], &timeout, TIME_UNIT_MS);
|
|
if (ptr) {
|
|
memprintf(err, "'%s' : expects a positive delay in milliseconds", args[cur_arg]);
|
|
return ERR_ALERT | ERR_FATAL;
|
|
}
|
|
|
|
list_for_each_entry(l, &conf->listeners, by_bind) {
|
|
if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
|
|
l->tcp_ut = timeout;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
#ifdef SO_BINDTODEVICE
|
|
/* parse the "interface" bind keyword */
|
|
static int bind_parse_interface(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
|
|
{
|
|
struct listener *l;
|
|
|
|
if (!*args[cur_arg + 1]) {
|
|
memprintf(err, "'%s' : missing interface name", args[cur_arg]);
|
|
return ERR_ALERT | ERR_FATAL;
|
|
}
|
|
|
|
list_for_each_entry(l, &conf->listeners, by_bind) {
|
|
if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
|
|
l->interface = strdup(args[cur_arg + 1]);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
#ifdef CONFIG_HAP_NS
|
|
/* parse the "namespace" bind keyword */
|
|
static int bind_parse_namespace(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
|
|
{
|
|
struct listener *l;
|
|
char *namespace = NULL;
|
|
|
|
if (!*args[cur_arg + 1]) {
|
|
memprintf(err, "'%s' : missing namespace id", args[cur_arg]);
|
|
return ERR_ALERT | ERR_FATAL;
|
|
}
|
|
namespace = args[cur_arg + 1];
|
|
|
|
list_for_each_entry(l, &conf->listeners, by_bind) {
|
|
l->netns = netns_store_lookup(namespace, strlen(namespace));
|
|
|
|
if (l->netns == NULL)
|
|
l->netns = netns_store_insert(namespace);
|
|
|
|
if (l->netns == NULL) {
|
|
ha_alert("Cannot open namespace '%s'.\n", args[cur_arg + 1]);
|
|
return ERR_ALERT | ERR_FATAL;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
#ifdef TCP_USER_TIMEOUT
|
|
/* parse the "tcp-ut" server keyword */
|
|
static int srv_parse_tcp_ut(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
|
|
{
|
|
const char *ptr = NULL;
|
|
unsigned int timeout;
|
|
|
|
if (!*args[*cur_arg + 1]) {
|
|
memprintf(err, "'%s' : missing TCP User Timeout value", args[*cur_arg]);
|
|
return ERR_ALERT | ERR_FATAL;
|
|
}
|
|
|
|
ptr = parse_time_err(args[*cur_arg + 1], &timeout, TIME_UNIT_MS);
|
|
if (ptr) {
|
|
memprintf(err, "'%s' : expects a positive delay in milliseconds", args[*cur_arg]);
|
|
return ERR_ALERT | ERR_FATAL;
|
|
}
|
|
|
|
if (newsrv->addr.ss_family == AF_INET || newsrv->addr.ss_family == AF_INET6)
|
|
newsrv->tcp_ut = timeout;
|
|
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
|
|
/* Table of the sample fetch keywords implemented in this file. Each entry
 * gives the keyword, its fetch function, its argument mask, an argument
 * validation callback (none here), the declared output type and the place
 * the sample is extracted from. The "fc_*" entries are only present when
 * TCP_INFO is defined, and the counters only on Linux, FreeBSD and NetBSD.
 *
 * Note: must not be declared <const> as its list will be overwritten.
 * Note: fetches that may return multiple types must be declared as the lowest
 * common denominator, the type that can be cast into all other ones. For
 * instance v4/v6 must be declared v4.
 */
static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
	{ "dst", smp_fetch_dst, 0, NULL, SMP_T_IPV4, SMP_USE_L4CLI },
	{ "dst_is_local", smp_fetch_dst_is_local, 0, NULL, SMP_T_BOOL, SMP_USE_L4CLI },
	{ "dst_port", smp_fetch_dport, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
	{ "src", smp_fetch_src, 0, NULL, SMP_T_IPV4, SMP_USE_L4CLI },
	{ "src_is_local", smp_fetch_src_is_local, 0, NULL, SMP_T_BOOL, SMP_USE_L4CLI },
	{ "src_port", smp_fetch_sport, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
#ifdef TCP_INFO
	{ "fc_rtt", smp_fetch_fc_rtt, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_L4CLI },
	{ "fc_rttvar", smp_fetch_fc_rttvar, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_L4CLI },
#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__)
	{ "fc_unacked", smp_fetch_fc_unacked, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_L4CLI },
	{ "fc_sacked", smp_fetch_fc_sacked, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_L4CLI },
	{ "fc_retrans", smp_fetch_fc_retrans, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_L4CLI },
	{ "fc_fackets", smp_fetch_fc_fackets, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_L4CLI },
	{ "fc_lost", smp_fetch_fc_lost, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_L4CLI },
	{ "fc_reordering", smp_fetch_fc_reordering, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_L4CLI },
#endif // linux || freebsd || netbsd
#endif // TCP_INFO
	{ /* END */ },
}};
|
|
|
|
/************************************************************************/
|
|
/* All supported bind keywords must be declared here. */
|
|
/************************************************************************/
|
|
|
|
/* Note: must not be declared <const> as its list will be overwritten.
|
|
* Please take care of keeping this list alphabetically sorted, doing so helps
|
|
* all code contributors.
|
|
* Optional keywords are also declared with a NULL ->parse() function so that
|
|
* the config parser can report an appropriate error when a known keyword was
|
|
* not enabled.
|
|
*/
|
|
static struct bind_kw_list bind_kws = { "TCP", { }, {
|
|
#ifdef TCP_DEFER_ACCEPT
|
|
{ "defer-accept", bind_parse_defer_accept, 0 }, /* wait for some data for 1 second max before doing accept */
|
|
#endif
|
|
#ifdef SO_BINDTODEVICE
|
|
{ "interface", bind_parse_interface, 1 }, /* specifically bind to this interface */
|
|
#endif
|
|
#ifdef TCP_MAXSEG
|
|
{ "mss", bind_parse_mss, 1 }, /* set MSS of listening socket */
|
|
#endif
|
|
#ifdef TCP_USER_TIMEOUT
|
|
{ "tcp-ut", bind_parse_tcp_ut, 1 }, /* set User Timeout on listening socket */
|
|
#endif
|
|
#ifdef TCP_FASTOPEN
|
|
{ "tfo", bind_parse_tfo, 0 }, /* enable TCP_FASTOPEN of listening socket */
|
|
#endif
|
|
#ifdef CONFIG_HAP_TRANSPARENT
|
|
{ "transparent", bind_parse_transparent, 0 }, /* transparently bind to the specified addresses */
|
|
#endif
|
|
#ifdef IPV6_V6ONLY
|
|
{ "v4v6", bind_parse_v4v6, 0 }, /* force socket to bind to IPv4+IPv6 */
|
|
{ "v6only", bind_parse_v6only, 0 }, /* force socket to bind to IPv6 only */
|
|
#endif
|
|
#ifdef CONFIG_HAP_NS
|
|
{ "namespace", bind_parse_namespace, 1 },
|
|
#endif
|
|
/* the versions with the NULL parse function*/
|
|
{ "defer-accept", NULL, 0 },
|
|
{ "interface", NULL, 1 },
|
|
{ "mss", NULL, 1 },
|
|
{ "transparent", NULL, 0 },
|
|
{ "v4v6", NULL, 0 },
|
|
{ "v6only", NULL, 0 },
|
|
{ NULL, NULL, 0 },
|
|
}};
|
|
|
|
/* Server keywords provided by this file; the table is handed to
 * srv_register_keywords() by the constructor at the end of the file.
 * The only entry, "tcp-ut", exists only when TCP_USER_TIMEOUT is defined.
 */
static struct srv_kw_list srv_kws = { "TCP", { }, {
#ifdef TCP_USER_TIMEOUT
	{ "tcp-ut", srv_parse_tcp_ut, 1, 1 }, /* set TCP user timeout on server */
#endif
	{ NULL, NULL, 0 },
}};
|
|
|
|
static struct action_kw_list tcp_req_conn_actions = {ILH, {
|
|
{ "silent-drop", tcp_parse_silent_drop },
|
|
{ "set-src", tcp_parse_set_src_dst },
|
|
{ "set-src-port", tcp_parse_set_src_dst },
|
|
{ "set-dst" , tcp_parse_set_src_dst },
|
|
{ "set-dst-port", tcp_parse_set_src_dst },
|
|
{ /* END */ }
|
|
}};
|
|
|
|
static struct action_kw_list tcp_req_sess_actions = {ILH, {
|
|
{ "silent-drop", tcp_parse_silent_drop },
|
|
{ "set-src", tcp_parse_set_src_dst },
|
|
{ "set-src-port", tcp_parse_set_src_dst },
|
|
{ "set-dst" , tcp_parse_set_src_dst },
|
|
{ "set-dst-port", tcp_parse_set_src_dst },
|
|
{ /* END */ }
|
|
}};
|
|
|
|
static struct action_kw_list tcp_req_cont_actions = {ILH, {
|
|
{ "silent-drop", tcp_parse_silent_drop },
|
|
{ /* END */ }
|
|
}};
|
|
|
|
static struct action_kw_list tcp_res_cont_actions = {ILH, {
|
|
{ "silent-drop", tcp_parse_silent_drop },
|
|
{ /* END */ }
|
|
}};
|
|
|
|
static struct action_kw_list http_req_actions = {ILH, {
|
|
{ "silent-drop", tcp_parse_silent_drop },
|
|
{ "set-src", tcp_parse_set_src_dst },
|
|
{ "set-src-port", tcp_parse_set_src_dst },
|
|
{ "set-dst", tcp_parse_set_src_dst },
|
|
{ "set-dst-port", tcp_parse_set_src_dst },
|
|
{ /* END */ }
|
|
}};
|
|
|
|
static struct action_kw_list http_res_actions = {ILH, {
|
|
{ "silent-drop", tcp_parse_silent_drop },
|
|
{ /* END */ }
|
|
}};
|
|
|
|
|
|
__attribute__((constructor))
|
|
static void __tcp_protocol_init(void)
|
|
{
|
|
protocol_register(&proto_tcpv4);
|
|
protocol_register(&proto_tcpv6);
|
|
sample_register_fetches(&sample_fetch_keywords);
|
|
bind_register_keywords(&bind_kws);
|
|
srv_register_keywords(&srv_kws);
|
|
tcp_req_conn_keywords_register(&tcp_req_conn_actions);
|
|
tcp_req_sess_keywords_register(&tcp_req_sess_actions);
|
|
tcp_req_cont_keywords_register(&tcp_req_cont_actions);
|
|
tcp_res_cont_keywords_register(&tcp_res_cont_actions);
|
|
http_req_keywords_register(&http_req_actions);
|
|
http_res_keywords_register(&http_res_actions);
|
|
|
|
|
|
hap_register_build_opts("Built with transparent proxy support using:"
|
|
#if defined(IP_TRANSPARENT)
|
|
" IP_TRANSPARENT"
|
|
#endif
|
|
#if defined(IPV6_TRANSPARENT)
|
|
" IPV6_TRANSPARENT"
|
|
#endif
|
|
#if defined(IP_FREEBIND)
|
|
" IP_FREEBIND"
|
|
#endif
|
|
#if defined(IP_BINDANY)
|
|
" IP_BINDANY"
|
|
#endif
|
|
#if defined(IPV6_BINDANY)
|
|
" IPV6_BINDANY"
|
|
#endif
|
|
#if defined(SO_BINDANY)
|
|
" SO_BINDANY"
|
|
#endif
|
|
"", 0);
|
|
}
|
|
|
|
|
|
/*
|
|
* Local variables:
|
|
* c-indent-level: 8
|
|
* c-basic-offset: 8
|
|
* End:
|
|
*/
|