diff --git a/doc/configuration.txt b/doc/configuration.txt index 26d63e669..15bf24e34 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -28279,6 +28279,27 @@ report this to the maintainers. range can or must be specified. It is considered as an alias of 'stream+ipv4@'. +'mptcp@
<address>[:port1[-port2]]' following <address>
is considered as an IPv4 + or IPv6 address depending on the syntax but + socket type and transport method are forced to + "stream", with the MPTCP protocol. Depending + on the statement using this address, a port or + a port range can or must be specified. + +'mptcp4@
<address>[:port1[-port2]]' following <address>
is always considered as + an IPv4 address but socket type and transport + method are forced to "stream", with the MPTCP + protocol. Depending on the statement using + this address, a port or a port range can or + must be specified. + +'mptcp6@
<address>[:port1[-port2]]' following <address>
is always considered as + an IPv6 address but socket type and transport + method are forced to "stream", with the MPTCP + protocol. Depending on the statement using + this address, a port or a port range can or + must be specified. + 'udp@
<address>[:port1[-port2]]' following <address>
is considered as an IPv4 or IPv6 address depending of the syntax but socket type and transport method is forced to diff --git a/examples/mptcp-backend.py b/examples/mptcp-backend.py new file mode 100644 index 000000000..5237de542 --- /dev/null +++ b/examples/mptcp-backend.py @@ -0,0 +1,22 @@ +# ============================================================================= +# Example of a simple backend server using mptcp in python, used with mptcp.cfg +# ============================================================================= + +import socket + +sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM, socket.IPPROTO_MPTCP) +sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) +# dual stack IPv4/IPv6 +sock.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0) + +sock.bind(("::", 4331)) +sock.listen() + +while True: + (conn, address) = sock.accept() + req = conn.recv(1024) + print(F"Received request : {req}") + conn.send(b"HTTP/1.0 200 OK\r\n\r\nHello\n") + conn.close() + +sock.close() diff --git a/examples/mptcp.cfg b/examples/mptcp.cfg new file mode 100644 index 000000000..d43483dfe --- /dev/null +++ b/examples/mptcp.cfg @@ -0,0 +1,23 @@ +# You can test this configuration by running the command: +# +# $ mptcpize run curl localhost:5000 + +global + strict-limits # refuse to start if insufficient FDs/memory + # add some process-wide tuning here if required + +defaults + mode http + balance roundrobin + timeout client 60s + timeout server 60s + timeout connect 1s + +frontend main + bind mptcp@[::]:5000 + default_backend mptcp_backend + +# MPTCP is usually used on the frontend, but it is also possible +# to enable it to communicate with the backend +backend mptcp_backend + server mptcp_server mptcp@[::]:4331 diff --git a/include/haproxy/compat.h b/include/haproxy/compat.h index 3829060b7..68474fe8e 100644 --- a/include/haproxy/compat.h +++ b/include/haproxy/compat.h @@ -317,6 +317,16 @@ typedef struct { } empty_t; #define queue _queue #endif +/* Define a 
flag indicating if MPTCP is available */ +#ifdef __linux__ +#define HA_HAVE_MPTCP 1 +#endif + +/* only Linux defines IPPROTO_MPTCP */ +#ifndef IPPROTO_MPTCP +#define IPPROTO_MPTCP 262 +#endif + #endif /* _HAPROXY_COMPAT_H */ /* diff --git a/include/haproxy/sock_inet.h b/include/haproxy/sock_inet.h index 6f07e637a..1c3b7a303 100644 --- a/include/haproxy/sock_inet.h +++ b/include/haproxy/sock_inet.h @@ -31,6 +31,14 @@ extern int sock_inet6_v6only_default; extern int sock_inet_tcp_maxseg_default; extern int sock_inet6_tcp_maxseg_default; +#ifdef HA_HAVE_MPTCP +extern int sock_inet_mptcp_maxseg_default; +extern int sock_inet6_mptcp_maxseg_default; +#else +#define sock_inet_mptcp_maxseg_default -1 +#define sock_inet6_mptcp_maxseg_default -1 +#endif + extern struct proto_fam proto_fam_inet4; extern struct proto_fam proto_fam_inet6; diff --git a/src/backend.c b/src/backend.c index 6956d9bfe..e4bd465e9 100644 --- a/src/backend.c +++ b/src/backend.c @@ -1690,8 +1690,9 @@ skip_reuse: if (!srv_conn->xprt) { /* set the correct protocol on the output stream connector */ + if (srv) { - if (conn_prepare(srv_conn, protocol_lookup(srv_conn->dst->ss_family, PROTO_TYPE_STREAM, 0), srv->xprt)) { + if (conn_prepare(srv_conn, protocol_lookup(srv_conn->dst->ss_family, PROTO_TYPE_STREAM, srv->alt_proto), srv->xprt)) { conn_free(srv_conn); return SF_ERR_INTERNAL; } diff --git a/src/proto_tcp.c b/src/proto_tcp.c index cf79ffbc5..39de465ef 100644 --- a/src/proto_tcp.c +++ b/src/proto_tcp.c @@ -145,6 +145,98 @@ struct protocol proto_tcpv6 = { INITCALL1(STG_REGISTER, protocol_register, &proto_tcpv6); +#ifdef HA_HAVE_MPTCP +/* Most fields are copied from proto_tcpv4 */ +struct protocol proto_mptcpv4 = { + .name = "mptcpv4", + + /* connection layer */ + .xprt_type = PROTO_TYPE_STREAM, + .listen = tcp_bind_listener, + .enable = tcp_enable_listener, + .disable = tcp_disable_listener, + .add = default_add_listener, + .unbind = default_unbind_listener, + .suspend = default_suspend_listener, + 
.resume = default_resume_listener, + .accept_conn = sock_accept_conn, + .ctrl_init = sock_conn_ctrl_init, + .ctrl_close = sock_conn_ctrl_close, + .connect = tcp_connect_server, + .drain = sock_drain, + .check_events = sock_check_events, + .ignore_events = sock_ignore_events, + .get_info = tcp_get_info, + + /* binding layer */ + .rx_suspend = tcp_suspend_receiver, + .rx_resume = tcp_resume_receiver, + + /* address family */ + .fam = &proto_fam_inet4, + + /* socket layer */ + .proto_type = PROTO_TYPE_STREAM, + .sock_type = SOCK_STREAM, + .sock_prot = IPPROTO_MPTCP, /* MPTCP specific */ + .rx_enable = sock_enable, + .rx_disable = sock_disable, + .rx_unbind = sock_unbind, + .rx_listening = sock_accepting_conn, + .default_iocb = sock_accept_iocb, +#ifdef SO_REUSEPORT + .flags = PROTO_F_REUSEPORT_SUPPORTED, +#endif +}; + +INITCALL1(STG_REGISTER, protocol_register, &proto_mptcpv4); + +/* Most fields are copied from proto_tcpv6 */ +struct protocol proto_mptcpv6 = { + .name = "mptcpv6", + + /* connection layer */ + .xprt_type = PROTO_TYPE_STREAM, + .listen = tcp_bind_listener, + .enable = tcp_enable_listener, + .disable = tcp_disable_listener, + .add = default_add_listener, + .unbind = default_unbind_listener, + .suspend = default_suspend_listener, + .resume = default_resume_listener, + .accept_conn = sock_accept_conn, + .ctrl_init = sock_conn_ctrl_init, + .ctrl_close = sock_conn_ctrl_close, + .connect = tcp_connect_server, + .drain = sock_drain, + .check_events = sock_check_events, + .ignore_events = sock_ignore_events, + .get_info = tcp_get_info, + + /* binding layer */ + .rx_suspend = tcp_suspend_receiver, + .rx_resume = tcp_resume_receiver, + + /* address family */ + .fam = &proto_fam_inet6, + + /* socket layer */ + .proto_type = PROTO_TYPE_STREAM, + .sock_type = SOCK_STREAM, + .sock_prot = IPPROTO_MPTCP, /* MPTCP specific */ + .rx_enable = sock_enable, + .rx_disable = sock_disable, + .rx_unbind = sock_unbind, + .rx_listening = sock_accepting_conn, + .default_iocb = 
sock_accept_iocb, +#ifdef SO_REUSEPORT + .flags = PROTO_F_REUSEPORT_SUPPORTED, +#endif +}; + +INITCALL1(STG_REGISTER, protocol_register, &proto_mptcpv6); +#endif + /* Binds ipv4/ipv6 address <local> to socket <fd>, unless <flags> is set, in which * case we try to bind <remote>. <flags> is a 2-bit field consisting of : * - 0 : ignore remote address (may even be a NULL pointer) @@ -590,12 +682,20 @@ int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen) /* we may want to try to restore the default MSS if the socket was inherited */ int tmpmaxseg = -1; int defaultmss; + int v4 = listener->rx.addr.ss_family == AF_INET; socklen_t len = sizeof(tmpmaxseg); - if (listener->rx.addr.ss_family == AF_INET) - defaultmss = sock_inet_tcp_maxseg_default; - else - defaultmss = sock_inet6_tcp_maxseg_default; + if (listener->rx.proto->sock_prot == IPPROTO_MPTCP) { + if (v4) + defaultmss = sock_inet_mptcp_maxseg_default; + else + defaultmss = sock_inet6_mptcp_maxseg_default; + } else { + if (v4) + defaultmss = sock_inet_tcp_maxseg_default; + else + defaultmss = sock_inet6_tcp_maxseg_default; + } getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &tmpmaxseg, &len); if (defaultmss > 0 && diff --git a/src/protocol.c b/src/protocol.c index f5f494068..edf1c22ad 100644 --- a/src/protocol.c +++ b/src/protocol.c @@ -51,7 +51,8 @@ void protocol_register(struct protocol *proto) LIST_APPEND(&protocols, &proto->list); __protocol_by_family[sock_family] [proto->proto_type] - [proto->xprt_type == PROTO_TYPE_DGRAM] = proto; + [proto->xprt_type == PROTO_TYPE_DGRAM || + proto->sock_prot == IPPROTO_MPTCP] = proto; __proto_fam_by_family[sock_family] = proto->fam; HA_SPIN_UNLOCK(PROTO_LOCK, &proto_lock); } diff --git a/src/sock.c b/src/sock.c index aa524d886..4b872d15e 100644 --- a/src/sock.c +++ b/src/sock.c @@ -279,7 +279,7 @@ int sock_create_server_socket(struct connection *conn, struct proxy *be, int *st ns = __objt_server(conn->target)->netns; } #endif - proto = protocol_lookup(conn->dst->ss_family, PROTO_TYPE_STREAM, 0); +
proto = protocol_lookup(conn->dst->ss_family, PROTO_TYPE_STREAM, conn->ctrl->sock_prot == IPPROTO_MPTCP); BUG_ON(!proto); sock_fd = my_socketat(ns, proto->fam->sock_domain, SOCK_STREAM, proto->sock_prot); @@ -306,7 +306,8 @@ int sock_create_server_socket(struct connection *conn, struct proxy *be, int *st } if (fd_set_nonblock(sock_fd) == -1 || - ((conn->ctrl->sock_prot == IPPROTO_TCP) && (setsockopt(sock_fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one)) == -1))) { + ((conn->ctrl->sock_prot == IPPROTO_TCP || conn->ctrl->sock_prot == IPPROTO_MPTCP) && + (setsockopt(sock_fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one)) == -1))) { qfprintf(stderr,"Cannot set client socket to non blocking mode.\n"); send_log(be, LOG_EMERG, "Cannot set client socket to non blocking mode.\n"); close(sock_fd); diff --git a/src/sock_inet.c b/src/sock_inet.c index 07364f02a..20a9ab598 100644 --- a/src/sock_inet.c +++ b/src/sock_inet.c @@ -79,6 +79,12 @@ int sock_inet6_v6only_default = 0; int sock_inet_tcp_maxseg_default = -1; int sock_inet6_tcp_maxseg_default = -1; +/* Default MPTCPv4/MPTCPv6 MSS settings. -1=unknown. */ +#ifdef HA_HAVE_MPTCP +int sock_inet_mptcp_maxseg_default = -1; +int sock_inet6_mptcp_maxseg_default = -1; +#endif + /* Compares two AF_INET sockaddr addresses. Returns 0 if they match or non-zero * if they do not match. 
*/ @@ -496,6 +502,30 @@ static void sock_inet_prepare() #endif close(fd); } + +#ifdef HA_HAVE_MPTCP + fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP); + if (fd >= 0) { +#ifdef TCP_MAXSEG + /* retrieve the OS' default mss for MPTCPv4 */ + len = sizeof(val); + if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &val, &len) == 0) + sock_inet_mptcp_maxseg_default = val; +#endif + close(fd); + } + + fd = socket(AF_INET6, SOCK_STREAM, IPPROTO_MPTCP); + if (fd >= 0) { +#ifdef TCP_MAXSEG + /* retrieve the OS' default mss for MPTCPv6 */ + len = sizeof(val); + if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &val, &len) == 0) + sock_inet6_mptcp_maxseg_default = val; +#endif + close(fd); + } +#endif } INITCALL0(STG_PREPARE, sock_inet_prepare); diff --git a/src/tools.c b/src/tools.c index db414600f..01189d3e0 100644 --- a/src/tools.c +++ b/src/tools.c @@ -1069,6 +1069,13 @@ struct sockaddr_storage *str2sa_range(const char *str, int *port, int *low, int proto_type = PROTO_TYPE_STREAM; ctrl_type = SOCK_STREAM; } + else if (strncmp(str2, "mptcp4@", 7) == 0) { + str2 += 7; + ss.ss_family = AF_INET; + proto_type = PROTO_TYPE_STREAM; + ctrl_type = SOCK_STREAM; + alt_proto = 1; + } else if (strncmp(str2, "udp4@", 5) == 0) { str2 += 5; ss.ss_family = AF_INET; @@ -1082,6 +1089,13 @@ struct sockaddr_storage *str2sa_range(const char *str, int *port, int *low, int proto_type = PROTO_TYPE_STREAM; ctrl_type = SOCK_STREAM; } + else if (strncmp(str2, "mptcp6@", 7) == 0) { + str2 += 7; + ss.ss_family = AF_INET6; + proto_type = PROTO_TYPE_STREAM; + ctrl_type = SOCK_STREAM; + alt_proto = 1; + } else if (strncmp(str2, "udp6@", 5) == 0) { str2 += 5; ss.ss_family = AF_INET6; @@ -1095,6 +1109,13 @@ struct sockaddr_storage *str2sa_range(const char *str, int *port, int *low, int proto_type = PROTO_TYPE_STREAM; ctrl_type = SOCK_STREAM; } + else if (strncmp(str2, "mptcp@", 6) == 0) { + str2 += 6; + ss.ss_family = AF_UNSPEC; + proto_type = PROTO_TYPE_STREAM; + ctrl_type = SOCK_STREAM; + alt_proto = 1; + } else if 
(strncmp(str2, "udp@", 4) == 0) { str2 += 4; ss.ss_family = AF_UNSPEC;