diff --git a/Makefile b/Makefile index 1f6adc46f..74c6d7d4b 100644 --- a/Makefile +++ b/Makefile @@ -28,6 +28,7 @@ # USE_TPROXY : enable transparent proxy. Automatic. # USE_LINUX_TPROXY : enable full transparent proxy. Automatic. # USE_LINUX_SPLICE : enable kernel 2.6 splicing. Automatic. +# USE_LINUX_CAP : enable Linux capabilities. # USE_LIBCRYPT : enable encrypted passwords using -lcrypt # USE_CRYPT_H : set it if your system requires including crypt.h # USE_GETADDRINFO : use getaddrinfo() to resolve IPv6 host names. @@ -305,7 +306,7 @@ LDFLAGS = $(ARCH_FLAGS) -g # specific entries if present before them. use_opts = USE_EPOLL USE_KQUEUE USE_NETFILTER USE_POLL \ USE_THREAD USE_PTHREAD_EMULATION USE_BACKTRACE \ - USE_TPROXY USE_LINUX_TPROXY \ + USE_TPROXY USE_LINUX_TPROXY USE_LINUX_CAP \ USE_LINUX_SPLICE USE_LIBCRYPT USE_CRYPT_H USE_ENGINE \ USE_GETADDRINFO USE_OPENSSL USE_OPENSSL_WOLFSSL USE_SSL USE_LUA \ USE_ACCEPT4 USE_CLOSEFROM USE_ZLIB USE_SLZ USE_CPU_AFFINITY \ @@ -347,7 +348,7 @@ endif ifeq ($(TARGET),linux-glibc) set_target_defaults = $(call default_opts, \ USE_POLL USE_TPROXY USE_LIBCRYPT USE_DL USE_RT USE_CRYPT_H USE_NETFILTER \ - USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_LINUX_TPROXY \ + USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_LINUX_TPROXY USE_LINUX_CAP \ USE_ACCEPT4 USE_LINUX_SPLICE USE_PRCTL USE_THREAD_DUMP USE_NS USE_TFO \ USE_GETADDRINFO USE_BACKTRACE USE_SHM_OPEN) INSTALL = install -v @@ -357,7 +358,7 @@ endif ifeq ($(TARGET),linux-glibc-legacy) set_target_defaults = $(call default_opts, \ USE_POLL USE_TPROXY USE_LIBCRYPT USE_DL USE_RT USE_CRYPT_H USE_NETFILTER \ - USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_LINUX_TPROXY \ + USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_LINUX_TPROXY USE_LINUX_CAP \ USE_ACCEPT4 USE_LINUX_SPLICE USE_PRCTL USE_THREAD_DUMP USE_GETADDRINFO) INSTALL = install -v endif @@ -366,7 +367,7 @@ endif ifeq ($(TARGET),linux-musl) set_target_defaults = $(call default_opts, \ USE_POLL USE_TPROXY USE_LIBCRYPT USE_DL USE_RT USE_CRYPT_H 
USE_NETFILTER \ - USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_LINUX_TPROXY \ + USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_LINUX_TPROXY USE_LINUX_CAP \ USE_ACCEPT4 USE_LINUX_SPLICE USE_PRCTL USE_THREAD_DUMP USE_NS USE_TFO \ USE_GETADDRINFO USE_SHM_OPEN) INSTALL = install -v @@ -813,6 +814,10 @@ ifneq ($(USE_NS),) OPTIONS_OBJS += src/namespace.o endif +ifneq ($(USE_LINUX_CAP),) + OPTIONS_OBJS += src/linuxcap.o +endif + ifneq ($(USE_OT),) include addons/ot/Makefile endif diff --git a/doc/configuration.txt b/doc/configuration.txt index 9c9d8a747..9794c36e3 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -2183,6 +2183,22 @@ set-var-fmt set-var-fmt proc.current_state "primary" set-var-fmt proc.bootid "%pid|%t" +setcap <name>[,<name>...] + Sets a list of capabilities that must be preserved when starting with uid 0 + and switching to a non-zero uid. By default all permissions are lost by the + uid switch, but some are often needed when trying to connect to a server from + a foreign address during transparent proxying, or when binding to a port + below 1024, e.g. when using "tune.quic.socket-owner connection", resulting in + setups running entirely under uid 0. Setting capabilities generally is a + safer alternative, as only the required capabilities will be preserved. The + feature is OS-specific and only enabled on Linux when USE_LINUX_CAP=1 is set + at build time. The list of supported capabilities also depends on the OS and + is enumerated by the error message displayed when an invalid capability name + or an empty one is passed. Multiple capabilities may be passed, delimited by + commas. Among those commonly used, "cap_net_raw" allows binding transparently + to a foreign address, and "cap_net_bind_service" allows binding to a + privileged port and may be used by QUIC. + setenv <name> <value> Sets environment variable <name> to value <value>. If the variable exists, it is overwritten.
The changes immediately take effect so that the next line in @@ -3424,7 +3440,8 @@ tune.quic.socket-owner { listener | connection } network stack. If your platform is deemed not compatible, haproxy will automatically switch to "listener" mode on startup. Please note that QUIC listeners running on privileged ports may require to run as uid 0, or some - OS-specific tuning to permit the target uid to bind such ports. + OS-specific tuning to permit the target uid to bind such ports, such as + system capabilities. See also the "setcap" global directive. The "listener" value indicates that QUIC transfers will occur on the shared listener socket. This option can be a good compromise for small traffic as it @@ -11645,7 +11662,8 @@ source [:] [interface ] is possible at the server level using the "source" server option. Refer to section 5 for more information. - In order to work, "usesrc" requires root privileges. + In order to work, "usesrc" requires root privileges, or on supported systems, + the "cap_net_raw" capability. See also the "setcap" global directive. 
Examples : backend private diff --git a/include/haproxy/linuxcap.h b/include/haproxy/linuxcap.h new file mode 100644 index 000000000..9c337a40c --- /dev/null +++ b/include/haproxy/linuxcap.h @@ -0,0 +1,7 @@ +#ifndef _HAPROXY_LINUXCAP_H +#define _HAPROXY_LINUXCAP_H + +/* to be called before setuid() when switching from uid from_uid to uid to_uid; returns >=0 on success, negative on failure */ int prepare_caps_for_setuid(int from_uid, int to_uid); +/* to be called after setuid() with the same arguments; returns 0 on success, -1 on failure */ int finalize_caps_after_setuid(int from_uid, int to_uid); + +#endif /* _HAPROXY_LINUXCAP_H */ diff --git a/src/haproxy.c b/src/haproxy.c index ac6c50340..f2bb31e01 100644 --- a/src/haproxy.c +++ b/src/haproxy.c @@ -108,6 +108,9 @@ #include #include #include +#if defined(USE_LINUX_CAP) +#include +#endif #include #include #include @@ -3184,6 +3187,8 @@ static void *run_thread_poll_loop(void *data) /* set uid/gid depending on global settings */ static void set_identity(const char *program_name) { + int from_uid __maybe_unused = geteuid(); + if (global.gid) { if (getgroups(0, NULL) > 0 && setgroups(0, NULL) == -1) ha_warning("[%s.main()] Failed to drop supplementary groups. 
Using 'gid'/'group'" @@ -3196,11 +3201,27 @@ static void set_identity(const char *program_name) } } +#if defined(USE_LINUX_CAP) + if (prepare_caps_for_setuid(from_uid, global.uid) < 0) { + ha_alert("[%s.main()] Cannot switch uid to %d.\n", program_name, global.uid); + protocol_unbind_all(); + exit(1); + } +#endif + if (global.uid && setuid(global.uid) == -1) { ha_alert("[%s.main()] Cannot set uid %d.\n", program_name, global.uid); protocol_unbind_all(); exit(1); } + +#if defined(USE_LINUX_CAP) + if (finalize_caps_after_setuid(from_uid, global.uid) < 0) { + ha_alert("[%s.main()] Cannot switch uid to %d.\n", program_name, global.uid); + protocol_unbind_all(); + exit(1); + } +#endif } int main(int argc, char **argv) diff --git a/src/linuxcap.c b/src/linuxcap.c new file mode 100644 index 000000000..919086c0a --- /dev/null +++ b/src/linuxcap.c @@ -0,0 +1,191 @@ +/* + * Minimal handling of Linux kernel capabilities + * + * Copyright 2000-2023 Willy Tarreau + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +/* Depending on distros, some have capset(), others use the more complicated + * libcap. Let's stick to what we need and the kernel documents (capset). + * Note that prctl is needed here. 
+ */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* supported names, zero-terminated: each entry maps a configuration name to + * its CAP_* number, and an entry is only compiled in when the matching CAP_* + * macro is defined. The final { 0, 0 } entry marks the end of the table, so + * users of the table stop at the first entry whose cap field is zero. + */ +static const struct { + int cap; + const char *name; +} known_caps[] = { +#ifdef CAP_NET_RAW + { CAP_NET_RAW, "cap_net_raw" }, +#endif +#ifdef CAP_NET_ADMIN + { CAP_NET_ADMIN, "cap_net_admin" }, +#endif +#ifdef CAP_NET_BIND_SERVICE + { CAP_NET_BIND_SERVICE, "cap_net_bind_service" }, +#endif + /* must be last: a zero cap ends the table */ + { 0, 0 } +}; + +/* provided by sys/capability.h on some distros; here it is a direct wrapper + * around syscall(SYS_capset, hdrp, datap) and returns that call's result. + */ +static inline int capset(cap_user_header_t hdrp, const cap_user_data_t datap) +{ + return syscall(SYS_capset, hdrp, datap); +} + +/* bit mask of the capabilities to keep, one bit per capability number (bits + * are added by the "setcap" keyword parser). Defaults to zero, i.e. we don't + * keep any cap after setuid(). + */ +static uint32_t caplist; + +/* try to apply capabilities before switching UID from to . + * In practice we need to do this in 4 steps: + * - set PR_SET_KEEPCAPS to preserve caps across the final setuid() + * - set the effective and permitted caps ; + * - switch euid to non-zero + * - set the effective and permitted caps again + * - then the caller can safely call setuid() + * We don't do this if the current euid is not zero or if the target uid + * is zero. Returns >=0 on success, negative on failure. Alerts or warnings + * may be emitted. 
+ */ +int prepare_caps_for_setuid(int from_uid, int to_uid) +{ + struct __user_cap_data_struct cap_data = { }; + struct __user_cap_header_struct cap_hdr = { + .pid = 0, /* current process */ + .version = _LINUX_CAPABILITY_VERSION_1, + }; + + if (from_uid != 0) /* not starting from uid 0: nothing to do */ + return 0; + + if (!to_uid) /* target uid is 0: nothing to do */ + return 0; + + if (!caplist) /* no capability was configured: nothing to do */ + return 0; + + if (prctl(PR_SET_KEEPCAPS, 1) == -1) { + ha_alert("Failed to preserve capabilities using prctl(): %s\n", strerror(errno)); + return -1; + } + + cap_data.effective = cap_data.permitted = caplist | (1 << CAP_SETUID); /* CAP_SETUID is kept on top of the configured caps; finalize_caps_after_setuid() removes it */ + if (capset(&cap_hdr, &cap_data) == -1) { + ha_alert("Failed to preset the capabilities to preserve using capset(): %s\n", strerror(errno)); + return -1; + } + + if (seteuid(to_uid) == -1) { + ha_alert("Failed to set effective uid to %d: %s\n", to_uid, strerror(errno)); + return -1; + } + + cap_data.effective = cap_data.permitted = caplist | (1 << CAP_SETUID); /* same mask applied again now that the euid has changed */ + if (capset(&cap_hdr, &cap_data) == -1) { + ha_alert("Failed to set the final capabilities using capset(): %s\n", strerror(errno)); + return -1; + } + /* all's good */ + return 0; +} + +/* finalize the capabilities after setuid(). The most important is to drop the + * CAP_SETUID capability, which would otherwise allow to switch back to any + * UID and recover everything. This is done by setting the effective and + * permitted sets to caplist alone. Nothing is done (and 0 is returned) when + * from_uid is not zero, when to_uid is zero, or when caplist is empty. + * Returns 0 on success, or -1 after emitting an alert if capset() fails. + */ +int finalize_caps_after_setuid(int from_uid, int to_uid) +{ + struct __user_cap_data_struct cap_data = { }; + struct __user_cap_header_struct cap_hdr = { + .pid = 0, /* current process */ + .version = _LINUX_CAPABILITY_VERSION_1, + }; + + if (from_uid != 0) /* same bypass conditions as in prepare_caps_for_setuid() */ + return 0; + + if (!to_uid) + return 0; + + if (!caplist) + return 0; + + cap_data.effective = cap_data.permitted = caplist; /* configured caps only, CAP_SETUID is no longer included */ + if (capset(&cap_hdr, &cap_data) == -1) { + ha_alert("Failed to drop the setuid capability using capset(): %s\n", strerror(errno)); + return -1; + } + /* all's good */ + return 0; +} + +/* parse the "setcap" global keyword. Returns -1 on failure, 0 on success. 
*/ +static int cfg_parse_global_setcap(char **args, int section_type, + struct proxy *curpx, const struct proxy *defpx, + const char *file, int line, char **err) +{ + char *name = args[1]; + char *next; + uint32_t caps = 0; + int id; + + if (!*name) { + memprintf(err, "'%s' : missing capability name(s). ", args[0]); + goto dump_caps; + } + + while (name && *name) { + next = strchr(name, ','); + if (next) + *(next++) = '\0'; + + for (id = 0; known_caps[id].cap; id++) { + if (strcmp(name, known_caps[id].name) == 0) { + caps |= 1U << known_caps[id].cap; + break; + } + } + + if (!known_caps[id].cap) { + memprintf(err, "'%s' : unsupported capability '%s'. ", args[0], args[1]); + goto dump_caps; + } + name = next; + } + + caplist |= caps; + return 0; + + + dump_caps: + memprintf(err, "%s Supported ones are: ", *err); + + for (id = 0; known_caps[id].cap; id++) + memprintf(err, "%s%s%s%s", *err, + id ? known_caps[id+1].cap ? ", " : " and " : "", + known_caps[id].name, known_caps[id+1].cap ? "" : "."); + return -1; +} + +static struct cfg_kw_list cfg_kws = {ILH, { + { CFG_GLOBAL, "setcap", cfg_parse_global_setcap }, + { 0, NULL, NULL } +}}; + +INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);