MEDIUM: unix: implement support for Linux abstract namespace sockets

These sockets are the same as Unix sockets except that there's no need
for any filesystem access. The address may be whatever string both sides
agree upon. This can be really convenient for inter-process communications
as well as for chaining backends to frontends.

An address is forced into the abstract namespace by prefixing it with
"abns@" (short for "abstract namespace").
This commit is contained in:
Willy Tarreau 2014-05-10 01:49:15 +02:00
parent 5cf0b52d29
commit ccfccefb80
3 changed files with 61 additions and 43 deletions

View File

@ -1762,6 +1762,7 @@ bind /<path> [, ...] [param*]
- 'ipv4@' -> address is always IPv4
- 'ipv6@' -> address is always IPv6
- 'unix@' -> address is a path to a local unix socket
- 'abns@' -> address is in abstract namespace (Linux only)
- 'fd@<n>' -> use file descriptor <n> inherited from the
parent. The fd must be bound and may or may not already
be listening.
@ -5750,6 +5751,7 @@ server <name> <address>[:[port]] [param*]
- 'ipv4@' -> address is always IPv4
- 'ipv6@' -> address is always IPv6
- 'unix@' -> address is a path to a local unix socket
- 'abns@' -> address is in abstract namespace (Linux only)
Any part of the address string may reference any number of
environment variables by preceding their name with a dollar
sign ('$') and optionally enclosing them with braces ('{}'),
@ -5796,6 +5798,7 @@ source <addr>[:<port>] [interface <name>]
- 'ipv4@' -> address is always IPv4
- 'ipv6@' -> address is always IPv6
- 'unix@' -> address is a path to a local unix socket
- 'abns@' -> address is in abstract namespace (Linux only)
Any part of the address string may reference any number of
environment variables by preceding their name with a dollar
sign ('$') and optionally enclosing them with braces ('{}'),

View File

@ -188,45 +188,46 @@ static int uxst_bind_listener(struct listener *listener, char *errmsg, int errle
if (ext)
goto fd_ready;
/* 1. create socket names */
if (!path[0]) {
msg = "Invalid empty name for a UNIX socket";
goto err_return;
}
if (path[0]) {
ret = snprintf(tempname, MAXPATHLEN, "%s.%d.tmp", path, pid);
if (ret < 0 || ret >= MAXPATHLEN) {
msg = "name too long for UNIX socket";
goto err_return;
}
ret = snprintf(tempname, MAXPATHLEN, "%s.%d.tmp", path, pid);
if (ret < 0 || ret >= MAXPATHLEN) {
msg = "name too long for UNIX socket";
goto err_return;
}
ret = snprintf(backname, MAXPATHLEN, "%s.%d.bak", path, pid);
if (ret < 0 || ret >= MAXPATHLEN) {
msg = "name too long for UNIX socket";
goto err_return;
}
ret = snprintf(backname, MAXPATHLEN, "%s.%d.bak", path, pid);
if (ret < 0 || ret >= MAXPATHLEN) {
msg = "name too long for UNIX socket";
goto err_return;
}
/* 2. clean existing orphaned entries */
if (unlink(tempname) < 0 && errno != ENOENT) {
msg = "error when trying to unlink previous UNIX socket";
goto err_return;
}
/* 2. clean existing orphaned entries */
if (unlink(tempname) < 0 && errno != ENOENT) {
msg = "error when trying to unlink previous UNIX socket";
goto err_return;
}
if (unlink(backname) < 0 && errno != ENOENT) {
msg = "error when trying to unlink previous UNIX socket";
goto err_return;
}
if (unlink(backname) < 0 && errno != ENOENT) {
msg = "error when trying to unlink previous UNIX socket";
goto err_return;
}
/* 3. backup existing socket */
if (link(path, backname) < 0 && errno != ENOENT) {
msg = "error when trying to preserve previous UNIX socket";
goto err_return;
}
/* 3. backup existing socket */
if (link(path, backname) < 0 && errno != ENOENT) {
msg = "error when trying to preserve previous UNIX socket";
goto err_return;
strncpy(addr.sun_path, tempname, sizeof(addr.sun_path));
addr.sun_path[sizeof(addr.sun_path) - 1] = 0;
}
else {
/* first char is zero, it's an abstract socket whose address
* is defined by all the bytes past this zero.
*/
memcpy(addr.sun_path, path, sizeof(addr.sun_path));
}
/* 4. prepare new socket */
addr.sun_family = AF_UNIX;
strncpy(addr.sun_path, tempname, sizeof(addr.sun_path));
addr.sun_path[sizeof(addr.sun_path) - 1] = 0;
fd = socket(PF_UNIX, SOCK_STREAM, 0);
if (fd < 0) {
@ -254,9 +255,9 @@ static int uxst_bind_listener(struct listener *listener, char *errmsg, int errle
/* <uid> and <gid> different from -1 will be used to change the socket owner.
* If <mode> is not 0, it will be used to restrict access to the socket.
* While it is known not to be portable on every OS, it's still useful
* where it works.
* where it works. We also don't change permissions on abstract sockets.
*/
if (!ext &&
if (!ext && path[0] &&
(((listener->bind_conf->ux.uid != -1 || listener->bind_conf->ux.gid != -1) &&
(chown(tempname, listener->bind_conf->ux.uid, listener->bind_conf->ux.gid) == -1)) ||
(listener->bind_conf->ux.mode != 0 && chmod(tempname, listener->bind_conf->ux.mode) == -1))) {
@ -275,21 +276,21 @@ static int uxst_bind_listener(struct listener *listener, char *errmsg, int errle
goto err_unlink_temp;
}
/* 5. install.
* Point of no return: we are ready, we'll switch the sockets. We don't
/* Point of no return: we are ready, we'll switch the sockets. We don't
* fear losing the socket <path> because we have a copy of it in
* backname.
* backname. Abstract sockets are not renamed.
*/
if (!ext && rename(tempname, path) < 0) {
if (!ext && path[0] && rename(tempname, path) < 0) {
msg = "cannot switch final and temporary UNIX sockets";
goto err_rename;
}
/* 6. cleanup. If we're bound to an fd inherited from the parent, we
/* Cleanup: If we're bound to an fd inherited from the parent, we
* want to ensure that destroy_uxst_socket() will never remove the
* path, and for this we simply clear the path to the socket.
* path, and for this we simply clear the path to the socket, which
* under Linux corresponds to an abstract socket.
*/
if (!ext)
if (!ext && path[0])
unlink(backname);
else
((struct sockaddr_un *)&listener->addr)->sun_path[0] = 0;

View File

@ -635,6 +635,11 @@ static struct sockaddr_storage *str2ip(const char *str, struct sockaddr_storage
* - "ipv6@" => force address to resolve as IPv6 and fail if not possible.
* - "unix@" => force address to be a path to a UNIX socket even if the
* path does not start with a '/'
* - "abns@" => force address to belong to the abstract namespace (Linux
* only). These sockets are just like Unix sockets but without
* the need for an underlying file system. The address is a
* string. Technically it's like a Unix socket with a zero in
* the first byte of the address.
* - "fd@" => an integer must follow, and is a file descriptor number.
*
* Also note that in order to avoid any ambiguity with IPv6 addresses, the ':'
@ -655,6 +660,7 @@ struct sockaddr_storage *str2sa_range(const char *str, int *low, int *high, char
char *back, *str2;
char *port1, *port2;
int portl, porth, porta;
int abstract = 0;
portl = porth = porta = 0;
@ -668,6 +674,12 @@ struct sockaddr_storage *str2sa_range(const char *str, int *low, int *high, char
if (strncmp(str2, "unix@", 5) == 0) {
str2 += 5;
abstract = 0;
ss.ss_family = AF_UNIX;
}
else if (strncmp(str2, "abns@", 5) == 0) {
str2 += 5;
abstract = 1;
ss.ss_family = AF_UNIX;
}
else if (strncmp(str2, "ipv4@", 5) == 0) {
@ -706,7 +718,7 @@ struct sockaddr_storage *str2sa_range(const char *str, int *low, int *high, char
/* complete unix socket path name during startup or soft-restart is
* <unix_bind_prefix><path>.<pid>.<bak|tmp>
*/
prefix_path_len = pfx ? strlen(pfx) : 0;
prefix_path_len = (pfx && !abstract) ? strlen(pfx) : 0;
max_path_len = (sizeof(((struct sockaddr_un *)&ss)->sun_path) - 1) -
(prefix_path_len ? prefix_path_len + 1 + 5 + 1 + 3 : 0);
@ -716,9 +728,11 @@ struct sockaddr_storage *str2sa_range(const char *str, int *low, int *high, char
goto out;
}
/* when abstract==1, we skip the first zero and copy all bytes except the trailing zero */
memset(((struct sockaddr_un *)&ss)->sun_path, 0, sizeof(((struct sockaddr_un *)&ss)->sun_path));
if (prefix_path_len)
memcpy(((struct sockaddr_un *)&ss)->sun_path, pfx, prefix_path_len);
memcpy(((struct sockaddr_un *)&ss)->sun_path + prefix_path_len, str2, adr_len + 1);
memcpy(((struct sockaddr_un *)&ss)->sun_path + prefix_path_len + abstract, str2, adr_len + 1 - abstract);
}
else { /* IPv4 and IPv6 */
port1 = strrchr(str2, ':');