From 041751c13a88ade79d45412db1c6bde6c0f2f901 Mon Sep 17 00:00:00 2001 From: Conrad Hoffmann Date: Tue, 20 May 2014 14:28:24 +0200 Subject: [PATCH] BUG/MEDIUM: polling: fix possible CPU hogging of worker processes after receiving SIGUSR1. When run in daemon mode (i.e. with at least one forked process) and using the epoll poller, sending USR1 (graceful shutdown) to the worker processes can cause some workers to start running at 100% CPU. Precondition is having an established HTTP keep-alive connection when the signal is received. The cloned (during fork) listening sockets do not get closed in the parent process, thus they do not get removed from the epoll set automatically (see man 7 epoll). This can lead to the process receiving epoll events that it doesn't feel responsible for, resulting in an endless loop around epoll_wait() delivering these events. The solution is to explicitly remove these file descriptors from the epoll set. To not degrade performance, care was taken to only do this when necessary, i.e. when the file descriptor was cloned during fork. 
Signed-off-by: Conrad Hoffmann [wt: a backport to 1.4 could be studied though chances to catch the bug are low] --- include/proto/fd.h | 1 + include/types/fd.h | 1 + src/ev_epoll.c | 15 ++++++++++++++- src/fd.c | 7 +++++++ 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/include/proto/fd.h b/include/proto/fd.h index 605dc215f..b0a478e4c 100644 --- a/include/proto/fd.h +++ b/include/proto/fd.h @@ -335,6 +335,7 @@ static inline void fd_insert(int fd) fdtab[fd].ev = 0; fdtab[fd].new = 1; fdtab[fd].linger_risk = 0; + fdtab[fd].cloned = 0; if (fd + 1 > maxfd) maxfd = fd + 1; } diff --git a/include/types/fd.h b/include/types/fd.h index 1c2c7c808..057d968ad 100644 --- a/include/types/fd.h +++ b/include/types/fd.h @@ -86,6 +86,7 @@ struct fdtab { unsigned char new:1; /* 1 if this fd has just been created */ unsigned char updated:1; /* 1 if this fd is already in the update list */ unsigned char linger_risk:1; /* 1 if we must kill lingering before closing */ + unsigned char cloned:1; /* 1 if a cloned socket, requires EPOLL_CTL_DEL on close */ }; /* less often used information */ diff --git a/src/ev_epoll.c b/src/ev_epoll.c index 2849ec6c1..9d359b2ab 100644 --- a/src/ev_epoll.c +++ b/src/ev_epoll.c @@ -45,6 +45,19 @@ static struct epoll_event ev; #define EPOLLRDHUP 0x2000 #endif +/* + * Immediately remove file descriptor from epoll set upon close. + * Since we forked, some fds share inodes with the other process, and epoll may + * send us events even though this process closed the fd (see man 7 epoll, + * "Questions and answers", Q 6). 
+ */ +REGPRM1 static void __fd_clo(int fd) +{ + if (unlikely(fdtab[fd].cloned)) { + epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fd, &ev); + } +} + /* * Linux epoll() poller */ @@ -267,7 +280,7 @@ static void _do_register(void) p->pref = 300; p->private = NULL; - p->clo = NULL; + p->clo = __fd_clo; p->test = _do_test; p->init = _do_init; p->term = _do_term; diff --git a/src/fd.c b/src/fd.c index 66f1e8bd6..c238bc880 100644 --- a/src/fd.c +++ b/src/fd.c @@ -438,6 +438,13 @@ int list_pollers(FILE *out) */ int fork_poller() { + int fd; + for (fd = 0; fd <= maxfd; fd++) { + if (fdtab[fd].owner) { + fdtab[fd].cloned = 1; + } + } + if (cur_poller.fork) { if (cur_poller.fork(&cur_poller)) return 1;