MEDIUM: fd: add experimental support for edge-triggered polling

Some of the recent optimizations around the polling to save a few
epoll_ctl() calls have shown that they could also cause some trouble.
However, over time our code base has become totally asynchronous with
I/Os always attempted from the upper layers and only retried at the
bottom, making it look like we're getting closer to EPOLLET support.

There are showstoppers there such as the listeners which cannot support
this. But given that most of the epoll_ctl() dance comes from the
connections, we can try to enable edge-triggered polling on connections.

What this patch does is to add a new global tunable "tune.fd.edge-triggered",
that makes fd_insert() automatically set an et_possible bit on the fd if
the I/O callback is conn_fd_handler. When the epoll code sees an update
for such an FD, it immediately registers it in both directions the first
time and doesn't update it anymore.

On a few tests it proved quite useful with a 14% request rate increase in
a H2->H1 scenario, reducing the epoll_ctl() calls from 2 per request to
2 per connection.

The option is obviously disabled by default as bugs are still expected,
particularly around the subscribe() code where it is possible that some
layers do not always re-attempt reading data after being woken up.
This commit is contained in:
Willy Tarreau 2020-06-18 08:58:47 +02:00
parent 13cd54c08b
commit bc52bec163
6 changed files with 62 additions and 0 deletions

View File

@ -675,6 +675,7 @@ The following keywords are supported in the "global" section :
- tune.bufsize
- tune.chksize
- tune.comp.maxlevel
- tune.fd.edge-triggered
- tune.h2.header-table-size
- tune.h2.initial-window-size
- tune.h2.max-concurrent-streams
@ -1874,6 +1875,13 @@ tune.fail-alloc
success). This is useful to debug and make sure memory failures are handled
gracefully.
tune.fd.edge-triggered { on | off } [ EXPERIMENTAL ]
Enables ('on') or disables ('off') the edge-triggered polling mode for FDs
that support it. This is currently only supported with epoll. It may noticeably
reduce the number of epoll_ctl() calls and slightly improve performance in
certain scenarios. This is still experimental, it may result in frozen
connections if bugs are still present, and is disabled by default.
tune.h2.header-table-size <number>
Sets the HTTP/2 dynamic header table size. It defaults to 4096 bytes and
cannot be larger than 65536 bytes. A larger value may help certain clients

View File

@ -133,6 +133,7 @@ struct fdtab {
unsigned char linger_risk:1; /* 1 if we must kill lingering before closing */
unsigned char cloned:1; /* 1 if a cloned socket, requires EPOLL_CTL_DEL on close */
unsigned char initialized:1; /* 1 if init phase was done on this fd (e.g. set non-blocking) */
unsigned char et_possible:1; /* 1 if edge-triggered is possible on this FD */
} THREAD_ALIGNED(64);
/* polled mask, one bit per thread and per direction for each FD */

View File

@ -30,6 +30,7 @@
#include <haproxy/activity.h>
#include <haproxy/api.h>
#include <haproxy/fd-t.h>
#include <haproxy/global.h>
#include <haproxy/thread.h>
#include <haproxy/ticks.h>
#include <haproxy/time.h>
@ -435,6 +436,7 @@ static inline void fd_update_events(int fd, unsigned char evts)
static inline void fd_insert(int fd, void *owner, void (*iocb)(int fd), unsigned long thread_mask)
{
int locked = fdtab[fd].running_mask != tid_bit;
extern void conn_fd_handler(int);
if (locked)
fd_set_running_excl(fd);
@ -443,6 +445,12 @@ static inline void fd_insert(int fd, void *owner, void (*iocb)(int fd), unsigned
fdtab[fd].ev = 0;
fdtab[fd].linger_risk = 0;
fdtab[fd].cloned = 0;
fdtab[fd].et_possible = 0;
/* conn_fd_handler should support edge-triggered FDs */
if ((global.tune.options & GTUNE_FD_ET) && fdtab[fd].iocb == conn_fd_handler)
fdtab[fd].et_possible = 1;
fdtab[fd].thread_mask = thread_mask;
/* note: do not reset polled_mask here as it indicates which poller
* still knows this FD from a possible previous round.

View File

@ -66,6 +66,7 @@
#define GTUNE_STRICT_LIMITS (1<<15)
#define GTUNE_INSECURE_FORK (1<<16)
#define GTUNE_INSECURE_SETUID (1<<17)
#define GTUNE_FD_ET (1<<18)
/* SSL server verify mode */
enum {

View File

@ -59,6 +59,20 @@ static void _update_fd(int fd)
en = fdtab[fd].state;
/* Try to force EPOLLET on FDs that support it */
if (fdtab[fd].et_possible) {
/* already done ? */
if (polled_mask[fd].poll_recv & polled_mask[fd].poll_send & tid_bit)
return;
/* enable ET polling in both directions */
_HA_ATOMIC_OR(&polled_mask[fd].poll_recv, tid_bit);
_HA_ATOMIC_OR(&polled_mask[fd].poll_send, tid_bit);
opcode = EPOLL_CTL_ADD;
ev.events = EPOLLIN | EPOLLRDHUP | EPOLLOUT | EPOLLET;
goto done;
}
/* if we're already polling or are going to poll for this FD and it's
* neither active nor ready, force it to be active so that we don't
* needlessly unsubscribe then re-subscribe it.
@ -120,6 +134,7 @@ static void _update_fd(int fd)
if (en & FD_EV_ACTIVE_W)
ev.events |= EPOLLOUT;
done:
ev.data.fd = fd;
epoll_ctl(epoll_fd[tid], opcode, fd, &ev);
}

View File

@ -88,9 +88,11 @@
#endif
#include <haproxy/api.h>
#include <haproxy/cfgparse.h>
#include <haproxy/fd.h>
#include <haproxy/global.h>
#include <haproxy/port_range.h>
#include <haproxy/tools.h>
struct fdtab *fdtab = NULL; /* array of all the file descriptors */
@ -807,6 +809,33 @@ int fork_poller()
return 1;
}
/* Parser for the global "tune.fd.edge-triggered" keyword. The single argument
 * must be either "on", which sets GTUNE_FD_ET in global.tune.options, or
 * "off", which clears it. Returns 0 on success, or -1 after filling <err>
 * when extra arguments are present or the value is not recognized.
 */
static int cfg_parse_tune_fd_edge_triggered(char **args, int section_type, struct proxy *curpx,
struct proxy *defpx, const char *file, int line,
char **err)
{
	const char *value;
	int enable;

	/* exactly one argument is accepted after the keyword */
	if (too_many_args(1, args, err, NULL))
		return -1;

	value = args[1];

	if (!strcmp(value, "on")) {
		enable = 1;
	}
	else if (!strcmp(value, "off")) {
		enable = 0;
	}
	else {
		memprintf(err, "'%s' expects either 'on' or 'off' but got '%s'.", args[0], value);
		return -1;
	}

	/* only touch the global option once the value is known to be valid */
	if (enable)
		global.tune.options |= GTUNE_FD_ET;
	else
		global.tune.options &= ~GTUNE_FD_ET;

	return 0;
}
/* config keyword parsers: the configuration keywords handled by this file.
 * Each entry gives the section the keyword belongs to, the keyword string,
 * and the function that parses it.
 */
static struct cfg_kw_list cfg_kws = {ILH, {
{ CFG_GLOBAL, "tune.fd.edge-triggered", cfg_parse_tune_fd_edge_triggered },
{ 0, NULL, NULL } /* all-zero entry; presumably the end-of-list marker */
}};
/* hands the list above to cfg_register_keywords() through the STG_REGISTER initcall */
INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
REGISTER_PER_THREAD_ALLOC(alloc_pollers_per_thread);
REGISTER_PER_THREAD_INIT(init_pollers_per_thread);
REGISTER_PER_THREAD_DEINIT(deinit_pollers_per_thread);