[MEDIUM] limit the number of events returned by *poll*

By default, epoll/kqueue used to return as many events as possible.
This could sometimes cause huge latencies (latencies of up to 400 ms
have been observed with many thousands of fds at once). Limiting the
number of events returned also reduces the latency by avoiding too
much blind processing. The value is set to 200 by default and can be
changed in the global section using the tune.maxpollevents parameter.
This commit is contained in:
Willy Tarreau 2007-06-03 17:16:49 +02:00
parent fb8983f21b
commit 1db37710dc
9 changed files with 53 additions and 7 deletions

View File

@ -128,6 +128,7 @@ the following ones :
- pidfile <file>
- ulimit-n <number>
- stats
- tune.maxpollevents <number>
1.1) Event logging
@ -338,6 +339,12 @@ Tests have shown constant performance from 1 to 20000 simultaneous sessions.
Version 1.3.9 introduced kqueue() for FreeBSD/OpenBSD, and speculative epoll()
which consists in trying to perform I/O before queuing the events via syscalls.
In order to optimize latency, it is now possible to limit the number of events
returned by a single call to poll. The limit is fixed to 200 by default. If a
lower latency is sought, it may be useful to reduce this value by using the
'tune.maxpollevents' parameter in the 'global' section. Increasing it will
slightly save CPU cycles in the presence of a large number of connections.
Haproxy will use kqueue() or speculative epoll() when available, then epoll(),
and will fall back to poll(), then to select(). However, if for any reason you
need to disable epoll() or poll() (eg. because of a bug or just to compare
@ -351,6 +358,7 @@ Example :
# use only select()
noepoll
nopoll
tune.maxpollevents 100
Note :
------

View File

@ -134,6 +134,7 @@ support
- quiet
- pidfile <fichier>
- ulimit-n <nombre>
- tune.maxpollevents <nombre>
1.1) Journalisation des événements
@ -362,6 +363,13 @@ qu'une variante appel
les opérations d'entrées/sorties avant de chaîner les événements par les appels
système.
Afin d'optimiser la latence, il est désormais possible de limiter le nombre
d'événements remontés à chaque appel. La limite par défaut est fixée à 200. Si
une latence plus petite est recherchée, il peut être justifié d'abaisser cette
limite par l'utilisation du paramètre 'tune.maxpollevents' dans la section
'global'. L'augmenter permettra d'économiser un peu le processeur en présence
de très grands nombres de connexions simultanées.
Haproxy utilisera kqueue() ou speculative epoll() lorsque ce sera disponible,
puis epoll(), et se repliera sur poll(), puis en dernier lieu sur select().
Cependant, si pour une raison quelconque il s'avérait nécessaire de désactiver
@ -375,6 +383,7 @@ Exemple :
# utiliser seulement select()
noepoll
nopoll
tune.maxpollevents 100
Remarque :
----------

View File

@ -2,7 +2,7 @@
include/common/defaults.h
Miscellaneous default values.
Copyright (C) 2000-2006 Willy Tarreau - w@1wt.eu
Copyright (C) 2000-2007 Willy Tarreau - w@1wt.eu
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@ -78,6 +78,12 @@
#define MIN_RET_FOR_READ_LOOP 1460
#endif
// The max number of events returned in one call to poll/epoll. Too small a
// value will cause lots of calls, and too high a value may cause high latency.
// This is only the built-in default: the #ifndef guard lets a definition made
// earlier (e.g. at build time) take precedence, and it is applied at startup
// only when 'tune.maxpollevents' was not set to a positive value.
#ifndef MAX_POLL_EVENTS
#define MAX_POLL_EVENTS 200
#endif
// cookie delimiter in "prefix" mode. This character is inserted between the
// persistence cookie and the original value. The '~' is allowed by RFC2965,
// and should not be too common in server names.

View File

@ -60,6 +60,9 @@ struct global {
int logfac1, logfac2;
int loglev1, loglev2;
struct sockaddr_in logsrv1, logsrv2;
struct {
int maxpollevents; /* max number of poll events at once */
} tune;
};
extern struct global global;

View File

@ -276,6 +276,17 @@ int cfg_parse_global(const char *file, int linenum, char **args)
else if (!strcmp(args[0], "stats")) {
global.mode |= MODE_STATS;
}
else if (!strcmp(args[0], "tune.maxpollevents")) {
if (global.tune.maxpollevents != 0) {
Alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
return 0;
}
if (*(args[1]) == 0) {
Alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
return -1;
}
global.tune.maxpollevents = atol(args[1]);
}
else if (!strcmp(args[0], "uid")) {
if (global.uid != 0) {
Alert("parsing [%s:%d] : user/uid already specified. Continuing.\n", file, linenum);

View File

@ -18,6 +18,7 @@
#include <common/config.h>
#include <common/standard.h>
#include <common/time.h>
#include <common/tools.h>
#include <types/fd.h>
#include <types/global.h>
@ -238,7 +239,8 @@ REGPRM2 static void _do_poll(struct poller *p, struct timeval *exp)
else
wait_time = __tv_ms_elapsed(&now, exp) + 1;
status = epoll_wait(epoll_fd, epoll_events, maxfd, wait_time);
fd = MIN(maxfd, global.tune.maxpollevents);
status = epoll_wait(epoll_fd, epoll_events, fd, wait_time);
tv_now(&now);
for (count = 0; count < status; count++) {
@ -278,7 +280,7 @@ REGPRM1 static int _do_init(struct poller *p)
goto fail_fd;
epoll_events = (struct epoll_event*)
calloc(1, sizeof(struct epoll_event) * global.maxsock);
calloc(1, sizeof(struct epoll_event) * global.tune.maxpollevents);
if (epoll_events == NULL)
goto fail_ee;

View File

@ -24,6 +24,7 @@
#include <common/compat.h>
#include <common/config.h>
#include <common/time.h>
#include <common/tools.h>
#include <types/fd.h>
#include <types/global.h>
@ -118,11 +119,12 @@ REGPRM2 static void _do_poll(struct poller *p, struct timeval *exp)
to_ptr = &timeout;
}
fd = MIN(maxfd, global.tune.maxpollevents);
status = kevent(kqueue_fd, // int kq
NULL, // const struct kevent *changelist
0, // int nchanges
kev, // struct kevent *eventlist
maxfd, // int nevents
fd, // int nevents
to_ptr); // const struct timespec *timeout
tv_now(&now);
@ -161,7 +163,7 @@ REGPRM1 static int _do_init(struct poller *p)
if (kqueue_fd < 0)
goto fail_fd;
kev = (struct kevent*)calloc(1, sizeof(struct kevent) * global.maxsock);
kev = (struct kevent*)calloc(1, sizeof(struct kevent) * global.tune.maxpollevents);
if (kev == NULL)
goto fail_kev;

View File

@ -18,6 +18,7 @@
#include <common/config.h>
#include <common/standard.h>
#include <common/time.h>
#include <common/tools.h>
#include <types/fd.h>
#include <types/global.h>
@ -389,7 +390,8 @@ REGPRM2 static void _do_poll(struct poller *p, struct timeval *exp)
}
/* now let's wait for real events */
status = epoll_wait(epoll_fd, epoll_events, maxfd, wait_time);
fd = MIN(maxfd, global.tune.maxpollevents);
status = epoll_wait(epoll_fd, epoll_events, fd, wait_time);
tv_now(&now);
@ -439,7 +441,7 @@ REGPRM1 static int _do_init(struct poller *p)
goto fail_fd;
epoll_events = (struct epoll_event*)
calloc(1, sizeof(struct epoll_event) * global.maxsock);
calloc(1, sizeof(struct epoll_event) * global.tune.maxpollevents);
if (epoll_events == NULL)
goto fail_ee;

View File

@ -520,6 +520,9 @@ void init(int argc, char **argv)
global.maxsock += global.maxconn * 2; /* each connection needs two sockets */
if (global.tune.maxpollevents <= 0)
global.tune.maxpollevents = MAX_POLL_EVENTS;
if (arg_mode & (MODE_DEBUG | MODE_FOREGROUND)) {
/* command line debug mode inhibits configuration mode */
global.mode &= ~(MODE_DAEMON | MODE_QUIET);