* released 1.2.5-pre3

* experimental epoll() support (use temporary '-e' argument)
This commit is contained in:
willy tarreau 2005-12-18 01:09:15 +01:00
parent c1f4753d60
commit ad90a0c80e
4 changed files with 287 additions and 64 deletions

View File

@ -1,7 +1,10 @@
ChangeLog :
===========
2005/04/24
2005/04/25 : 1.2.5-pre3
- experimental epoll() support (use temporary '-e' argument)
2005/04/24 : 1.2.5-pre2
- implemented the HTTP 303 code for error redirection. This forces the
browser to fetch the given URI with a GET request. The new keyword for
this is 'errorloc303', and a new 'errorloc302' keyword has been created
@ -9,7 +12,7 @@ ChangeLog :
- added more controls in the parser for valid use of '\x' sequence.
- few fixes from Alex & Klaus
2005/02/17
2005/02/17 : 1.2.5-pre1
- fixed a few errors in the documentation
2005/02/13

View File

@ -73,8 +73,8 @@ TARGET_OPTS=$(COPTS.$(TARGET))
REGEX_OPTS=$(COPTS.$(REGEX))
CPU_OPTS=$(COPTS.$(CPU))
COPTS=-I. $(CPU_OPTS) $(TARGET_OPTS) $(REGEX_OPTS) $(SMALL_OPTS)
LIBS=$(LIBS.$(TARGET)) $(LIBS.$(REGEX))
COPTS=-I. $(ADDINC) $(CPU_OPTS) $(TARGET_OPTS) $(REGEX_OPTS) $(SMALL_OPTS)
LIBS=$(LIBS.$(TARGET)) $(LIBS.$(REGEX)) $(ADDLIB)
# - use -DSTATTIME=0 to disable statistics, else specify an interval in
# milliseconds.

3
TODO
View File

@ -139,4 +139,5 @@ Todo for 1.2
* appcookie
- weighted round robin
- option to shutdown(listen_sock) when max connections reached
* epoll
- replace the event scheduler with an O(log(N)) one

337
haproxy.c
View File

@ -62,6 +62,8 @@
#include <strings.h>
#endif
#include <sys/epoll.h>
#include "include/appsession.h"
#define HAPROXY_VERSION "1.2.5"
@ -575,6 +577,7 @@ struct fdtab {
/*********************************************************************/
int cfg_maxpconn = 2000; /* # of simultaneous connections per proxy (-N) */
int cfg_use_epoll = 0; /* use epoll() instead of select() ? */
char *cfg_cfgfile = NULL; /* configuration file */
char *progname = NULL; /* program name */
int pid; /* current process id */
@ -607,6 +610,11 @@ fd_set *ReadEvent,
*StaticReadEvent,
*StaticWriteEvent;
/* used by the epoll() emulation of select() */
fd_set *PrevReadEvent, *PrevWriteEvent;
struct epoll_event *epoll_events;
int epoll_fd;
void **pool_session = NULL,
**pool_buffer = NULL,
**pool_fdtab = NULL,
@ -803,7 +811,9 @@ void usage(char *name) {
" -c check mode : only check config file and exit\n"
" -n sets the maximum total # of connections (%d)\n"
" -N sets the default, per-proxy maximum # of connections (%d)\n"
" -p writes pids of all children to this file\n\n",
" -p writes pids of all children to this file\n"
" -e tries to use epoll() instead of select()\n"
"\n",
name, DEFAULT_MAXCONN, cfg_maxpconn);
exit(1);
}
@ -1384,6 +1394,20 @@ static inline struct timeval *tv_min(struct timeval *tvmin,
static inline void fd_delete(int fd) {
FD_CLR(fd, StaticReadEvent);
FD_CLR(fd, StaticWriteEvent);
if (cfg_use_epoll) {
struct epoll_event ev;
ev.data.fd = fd;
if (epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fd, &ev) < 0) {
// it's impossible to tell whether it has already
// been done.
//perror("epoll_ctl(DEL)");
//exit(1);
}
FD_CLR(fd, PrevReadEvent);
FD_CLR(fd, PrevWriteEvent);
}
close(fd);
fdtab[fd].state = FD_STCLOSE;
@ -2130,6 +2154,20 @@ int event_srv_write(int fd) {
void client_retnclose(struct session *s, int len, const char *msg) {
FD_CLR(s->cli_fd, StaticReadEvent);
FD_SET(s->cli_fd, StaticWriteEvent);
if (cfg_use_epoll) {
struct epoll_event ev;
ev.data.fd = s->cli_fd;
if (epoll_ctl(epoll_fd, EPOLL_CTL_DEL, s->cli_fd, &ev) < 0) {
// it's impossible to tell whether it has already
// been done.
//perror("epoll_ctl(DEL)");
//exit(1);
}
FD_CLR(s->cli_fd, PrevReadEvent);
FD_CLR(s->cli_fd, PrevWriteEvent);
}
tv_eternity(&s->crexpire);
shutdown(s->cli_fd, SHUT_RD);
s->cli_state = CL_STSHUTR;
@ -4832,71 +4870,232 @@ void select_loop() {
next_time = MINTIME(time2, next_time);
#endif
if (next_time > 0) { /* FIXME */
/* Convert to timeval */
/* to avoid eventual select loops due to timer precision */
next_time += SCHEDULER_RESOLUTION;
delta.tv_sec = next_time / 1000;
delta.tv_usec = (next_time % 1000) * 1000;
}
else if (next_time == 0) { /* allow select to return immediately when needed */
delta.tv_sec = delta.tv_usec = 0;
}
if (cfg_use_epoll) {
/* use epoll() */
int fds, count;
int pr, pw, sr, sw;
unsigned rn, ro, wn, wo; /* read new, read old, write new, write old */
struct epoll_event ev;
/* let's restore fdset state */
for (fds = 0; (fds << INTBITS) < maxfd; fds++) {
readnotnull = 0; writenotnull = 0;
for (i = 0; i < (maxfd + FD_SETSIZE - 1)/(8*sizeof(int)); i++) {
readnotnull |= (*(((int*)ReadEvent)+i) = *(((int*)StaticReadEvent)+i)) != 0;
writenotnull |= (*(((int*)WriteEvent)+i) = *(((int*)StaticWriteEvent)+i)) != 0;
}
rn = ((int*)StaticReadEvent)[fds]; ro = ((int*)PrevReadEvent)[fds];
wn = ((int*)StaticWriteEvent)[fds]; wo = ((int*)PrevWriteEvent)[fds];
// /* just a verification code, needs to be removed for performance */
// for (i=0; i<maxfd; i++) {
// if (FD_ISSET(i, ReadEvent) != FD_ISSET(i, StaticReadEvent))
// abort();
// if (FD_ISSET(i, WriteEvent) != FD_ISSET(i, StaticWriteEvent))
// abort();
//
// }
status = select(maxfd,
readnotnull ? ReadEvent : NULL,
writenotnull ? WriteEvent : NULL,
NULL,
(next_time >= 0) ? &delta : NULL);
/* this is an experiment on the separation of the select work */
// status = (readnotnull ? select(maxfd, ReadEvent, NULL, NULL, (next_time >= 0) ? &delta : NULL) : 0);
// status |= (writenotnull ? select(maxfd, NULL, WriteEvent, NULL, (next_time >= 0) ? &delta : NULL) : 0);
tv_now(&now);
if (status > 0) { /* must proceed with events */
int fds;
char count;
for (fds = 0; (fds << INTBITS) < maxfd; fds++)
if ((((int *)(ReadEvent))[fds] | ((int *)(WriteEvent))[fds]) != 0)
if ((ro^rn) | (wo^wn)) {
for (count = 1<<INTBITS, fd = fds << INTBITS; count && fd < maxfd; count--, fd++) {
/* if we specify read first, the accepts and zero reads will be
* seen first. Moreover, system buffers will be flushed faster.
*/
if (fdtab[fd].state == FD_STCLOSE)
continue;
if (FD_ISSET(fd, ReadEvent))
fdtab[fd].read(fd);
if (FD_ISSET(fd, WriteEvent))
fdtab[fd].write(fd);
#define WE_KNOW_HOW_FDSET_WORKS
#ifdef WE_KNOW_HOW_FDSET_WORKS
#define WE_REALLY_NOW_THAT_FDSETS_ARE_INTS
#ifdef WE_REALLY_NOW_THAT_FDSETS_ARE_INTS
pr = (ro >> ((1<<INTBITS)-count)) & 1;
pw = (wo >> ((1<<INTBITS)-count)) & 1;
sr = (rn >> ((1<<INTBITS)-count)) & 1;
sw = (wn >> ((1<<INTBITS)-count)) & 1;
#else
pr = FD_ISSET(fd&((1<<INTBITS)-1), (typeof(fd_set*))&ro);
pw = FD_ISSET(fd&((1<<INTBITS)-1), (typeof(fd_set*))&wo);
sr = FD_ISSET(fd&((1<<INTBITS)-1), (typeof(fd_set*))&rn);
sw = FD_ISSET(fd&((1<<INTBITS)-1), (typeof(fd_set*))&wn);
#endif
#else
pr = FD_ISSET(fd, PrevReadEvent);
pw = FD_ISSET(fd, PrevWriteEvent);
sr = FD_ISSET(fd, StaticReadEvent);
sw = FD_ISSET(fd, StaticWriteEvent);
#endif
if (!((sr^pr) | (sw^pw)))
continue;
ev.events = (sr ? EPOLLIN : 0) | (sw ? EPOLLOUT : 0);
ev.data.fd = fd;
if ((pr | pw)) {
/* the file-descriptor already exists... */
if ((sr | sw)) {
/* ...and it will still exist */
if (epoll_ctl(epoll_fd, EPOLL_CTL_MOD, fd, &ev) < 0) {
perror("epoll_ctl(MOD)");
exit(1);
}
} else {
/* ...and it will be removed */
if (epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fd, &ev) < 0) {
perror("epoll_ctl(DEL)");
exit(1);
}
}
} else {
/* the file-descriptor did not exist, let's add it */
if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
perror("epoll_ctl(ADD)");
exit(1);
}
}
}
}
else {
// fprintf(stderr,"select returned %d, maxfd=%d\n", status, maxfd);
((int*)PrevReadEvent)[fds] = rn;
((int*)PrevWriteEvent)[fds] = wn;
}
#if useless_optimization
unsigned a, d, m; /* add mask, del mask, mod mask */
a = (rn|wn) & ~(ro|wo); /* fds to add */
d = (ro|wo) & ~(rn|wn); /* fds to remove, normally none */
m = (ro^rn) | (wo^wn); /* fds which change */
if (m) {
struct epoll_event ev;
m &= ~(a|d); /* keep only changes, not add/del */
if (m) { /* fds which only change */
for (count = 1<<INTBITS, fd = fds << INTBITS; count && fd < maxfd; count--, fd++) {
ev.data.fd = fd;
ev.events = 0;
if ((FD_ISSET(fd, PrevReadEvent) || FD_ISSET(fd, PrevWriteEvent)) &&
(FD_ISSET(fd, StaticReadEvent) || FD_ISSET(fd, StaticWriteEvent))) {
if (FD_ISSET(fd, StaticReadEvent))
ev.events |= EPOLLIN;
if (FD_ISSET(fd, StaticWriteEvent))
ev.events |= EPOLLOUT;
if (ev.events && epoll_ctl(epoll_fd, EPOLL_CTL_MOD, fd, &ev) < 0) {
perror("epoll_ctl(MOD)");
exit(1);
}
}
}
}
if (a) { /* fds to add */
// printf("a=%08x\n", a);
for (count = 1<<INTBITS, fd = fds << INTBITS; count && fd < maxfd; count--, fd++) {
ev.data.fd = fd;
ev.events = 0;
if (!FD_ISSET(fd, PrevReadEvent) && !FD_ISSET(fd, PrevWriteEvent)) {
if (FD_ISSET(fd, StaticReadEvent))
ev.events |= EPOLLIN;
if (FD_ISSET(fd, StaticWriteEvent))
ev.events |= EPOLLOUT;
if (ev.events && epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
perror("epoll_ctl(ADD)");
exit(1);
}
}
}
}
if (d) { /* fds to delete */
for (count = 1<<INTBITS, fd = fds << INTBITS; count && fd < maxfd; count--, fd++) {
ev.data.fd = fd;
ev.events = 0;
if (FD_ISSET(fd, StaticReadEvent) || FD_ISSET(fd, StaticWriteEvent))
continue;
if (!FD_ISSET(fd, PrevReadEvent) && !FD_ISSET(fd, PrevWriteEvent))
continue;
if (epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fd, &ev) < 0) {
perror("epoll_ctl(DEL)");
exit(1);
}
}
}
((int*)PrevReadEvent)[fds] = rn;
((int*)PrevWriteEvent)[fds] = wn;
}
#endif
}
/* now let's wait for events */
status = epoll_wait(epoll_fd, epoll_events, maxfd, next_time);
tv_now(&now);
for (count = 0; count < status; count++) {
fd = epoll_events[count].data.fd;
if (fdtab[fd].state == FD_STCLOSE)
continue;
if (epoll_events[count].events & ( EPOLLIN | EPOLLERR | EPOLLHUP ))
fdtab[fd].read(fd);
if (fdtab[fd].state == FD_STCLOSE)
continue;
if (epoll_events[count].events & ( EPOLLOUT | EPOLLERR | EPOLLHUP ))
fdtab[fd].write(fd);
}
} else {
/* use select() */
if (next_time > 0) { /* FIXME */
/* Convert to timeval */
/* to avoid eventual select loops due to timer precision */
next_time += SCHEDULER_RESOLUTION;
delta.tv_sec = next_time / 1000;
delta.tv_usec = (next_time % 1000) * 1000;
}
else if (next_time == 0) { /* allow select to return immediately when needed */
delta.tv_sec = delta.tv_usec = 0;
}
/* let's restore fdset state */
readnotnull = 0; writenotnull = 0;
for (i = 0; i < (maxfd + FD_SETSIZE - 1)/(8*sizeof(int)); i++) {
readnotnull |= (*(((int*)ReadEvent)+i) = *(((int*)StaticReadEvent)+i)) != 0;
writenotnull |= (*(((int*)WriteEvent)+i) = *(((int*)StaticWriteEvent)+i)) != 0;
}
// /* just a verification code, needs to be removed for performance */
// for (i=0; i<maxfd; i++) {
// if (FD_ISSET(i, ReadEvent) != FD_ISSET(i, StaticReadEvent))
// abort();
// if (FD_ISSET(i, WriteEvent) != FD_ISSET(i, StaticWriteEvent))
// abort();
//
// }
status = select(maxfd,
readnotnull ? ReadEvent : NULL,
writenotnull ? WriteEvent : NULL,
NULL,
(next_time >= 0) ? &delta : NULL);
/* this is an experiment on the separation of the select work */
// status = (readnotnull ? select(maxfd, ReadEvent, NULL, NULL, (next_time >= 0) ? &delta : NULL) : 0);
// status |= (writenotnull ? select(maxfd, NULL, WriteEvent, NULL, (next_time >= 0) ? &delta : NULL) : 0);
tv_now(&now);
if (status > 0) { /* must proceed with events */
int fds;
char count;
for (fds = 0; (fds << INTBITS) < maxfd; fds++)
if ((((int *)(ReadEvent))[fds] | ((int *)(WriteEvent))[fds]) != 0)
for (count = 1<<INTBITS, fd = fds << INTBITS; count && fd < maxfd; count--, fd++) {
/* if we specify read first, the accepts and zero reads will be
* seen first. Moreover, system buffers will be flushed faster.
*/
if (fdtab[fd].state == FD_STCLOSE)
continue;
if (FD_ISSET(fd, ReadEvent))
fdtab[fd].read(fd);
if (FD_ISSET(fd, WriteEvent))
fdtab[fd].write(fd);
}
}
else {
// fprintf(stderr,"select returned %d, maxfd=%d\n", status, maxfd);
}
}
}
}
@ -6795,6 +6994,8 @@ void init(int argc, char **argv) {
display_version();
exit(0);
}
else if (*flag == 'e')
cfg_use_epoll = 1;
else if (*flag == 'V')
arg_mode |= MODE_VERBOSE;
else if (*flag == 'd')
@ -6897,6 +7098,22 @@ void init(int argc, char **argv) {
sizeof(fd_set) *
(global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE);
if (cfg_use_epoll) {
epoll_fd = epoll_create(global.maxsock + 1);
if (epoll_fd < 0) {
Warning("epoll() is not available. Using select() instead.\n");
cfg_use_epoll = 0;
} else {
epoll_events = (struct epoll_event*) calloc(1, sizeof(struct epoll_event) * global.maxsock);
PrevReadEvent = (fd_set *)calloc(1,
sizeof(fd_set) *
(global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE);
PrevWriteEvent = (fd_set *)calloc(1,
sizeof(fd_set) *
(global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE);
}
}
fdtab = (struct fdtab *)calloc(1,
sizeof(struct fdtab) * (global.maxsock));
for (i = 0; i < global.maxsock; i++) {
@ -7109,6 +7326,8 @@ void deinit(void){
/* Release the fd_set bitmaps and the fd table. Every pointer must be handed
 * to free() exactly once, and each guard must test the same pointer it frees:
 * PrevReadEvent/PrevWriteEvent are only allocated when epoll() is in use, so
 * freeing ReadEvent/WriteEvent under their guard would double-free those
 * sets and leak the Prev* ones.
 */
if (ReadEvent) free(ReadEvent);
if (WriteEvent) free(WriteEvent);
if (PrevReadEvent) free(PrevReadEvent);
if (PrevWriteEvent) free(PrevWriteEvent);
if (StaticReadEvent) free(StaticReadEvent);
if (StaticWriteEvent) free(StaticWriteEvent);
if (fdtab) free(fdtab);