diff --git a/CHANGELOG b/CHANGELOG index f0af36f44b..fc0b5aba16 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,7 +1,10 @@ ChangeLog : =========== -2005/04/24 +2005/04/25 : 1.2.5-pre3 + - experimental epoll() support (use temporary '-e' argument) + +2005/04/24 : 1.2.5-pre2 - implemented the HTTP 303 code for error redirection. This forces the browser to fetch the given URI with a GET request. The new keyword for this is 'errorloc303', and a new 'errorloc302' keyword has been created @@ -9,7 +12,7 @@ ChangeLog : - added more controls in the parser for valid use of '\x' sequence. - few fixes from Alex & Klaus -2005/02/17 +2005/02/17 : 1.2.5-pre1 - fixed a few errors in the documentation 2005/02/13 diff --git a/Makefile b/Makefile index 29a79fcb64..70a21fe470 100644 --- a/Makefile +++ b/Makefile @@ -73,8 +73,8 @@ TARGET_OPTS=$(COPTS.$(TARGET)) REGEX_OPTS=$(COPTS.$(REGEX)) CPU_OPTS=$(COPTS.$(CPU)) -COPTS=-I. $(CPU_OPTS) $(TARGET_OPTS) $(REGEX_OPTS) $(SMALL_OPTS) -LIBS=$(LIBS.$(TARGET)) $(LIBS.$(REGEX)) +COPTS=-I. $(ADDINC) $(CPU_OPTS) $(TARGET_OPTS) $(REGEX_OPTS) $(SMALL_OPTS) +LIBS=$(LIBS.$(TARGET)) $(LIBS.$(REGEX)) $(ADDLIB) # - use -DSTATTIME=0 to disable statistics, else specify an interval in # milliseconds. diff --git a/TODO b/TODO index 047468b3b8..66a4ec6ac6 100644 --- a/TODO +++ b/TODO @@ -139,4 +139,5 @@ Todo for 1.2 * appcookie - weighted round robin - option to shutdown(listen_sock) when max connections reached - +* epoll +- replace the event scheduler with an O(log(N)) one diff --git a/haproxy.c b/haproxy.c index c5c29177cf..79819e6227 100644 --- a/haproxy.c +++ b/haproxy.c @@ -62,6 +62,8 @@ #include #endif +#include + #include "include/appsession.h" #define HAPROXY_VERSION "1.2.5" @@ -575,6 +577,7 @@ struct fdtab { /*********************************************************************/ int cfg_maxpconn = 2000; /* # of simultaneous connections per proxy (-N) */ +int cfg_use_epoll = 0; /* use epoll() instead of select() ? */ char *cfg_cfgfile = NULL; /* configuration file */ char *progname = NULL; /* program name */ int pid; /* current process id */ @@ -607,6 +610,11 @@ fd_set *ReadEvent, *StaticReadEvent, *StaticWriteEvent; +/* used by the epoll() emulation of select() */ +fd_set *PrevReadEvent, *PrevWriteEvent; +struct epoll_event *epoll_events; +int epoll_fd; + void **pool_session = NULL, **pool_buffer = NULL, **pool_fdtab = NULL, @@ -803,7 +811,9 @@ void usage(char *name) { " -c check mode : only check config file and exit\n" " -n sets the maximum total # of connections (%d)\n" " -N sets the default, per-proxy maximum # of connections (%d)\n" - " -p writes pids of all children to this file\n\n", + " -p writes pids of all children to this file\n" + " -e tries to use epoll() instead of select()\n" + "\n", name, DEFAULT_MAXCONN, cfg_maxpconn); exit(1); } @@ -1384,6 +1394,20 @@ static inline struct timeval *tv_min(struct timeval *tvmin, static inline void fd_delete(int fd) { FD_CLR(fd, StaticReadEvent); FD_CLR(fd, StaticWriteEvent); + if (cfg_use_epoll) { + struct epoll_event ev; + + ev.data.fd = fd; + if (epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fd, &ev) < 0) { + // it's impossible to tell whether it has already + // been done. + //perror("epoll_ctl(DEL)"); + //exit(1); + } + + FD_CLR(fd, PrevReadEvent); + FD_CLR(fd, PrevWriteEvent); + } close(fd); fdtab[fd].state = FD_STCLOSE; @@ -2130,6 +2154,20 @@ int event_srv_write(int fd) { void client_retnclose(struct session *s, int len, const char *msg) { FD_CLR(s->cli_fd, StaticReadEvent); FD_SET(s->cli_fd, StaticWriteEvent); + if (cfg_use_epoll) { + struct epoll_event ev; + + ev.data.fd = s->cli_fd; + if (epoll_ctl(epoll_fd, EPOLL_CTL_DEL, s->cli_fd, &ev) < 0) { + // it's impossible to tell whether it has already + // been done. + //perror("epoll_ctl(DEL)"); + //exit(1); + } + + FD_CLR(s->cli_fd, PrevReadEvent); + FD_CLR(s->cli_fd, PrevWriteEvent); + } tv_eternity(&s->crexpire); shutdown(s->cli_fd, SHUT_RD); s->cli_state = CL_STSHUTR; @@ -4832,71 +4870,232 @@ void select_loop() { next_time = MINTIME(time2, next_time); #endif - if (next_time > 0) { /* FIXME */ - /* Convert to timeval */ - /* to avoid eventual select loops due to timer precision */ - next_time += SCHEDULER_RESOLUTION; - delta.tv_sec = next_time / 1000; - delta.tv_usec = (next_time % 1000) * 1000; - } - else if (next_time == 0) { /* allow select to return immediately when needed */ - delta.tv_sec = delta.tv_usec = 0; - } + if (cfg_use_epoll) { + /* use epoll() */ + int fds, count; + int pr, pw, sr, sw; + unsigned rn, ro, wn, wo; /* read new, read old, write new, write old */ + struct epoll_event ev; - /* let's restore fdset state */ + for (fds = 0; (fds << INTBITS) < maxfd; fds++) { - readnotnull = 0; writenotnull = 0; - for (i = 0; i < (maxfd + FD_SETSIZE - 1)/(8*sizeof(int)); i++) { - readnotnull |= (*(((int*)ReadEvent)+i) = *(((int*)StaticReadEvent)+i)) != 0; - writenotnull |= (*(((int*)WriteEvent)+i) = *(((int*)StaticWriteEvent)+i)) != 0; - } + rn = ((int*)StaticReadEvent)[fds]; ro = ((int*)PrevReadEvent)[fds]; + wn = ((int*)StaticWriteEvent)[fds]; wo = ((int*)PrevWriteEvent)[fds]; -// /* just a verification code, needs to be removed for performance */ -// for (i=0; i= 0) ? &delta : NULL); - - /* this is an experiment on the separation of the select work */ - // status = (readnotnull ? select(maxfd, ReadEvent, NULL, NULL, (next_time >= 0) ? &delta : NULL) : 0); - // status |= (writenotnull ? select(maxfd, NULL, WriteEvent, NULL, (next_time >= 0) ? &delta : NULL) : 0); - - tv_now(&now); - - if (status > 0) { /* must proceed with events */ - - int fds; - char count; - - for (fds = 0; (fds << INTBITS) < maxfd; fds++) - if ((((int *)(ReadEvent))[fds] | ((int *)(WriteEvent))[fds]) != 0) + if ((ro^rn) | (wo^wn)) { for (count = 1<> ((1<> ((1<> ((1<> ((1< 0) { /* FIXME */ + /* Convert to timeval */ + /* to avoid eventual select loops due to timer precision */ + next_time += SCHEDULER_RESOLUTION; + delta.tv_sec = next_time / 1000; + delta.tv_usec = (next_time % 1000) * 1000; + } + else if (next_time == 0) { /* allow select to return immediately when needed */ + delta.tv_sec = delta.tv_usec = 0; + } + + + /* let's restore fdset state */ + + readnotnull = 0; writenotnull = 0; + for (i = 0; i < (maxfd + FD_SETSIZE - 1)/(8*sizeof(int)); i++) { + readnotnull |= (*(((int*)ReadEvent)+i) = *(((int*)StaticReadEvent)+i)) != 0; + writenotnull |= (*(((int*)WriteEvent)+i) = *(((int*)StaticWriteEvent)+i)) != 0; + } + + // /* just a verification code, needs to be removed for performance */ + // for (i=0; i= 0) ? &delta : NULL); + + /* this is an experiment on the separation of the select work */ + // status = (readnotnull ? select(maxfd, ReadEvent, NULL, NULL, (next_time >= 0) ? &delta : NULL) : 0); + // status |= (writenotnull ? select(maxfd, NULL, WriteEvent, NULL, (next_time >= 0) ? &delta : NULL) : 0); + + tv_now(&now); + + if (status > 0) { /* must proceed with events */ + + int fds; + char count; + + for (fds = 0; (fds << INTBITS) < maxfd; fds++) + if ((((int *)(ReadEvent))[fds] | ((int *)(WriteEvent))[fds]) != 0) + for (count = 1<