* released 1.2.5-pre3
* experimental epoll() support (use temporary '-e' argument)
This commit is contained in:
parent
c1f4753d60
commit
ad90a0c80e
|
@ -1,7 +1,10 @@
|
|||
ChangeLog :
|
||||
===========
|
||||
|
||||
2005/04/24
|
||||
2005/04/25 : 1.2.5-pre3
|
||||
- experimental epoll() support (use temporary '-e' argument)
|
||||
|
||||
2005/04/24 : 1.2.5-pre2
|
||||
- implemented the HTTP 303 code for error redirection. This forces the
|
||||
browser to fetch the given URI with a GET request. The new keyword for
|
||||
this is 'errorloc303', and a new 'errorloc302' keyword has been created
|
||||
|
@ -9,7 +12,7 @@ ChangeLog :
|
|||
- added more controls in the parser for valid use of '\x' sequence.
|
||||
- a few fixes from Alex & Klaus
|
||||
|
||||
2005/02/17
|
||||
2005/02/17 : 1.2.5-pre1
|
||||
- fixed a few errors in the documentation
|
||||
|
||||
2005/02/13
|
||||
|
|
4
Makefile
4
Makefile
|
@ -73,8 +73,8 @@ TARGET_OPTS=$(COPTS.$(TARGET))
|
|||
REGEX_OPTS=$(COPTS.$(REGEX))
|
||||
CPU_OPTS=$(COPTS.$(CPU))
|
||||
|
||||
COPTS=-I. $(CPU_OPTS) $(TARGET_OPTS) $(REGEX_OPTS) $(SMALL_OPTS)
|
||||
LIBS=$(LIBS.$(TARGET)) $(LIBS.$(REGEX))
|
||||
COPTS=-I. $(ADDINC) $(CPU_OPTS) $(TARGET_OPTS) $(REGEX_OPTS) $(SMALL_OPTS)
|
||||
LIBS=$(LIBS.$(TARGET)) $(LIBS.$(REGEX)) $(ADDLIB)
|
||||
|
||||
# - use -DSTATTIME=0 to disable statistics, else specify an interval in
|
||||
# milliseconds.
|
||||
|
|
3
TODO
3
TODO
|
@ -139,4 +139,5 @@ Todo for 1.2
|
|||
* appcookie
|
||||
- weighted round robin
|
||||
- option to shutdown(listen_sock) when max connections reached
|
||||
|
||||
* epoll
|
||||
- replace the event scheduler with an O(log(N)) one
|
||||
|
|
337
haproxy.c
337
haproxy.c
|
@ -62,6 +62,8 @@
|
|||
#include <strings.h>
|
||||
#endif
|
||||
|
||||
#include <sys/epoll.h>
|
||||
|
||||
#include "include/appsession.h"
|
||||
|
||||
#define HAPROXY_VERSION "1.2.5"
|
||||
|
@ -575,6 +577,7 @@ struct fdtab {
|
|||
/*********************************************************************/
|
||||
|
||||
int cfg_maxpconn = 2000; /* # of simultaneous connections per proxy (-N) */
|
||||
int cfg_use_epoll = 0; /* use epoll() instead of select() ? */
|
||||
char *cfg_cfgfile = NULL; /* configuration file */
|
||||
char *progname = NULL; /* program name */
|
||||
int pid; /* current process id */
|
||||
|
@ -607,6 +610,11 @@ fd_set *ReadEvent,
|
|||
*StaticReadEvent,
|
||||
*StaticWriteEvent;
|
||||
|
||||
/* used by the epoll() emulation of select() */
|
||||
fd_set *PrevReadEvent, *PrevWriteEvent;
|
||||
struct epoll_event *epoll_events;
|
||||
int epoll_fd;
|
||||
|
||||
void **pool_session = NULL,
|
||||
**pool_buffer = NULL,
|
||||
**pool_fdtab = NULL,
|
||||
|
@ -803,7 +811,9 @@ void usage(char *name) {
|
|||
" -c check mode : only check config file and exit\n"
|
||||
" -n sets the maximum total # of connections (%d)\n"
|
||||
" -N sets the default, per-proxy maximum # of connections (%d)\n"
|
||||
" -p writes pids of all children to this file\n\n",
|
||||
" -p writes pids of all children to this file\n"
|
||||
" -e tries to use epoll() instead of select()\n"
|
||||
"\n",
|
||||
name, DEFAULT_MAXCONN, cfg_maxpconn);
|
||||
exit(1);
|
||||
}
|
||||
|
@ -1384,6 +1394,20 @@ static inline struct timeval *tv_min(struct timeval *tvmin,
|
|||
static inline void fd_delete(int fd) {
|
||||
FD_CLR(fd, StaticReadEvent);
|
||||
FD_CLR(fd, StaticWriteEvent);
|
||||
if (cfg_use_epoll) {
|
||||
struct epoll_event ev;
|
||||
|
||||
ev.data.fd = fd;
|
||||
if (epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fd, &ev) < 0) {
|
||||
// it's impossible to tell whether it has already
|
||||
// been done.
|
||||
//perror("epoll_ctl(DEL)");
|
||||
//exit(1);
|
||||
}
|
||||
|
||||
FD_CLR(fd, PrevReadEvent);
|
||||
FD_CLR(fd, PrevWriteEvent);
|
||||
}
|
||||
close(fd);
|
||||
fdtab[fd].state = FD_STCLOSE;
|
||||
|
||||
|
@ -2130,6 +2154,20 @@ int event_srv_write(int fd) {
|
|||
void client_retnclose(struct session *s, int len, const char *msg) {
|
||||
FD_CLR(s->cli_fd, StaticReadEvent);
|
||||
FD_SET(s->cli_fd, StaticWriteEvent);
|
||||
if (cfg_use_epoll) {
|
||||
struct epoll_event ev;
|
||||
|
||||
ev.data.fd = s->cli_fd;
|
||||
if (epoll_ctl(epoll_fd, EPOLL_CTL_DEL, s->cli_fd, &ev) < 0) {
|
||||
// it's impossible to tell whether it has already
|
||||
// been done.
|
||||
//perror("epoll_ctl(DEL)");
|
||||
//exit(1);
|
||||
}
|
||||
|
||||
FD_CLR(s->cli_fd, PrevReadEvent);
|
||||
FD_CLR(s->cli_fd, PrevWriteEvent);
|
||||
}
|
||||
tv_eternity(&s->crexpire);
|
||||
shutdown(s->cli_fd, SHUT_RD);
|
||||
s->cli_state = CL_STSHUTR;
|
||||
|
@ -4832,71 +4870,232 @@ void select_loop() {
|
|||
next_time = MINTIME(time2, next_time);
|
||||
#endif
|
||||
|
||||
if (next_time > 0) { /* FIXME */
|
||||
/* Convert to timeval */
|
||||
/* to avoid possible select loops due to timer precision */
|
||||
next_time += SCHEDULER_RESOLUTION;
|
||||
delta.tv_sec = next_time / 1000;
|
||||
delta.tv_usec = (next_time % 1000) * 1000;
|
||||
}
|
||||
else if (next_time == 0) { /* allow select to return immediately when needed */
|
||||
delta.tv_sec = delta.tv_usec = 0;
|
||||
}
|
||||
|
||||
if (cfg_use_epoll) {
|
||||
/* use epoll() */
|
||||
int fds, count;
|
||||
int pr, pw, sr, sw;
|
||||
unsigned rn, ro, wn, wo; /* read new, read old, write new, write old */
|
||||
struct epoll_event ev;
|
||||
|
||||
/* let's restore fdset state */
|
||||
for (fds = 0; (fds << INTBITS) < maxfd; fds++) {
|
||||
|
||||
readnotnull = 0; writenotnull = 0;
|
||||
for (i = 0; i < (maxfd + FD_SETSIZE - 1)/(8*sizeof(int)); i++) {
|
||||
readnotnull |= (*(((int*)ReadEvent)+i) = *(((int*)StaticReadEvent)+i)) != 0;
|
||||
writenotnull |= (*(((int*)WriteEvent)+i) = *(((int*)StaticWriteEvent)+i)) != 0;
|
||||
}
|
||||
rn = ((int*)StaticReadEvent)[fds]; ro = ((int*)PrevReadEvent)[fds];
|
||||
wn = ((int*)StaticWriteEvent)[fds]; wo = ((int*)PrevWriteEvent)[fds];
|
||||
|
||||
// /* just a verification code, needs to be removed for performance */
|
||||
// for (i=0; i<maxfd; i++) {
|
||||
// if (FD_ISSET(i, ReadEvent) != FD_ISSET(i, StaticReadEvent))
|
||||
// abort();
|
||||
// if (FD_ISSET(i, WriteEvent) != FD_ISSET(i, StaticWriteEvent))
|
||||
// abort();
|
||||
//
|
||||
// }
|
||||
|
||||
status = select(maxfd,
|
||||
readnotnull ? ReadEvent : NULL,
|
||||
writenotnull ? WriteEvent : NULL,
|
||||
NULL,
|
||||
(next_time >= 0) ? &delta : NULL);
|
||||
|
||||
/* this is an experiment on the separation of the select work */
|
||||
// status = (readnotnull ? select(maxfd, ReadEvent, NULL, NULL, (next_time >= 0) ? &delta : NULL) : 0);
|
||||
// status |= (writenotnull ? select(maxfd, NULL, WriteEvent, NULL, (next_time >= 0) ? &delta : NULL) : 0);
|
||||
|
||||
tv_now(&now);
|
||||
|
||||
if (status > 0) { /* must proceed with events */
|
||||
|
||||
int fds;
|
||||
char count;
|
||||
|
||||
for (fds = 0; (fds << INTBITS) < maxfd; fds++)
|
||||
if ((((int *)(ReadEvent))[fds] | ((int *)(WriteEvent))[fds]) != 0)
|
||||
if ((ro^rn) | (wo^wn)) {
|
||||
for (count = 1<<INTBITS, fd = fds << INTBITS; count && fd < maxfd; count--, fd++) {
|
||||
|
||||
/* if we specify read first, the accepts and zero reads will be
|
||||
* seen first. Moreover, system buffers will be flushed faster.
|
||||
*/
|
||||
if (fdtab[fd].state == FD_STCLOSE)
|
||||
continue;
|
||||
|
||||
if (FD_ISSET(fd, ReadEvent))
|
||||
fdtab[fd].read(fd);
|
||||
|
||||
if (FD_ISSET(fd, WriteEvent))
|
||||
fdtab[fd].write(fd);
|
||||
#define WE_KNOW_HOW_FDSET_WORKS
|
||||
#ifdef WE_KNOW_HOW_FDSET_WORKS
|
||||
|
||||
#define WE_REALLY_NOW_THAT_FDSETS_ARE_INTS
|
||||
#ifdef WE_REALLY_NOW_THAT_FDSETS_ARE_INTS
|
||||
pr = (ro >> ((1<<INTBITS)-count)) & 1;
|
||||
pw = (wo >> ((1<<INTBITS)-count)) & 1;
|
||||
sr = (rn >> ((1<<INTBITS)-count)) & 1;
|
||||
sw = (wn >> ((1<<INTBITS)-count)) & 1;
|
||||
#else
|
||||
pr = FD_ISSET(fd&((1<<INTBITS)-1), (typeof(fd_set*))&ro);
|
||||
pw = FD_ISSET(fd&((1<<INTBITS)-1), (typeof(fd_set*))&wo);
|
||||
sr = FD_ISSET(fd&((1<<INTBITS)-1), (typeof(fd_set*))&rn);
|
||||
sw = FD_ISSET(fd&((1<<INTBITS)-1), (typeof(fd_set*))&wn);
|
||||
#endif
|
||||
#else
|
||||
pr = FD_ISSET(fd, PrevReadEvent);
|
||||
pw = FD_ISSET(fd, PrevWriteEvent);
|
||||
sr = FD_ISSET(fd, StaticReadEvent);
|
||||
sw = FD_ISSET(fd, StaticWriteEvent);
|
||||
#endif
|
||||
if (!((sr^pr) | (sw^pw)))
|
||||
continue;
|
||||
|
||||
ev.events = (sr ? EPOLLIN : 0) | (sw ? EPOLLOUT : 0);
|
||||
ev.data.fd = fd;
|
||||
|
||||
if ((pr | pw)) {
|
||||
/* the file-descriptor already exists... */
|
||||
if ((sr | sw)) {
|
||||
/* ...and it will still exist */
|
||||
if (epoll_ctl(epoll_fd, EPOLL_CTL_MOD, fd, &ev) < 0) {
|
||||
perror("epoll_ctl(MOD)");
|
||||
exit(1);
|
||||
}
|
||||
} else {
|
||||
/* ...and it will be removed */
|
||||
if (epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fd, &ev) < 0) {
|
||||
perror("epoll_ctl(DEL)");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* the file-descriptor did not exist, let's add it */
|
||||
if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
|
||||
perror("epoll_ctl(ADD)");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
// fprintf(stderr,"select returned %d, maxfd=%d\n", status, maxfd);
|
||||
((int*)PrevReadEvent)[fds] = rn;
|
||||
((int*)PrevWriteEvent)[fds] = wn;
|
||||
}
|
||||
|
||||
#if useless_optimization
|
||||
unsigned a, d, m; /* add mask, del mask, mod mask */
|
||||
|
||||
a = (rn|wn) & ~(ro|wo); /* fds to add */
|
||||
d = (ro|wo) & ~(rn|wn); /* fds to remove, normally none */
|
||||
m = (ro^rn) | (wo^wn); /* fds which change */
|
||||
|
||||
if (m) {
|
||||
struct epoll_event ev;
|
||||
m &= ~(a|d); /* keep only changes, not add/del */
|
||||
|
||||
if (m) { /* fds which only change */
|
||||
for (count = 1<<INTBITS, fd = fds << INTBITS; count && fd < maxfd; count--, fd++) {
|
||||
ev.data.fd = fd;
|
||||
ev.events = 0;
|
||||
|
||||
if ((FD_ISSET(fd, PrevReadEvent) || FD_ISSET(fd, PrevWriteEvent)) &&
|
||||
(FD_ISSET(fd, StaticReadEvent) || FD_ISSET(fd, StaticWriteEvent))) {
|
||||
if (FD_ISSET(fd, StaticReadEvent))
|
||||
ev.events |= EPOLLIN;
|
||||
if (FD_ISSET(fd, StaticWriteEvent))
|
||||
ev.events |= EPOLLOUT;
|
||||
if (ev.events && epoll_ctl(epoll_fd, EPOLL_CTL_MOD, fd, &ev) < 0) {
|
||||
perror("epoll_ctl(MOD)");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (a) { /* fds to add */
|
||||
// printf("a=%08x\n", a);
|
||||
for (count = 1<<INTBITS, fd = fds << INTBITS; count && fd < maxfd; count--, fd++) {
|
||||
ev.data.fd = fd;
|
||||
ev.events = 0;
|
||||
if (!FD_ISSET(fd, PrevReadEvent) && !FD_ISSET(fd, PrevWriteEvent)) {
|
||||
if (FD_ISSET(fd, StaticReadEvent))
|
||||
ev.events |= EPOLLIN;
|
||||
if (FD_ISSET(fd, StaticWriteEvent))
|
||||
ev.events |= EPOLLOUT;
|
||||
if (ev.events && epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
|
||||
perror("epoll_ctl(ADD)");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (d) { /* fds to delete */
|
||||
for (count = 1<<INTBITS, fd = fds << INTBITS; count && fd < maxfd; count--, fd++) {
|
||||
ev.data.fd = fd;
|
||||
ev.events = 0;
|
||||
if (FD_ISSET(fd, StaticReadEvent) || FD_ISSET(fd, StaticWriteEvent))
|
||||
continue;
|
||||
if (!FD_ISSET(fd, PrevReadEvent) && !FD_ISSET(fd, PrevWriteEvent))
|
||||
continue;
|
||||
if (epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fd, &ev) < 0) {
|
||||
perror("epoll_ctl(DEL)");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
((int*)PrevReadEvent)[fds] = rn;
|
||||
((int*)PrevWriteEvent)[fds] = wn;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* now let's wait for events */
|
||||
status = epoll_wait(epoll_fd, epoll_events, maxfd, next_time);
|
||||
tv_now(&now);
|
||||
|
||||
for (count = 0; count < status; count++) {
|
||||
fd = epoll_events[count].data.fd;
|
||||
|
||||
if (fdtab[fd].state == FD_STCLOSE)
|
||||
continue;
|
||||
|
||||
if (epoll_events[count].events & ( EPOLLIN | EPOLLERR | EPOLLHUP ))
|
||||
fdtab[fd].read(fd);
|
||||
|
||||
if (fdtab[fd].state == FD_STCLOSE)
|
||||
continue;
|
||||
|
||||
if (epoll_events[count].events & ( EPOLLOUT | EPOLLERR | EPOLLHUP ))
|
||||
fdtab[fd].write(fd);
|
||||
}
|
||||
} else {
|
||||
/* use select() */
|
||||
|
||||
if (next_time > 0) { /* FIXME */
|
||||
/* Convert to timeval */
|
||||
/* to avoid possible select loops due to timer precision */
|
||||
next_time += SCHEDULER_RESOLUTION;
|
||||
delta.tv_sec = next_time / 1000;
|
||||
delta.tv_usec = (next_time % 1000) * 1000;
|
||||
}
|
||||
else if (next_time == 0) { /* allow select to return immediately when needed */
|
||||
delta.tv_sec = delta.tv_usec = 0;
|
||||
}
|
||||
|
||||
|
||||
/* let's restore fdset state */
|
||||
|
||||
readnotnull = 0; writenotnull = 0;
|
||||
for (i = 0; i < (maxfd + FD_SETSIZE - 1)/(8*sizeof(int)); i++) {
|
||||
readnotnull |= (*(((int*)ReadEvent)+i) = *(((int*)StaticReadEvent)+i)) != 0;
|
||||
writenotnull |= (*(((int*)WriteEvent)+i) = *(((int*)StaticWriteEvent)+i)) != 0;
|
||||
}
|
||||
|
||||
// /* just a verification code, needs to be removed for performance */
|
||||
// for (i=0; i<maxfd; i++) {
|
||||
// if (FD_ISSET(i, ReadEvent) != FD_ISSET(i, StaticReadEvent))
|
||||
// abort();
|
||||
// if (FD_ISSET(i, WriteEvent) != FD_ISSET(i, StaticWriteEvent))
|
||||
// abort();
|
||||
//
|
||||
// }
|
||||
|
||||
status = select(maxfd,
|
||||
readnotnull ? ReadEvent : NULL,
|
||||
writenotnull ? WriteEvent : NULL,
|
||||
NULL,
|
||||
(next_time >= 0) ? &delta : NULL);
|
||||
|
||||
/* this is an experiment on the separation of the select work */
|
||||
// status = (readnotnull ? select(maxfd, ReadEvent, NULL, NULL, (next_time >= 0) ? &delta : NULL) : 0);
|
||||
// status |= (writenotnull ? select(maxfd, NULL, WriteEvent, NULL, (next_time >= 0) ? &delta : NULL) : 0);
|
||||
|
||||
tv_now(&now);
|
||||
|
||||
if (status > 0) { /* must proceed with events */
|
||||
|
||||
int fds;
|
||||
char count;
|
||||
|
||||
for (fds = 0; (fds << INTBITS) < maxfd; fds++)
|
||||
if ((((int *)(ReadEvent))[fds] | ((int *)(WriteEvent))[fds]) != 0)
|
||||
for (count = 1<<INTBITS, fd = fds << INTBITS; count && fd < maxfd; count--, fd++) {
|
||||
|
||||
/* if we specify read first, the accepts and zero reads will be
|
||||
* seen first. Moreover, system buffers will be flushed faster.
|
||||
*/
|
||||
if (fdtab[fd].state == FD_STCLOSE)
|
||||
continue;
|
||||
|
||||
if (FD_ISSET(fd, ReadEvent))
|
||||
fdtab[fd].read(fd);
|
||||
|
||||
if (FD_ISSET(fd, WriteEvent))
|
||||
fdtab[fd].write(fd);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// fprintf(stderr,"select returned %d, maxfd=%d\n", status, maxfd);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -6795,6 +6994,8 @@ void init(int argc, char **argv) {
|
|||
display_version();
|
||||
exit(0);
|
||||
}
|
||||
else if (*flag == 'e')
|
||||
cfg_use_epoll = 1;
|
||||
else if (*flag == 'V')
|
||||
arg_mode |= MODE_VERBOSE;
|
||||
else if (*flag == 'd')
|
||||
|
@ -6897,6 +7098,22 @@ void init(int argc, char **argv) {
|
|||
sizeof(fd_set) *
|
||||
(global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE);
|
||||
|
||||
if (cfg_use_epoll) {
|
||||
epoll_fd = epoll_create(global.maxsock + 1);
|
||||
if (epoll_fd < 0) {
|
||||
Warning("epoll() is not available. Using select() instead.\n");
|
||||
cfg_use_epoll = 0;
|
||||
} else {
|
||||
epoll_events = (struct epoll_event*) calloc(1, sizeof(struct epoll_event) * global.maxsock);
|
||||
PrevReadEvent = (fd_set *)calloc(1,
|
||||
sizeof(fd_set) *
|
||||
(global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE);
|
||||
PrevWriteEvent = (fd_set *)calloc(1,
|
||||
sizeof(fd_set) *
|
||||
(global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE);
|
||||
}
|
||||
}
|
||||
|
||||
fdtab = (struct fdtab *)calloc(1,
|
||||
sizeof(struct fdtab) * (global.maxsock));
|
||||
for (i = 0; i < global.maxsock; i++) {
|
||||
|
@ -7109,6 +7326,8 @@ void deinit(void){
|
|||
|
||||
if (ReadEvent) free(ReadEvent);
|
||||
if (WriteEvent) free(WriteEvent);
|
||||
if (PrevReadEvent) free(ReadEvent);
|
||||
if (PrevWriteEvent) free(WriteEvent);
|
||||
if (StaticReadEvent) free(StaticReadEvent);
|
||||
if (StaticWriteEvent) free(StaticWriteEvent);
|
||||
if (fdtab) free(fdtab);
|
||||
|
|
Loading…
Reference in New Issue