diff --git a/haproxy.c b/haproxy.c
index f1d369a57..2f9c89c7d 100644
--- a/haproxy.c
+++ b/haproxy.c
@@ -512,7 +512,7 @@ struct server {
int result; /* 0 = connect OK, -1 = connect KO */
int curfd; /* file desc used for current test, or -1 if not in test */
unsigned char uweight, eweight; /* user-specified weight-1, and effective weight-1 */
- unsigned short wsquare; /* eweight*eweight, to speed up map computation */
+ unsigned int wscore; /* weight score, used during srv map computation */
struct proxy *proxy; /* the proxy this server belongs to */
};
@@ -579,8 +579,12 @@ struct proxy {
struct in_addr mon_net, mon_mask; /* don't forward connections from this net (network order) FIXME: should support IPv6 */
int state; /* proxy state */
struct sockaddr_in dispatch_addr; /* the default address to connect to */
- struct server *srv, *cursrv; /* known servers, current server */
- int srv_act, srv_bck; /* # of servers */
+ struct server *srv; /* known servers */
+ int srv_act, srv_bck; /* # of running servers */
+ int tot_wact, tot_wbck; /* total weights of active and backup servers */
+ struct server **srv_map; /* the server map used to apply weights */
+ int srv_map_sz; /* the size of the effective server map */
+ int srv_rr_idx; /* next server to be elected in round robin mode */
char *cookie_name; /* name of the cookie to look for */
int cookie_len; /* strlen(cookie_name), computed only once */
char *appsession_name; /* name of the cookie to look for */
@@ -1809,77 +1813,101 @@ static inline void session_free(struct session *s) {
/*
* This function recounts the number of usable active and backup servers for
* proxy <p>. These numbers are returned into the p->srv_act and p->srv_bck.
+ * This function also recomputes the total active and backup weights.
*/
-static inline void recount_servers(struct proxy *px) {
+static void recount_servers(struct proxy *px) {
struct server *srv;
- px->srv_act = 0; px->srv_bck = 0;
+ px->srv_act = 0; px->srv_bck = px->tot_wact = px->tot_wbck = 0; /* reset counters and weight totals before rescanning the list */
for (srv = px->srv; srv != NULL; srv = srv->next) {
if (srv->state & SRV_RUNNING) {
- if (srv->state & SRV_BACKUP)
+ if (srv->state & SRV_BACKUP) {
px->srv_bck++;
- else
+ px->tot_wbck += srv->eweight + 1; /* eweight is stored as weight-1 */
+ } else {
px->srv_act++;
+ px->tot_wact += srv->eweight + 1; /* eweight is stored as weight-1 */
+ }
}
}
}
+/* This function recomputes the server map for proxy px. It
+ * relies on px->tot_wact and px->tot_wbck, so it must be
+ * called after recount_servers(). It also expects px->srv_map
+ * to be already allocated with room for the largest map needed.
+ */
+static void recalc_server_map(struct proxy *px) {
+ int o, tot, flag;
+ struct server *cur, *best;
+
+ if (px->srv_act) { /* at least one active server is running: map active servers only */
+ flag = SRV_RUNNING;
+ tot = px->tot_wact;
+ } else if (px->srv_bck) { /* no active server is running: fall back to backup servers */
+ flag = SRV_RUNNING | SRV_BACKUP;
+ if (px->options & PR_O_USE_ALL_BK)
+ tot = px->tot_wbck;
+ else
+ tot = 1; /* the first server is enough */
+ } else { /* no running server at all: publish an empty map */
+ px->srv_map_sz = 0;
+ return;
+ }
+
+ /* this algorithm gives priority to the first server, which means that
+ * it will respect the declaration order for equivalent weights, and
+ * that whatever the weights, the first server called will always be
+ * the first declared. This is an important assumption for the backup
+ * case, where we want the first server only.
+ */
+ for (cur = px->srv; cur; cur = cur->next)
+ cur->wscore = 0; /* every server starts each recomputation with a null score */
+
+ for (o = 0; o < tot; o++) { /* one map slot is filled per iteration */
+ int max = 0;
+ best = NULL;
+ for (cur = px->srv; cur; cur = cur->next) {
+ if ((cur->state & (SRV_RUNNING | SRV_BACKUP)) == flag) { /* keep only servers whose RUNNING/BACKUP bits equal flag */
+ int v;
+
+ /* If we are forced to return only one server, we don't want to
+ * go further, because we would return the wrong one due to
+ * divide overflow.
+ */
+ if (tot == 1) {
+ best = cur;
+ break;
+ }
+
+ cur->wscore += cur->eweight + 1; /* eweight is stored as weight-1 */
+ v = (cur->wscore + tot) / tot; /* result between 0 and 3 */
+ if (best == NULL || v > max) { /* strict '>' keeps the earliest server on ties */
+ max = v;
+ best = cur;
+ }
+ }
+ }
+ px->srv_map[o] = best;
+ best->wscore -= tot; /* wscore is unsigned, so this can wrap; NOTE(review): the "+ tot" above appears to rely on that wrap - confirm */
+ }
+ px->srv_map_sz = tot; /* number of valid entries now stored in srv_map */
+}
+
/*
* This function tries to find a running server for the proxy following
* the round-robin method. Depending on the number of active/backup servers,
* it will either look for active servers, or for backup servers.
- * If any server is found, it will be returned and px->cursrv will be updated
+ * If any server is found, it will be returned and px->srv_rr_idx will be updated
* to point to the next server. If no valid server is found, NULL is returned.
*/
static inline struct server *get_server_rr(struct proxy *px) {
- struct server *srv;
- struct server *end;
+ if (px->srv_map_sz == 0) /* empty map: no server can be elected */
+ return NULL;
- if (px->srv_act) {
- srv = px->cursrv;
- if (srv == NULL)
- srv = px->srv;
- end = srv;
- do {
- if ((srv->state & (SRV_RUNNING | SRV_BACKUP)) == SRV_RUNNING) {
- px->cursrv = srv->next;
- return srv;
- }
-
- srv = srv->next;
- if (srv == NULL)
- srv = px->srv;
- } while (srv != end);
- /* note that theorically we should not get there */
- }
-
- if (px->srv_bck) {
- /* By default, we look for the first backup server if all others are
- * DOWN. But in some cases, it may be desirable to load-balance across
- * all backup servers.
- */
- if (px->options & PR_O_USE_ALL_BK)
- srv = px->cursrv;
- else
- srv = px->srv;
-
- if (srv == NULL)
- srv = px->srv;
- end = srv;
- do {
- if (srv->state & SRV_RUNNING) {
- px->cursrv = srv->next;
- return srv;
- }
- srv = srv->next;
- if (srv == NULL)
- srv = px->srv;
- } while (srv != end);
- /* note that theorically we should not get there */
- }
-
- /* if we get there, it means there are no available servers at all */
- return NULL;
+ if (px->srv_rr_idx < 0 || px->srv_rr_idx >= px->srv_map_sz) /* index outside the current map: restart from the first entry */
+ px->srv_rr_idx = 0;
+ return px->srv_map[px->srv_rr_idx++]; /* return the current entry, then advance the index for the next call */
}
@@ -1891,58 +1919,20 @@ static inline struct server *get_server_rr(struct proxy *px) {
* NULL is returned.
*/
static inline struct server *get_server_sh(struct proxy *px, char *addr, int len) {
- struct server *srv;
+ unsigned int h, l; /* h: XOR hash of the address words, l: byte offset into addr */
- if (px->srv_act) {
- unsigned int h, l;
+ if (px->srv_map_sz == 0) /* empty map: no server can be elected */
+ return NULL;
- l = h = 0;
- if (px->srv_act > 1) {
- while ((l + sizeof (int)) <= len) {
- h ^= ntohl(*(unsigned int *)(&addr[l]));
- l += sizeof (int);
- }
- h %= px->srv_act;
- }
-
- for (srv = px->srv; srv; srv = srv->next) {
- if ((srv->state & (SRV_RUNNING | SRV_BACKUP)) == SRV_RUNNING) {
- if (!h)
- return srv;
- h--;
- }
+ l = h = 0;
+ if (px->srv_map_sz > 1) { /* test the map size, not srv_act: the map may hold only backup servers (srv_act == 0) and must still be hashed across */
+ while ((l + sizeof (int)) <= len) {
+ h ^= ntohl(*(unsigned int *)(&addr[l]));
+ l += sizeof (int);
}
- /* note that theorically we should not get there */
+ h %= px->srv_map_sz; /* fold the hash onto a valid map index */
}
-
- if (px->srv_bck) {
- unsigned int h, l;
-
- /* By default, we look for the first backup server if all others are
- * DOWN. But in some cases, it may be desirable to load-balance across
- * all backup servers.
- */
- l = h = 0;
- if (px->srv_bck > 1 && px->options & PR_O_USE_ALL_BK) {
- while ((l + sizeof (int)) <= len) {
- h ^= ntohl(*(unsigned int *)(&addr[l]));
- l += sizeof (int);
- }
- h %= px->srv_bck;
- }
-
- for (srv = px->srv; srv; srv = srv->next) {
- if (srv->state & SRV_RUNNING) {
- if (!h)
- return srv;
- h--;
- }
- }
- /* note that theorically we should not get there */
- }
-
- /* if we get there, it means there are no available servers at all */
- return NULL;
+ return px->srv_map[h]; /* h is 0 for a single-entry map, otherwise already reduced modulo srv_map_sz */
}
@@ -5338,6 +5328,7 @@ int process_chk(struct task *t) {
s->state &= ~SRV_RUNNING;
if (s->health == s->rise) {
recount_servers(s->proxy);
+ recalc_server_map(s->proxy);
Warning("%sServer %s/%s DOWN. %d active and %d backup servers left.%s\n",
s->state & SRV_BACKUP ? "Backup " : "",
s->proxy->id, s->id, s->proxy->srv_act, s->proxy->srv_bck,
@@ -5373,6 +5364,7 @@ int process_chk(struct task *t) {
if (s->health == s->rise) {
recount_servers(s->proxy);
+ recalc_server_map(s->proxy);
Warning("%sServer %s/%s UP. %d active and %d backup servers online.%s\n",
s->state & SRV_BACKUP ? "Backup " : "",
s->proxy->id, s->id, s->proxy->srv_act, s->proxy->srv_bck,
@@ -5403,6 +5395,7 @@ int process_chk(struct task *t) {
if (s->health == s->rise) {
recount_servers(s->proxy);
+ recalc_server_map(s->proxy);
Warning("%sServer %s/%s DOWN. %d active and %d backup servers left.%s\n",
s->state & SRV_BACKUP ? "Backup " : "",
s->proxy->id, s->id, s->proxy->srv_act, s->proxy->srv_bck,
@@ -7008,13 +7001,9 @@ int cfg_parse_listen(char *file, int linenum, char **args) {
return -1;
}
- if (curproxy->srv == NULL)
- curproxy->srv = newsrv;
- else
- curproxy->cursrv->next = newsrv;
- curproxy->cursrv = newsrv;
-
- newsrv->next = NULL;
+ /* the servers are linked backwards first */
+ newsrv->next = curproxy->srv;
+ curproxy->srv = newsrv;
newsrv->proxy = curproxy;
do_check = 0;
@@ -7828,7 +7817,6 @@ int readcfgfile(char *file) {
}
while (curproxy != NULL) {
- curproxy->cursrv = NULL;
if (curproxy->state == PR_STSTOPPED) {
curproxy = curproxy->next;
continue;
@@ -7885,29 +7873,59 @@ int readcfgfile(char *file) {
file, curproxy->id);
cfgerr++;
}
- else {
- struct server *srv;
- int pgcd;
+ }
- if (newsrv) {
- /* We will factor the weights to reduce the table,
- * using Euclide's largest common divisor algorithm
- */
- pgcd = newsrv->uweight + 1;
- for (srv = newsrv->next; srv && pgcd > 1; srv = srv->next) {
- int t, w;
+ /* first, we will invert the servers list order */
+ newsrv = NULL;
+ while (curproxy->srv) {
+ struct server *next;
- w = srv->uweight + 1;
- while (w) {
- t = pgcd % w;
- pgcd = w;
- w = t;
+ next = curproxy->srv->next;
+ curproxy->srv->next = newsrv;
+ newsrv = curproxy->srv;
+ if (!next)
+ break;
+ curproxy->srv = next;
+ }
+
+ /* now, newsrv == curproxy->srv */
+ if (newsrv) {
+ struct server *srv;
+ int pgcd;
+ int act, bck;
+
+ /* We will factor the weights to reduce the table,
+ * using Euclid's greatest common divisor algorithm
+ */
+ pgcd = newsrv->uweight + 1;
+ for (srv = newsrv->next; srv && pgcd > 1; srv = srv->next) {
+ int t, w;
+
+ w = srv->uweight + 1;
+ while (w) {
+ t = pgcd % w;
+ pgcd = w;
+ w = t;
}
- }
- for (srv = newsrv; srv; srv = srv->next)
- srv->eweight = ((srv->uweight + 1) / pgcd) - 1;
- }
}
+
+ act = bck = 0;
+ for (srv = newsrv; srv; srv = srv->next) {
+ srv->eweight = ((srv->uweight + 1) / pgcd) - 1;
+ if (srv->state & SRV_BACKUP)
+ bck += srv->eweight + 1;
+ else
+ act += srv->eweight + 1;
+ }
+
+ /* this is the largest map we will ever need for this servers list */
+ if (act < bck)
+ act = bck;
+
+ curproxy->srv_map = (struct server **)calloc(act, sizeof(struct server *));
+ /* recounts servers and their weights */
+ recount_servers(curproxy);
+ recalc_server_map(curproxy);
}
if (curproxy->options & PR_O_LOGASAP)