From f89c1873f8a144e67403e402cf24879a9527978d Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Thu, 1 Oct 2009 11:19:37 +0200 Subject: [PATCH] [CLEANUP] backend: move LB algos to individual files It was becoming painful to have all the LB algos in backend.c. Let's move them to their own files. A few hashing functions still need to be broken in two parts, one for the contents and one for the map position. --- Makefile | 1 + Makefile.bsd | 1 + Makefile.osx | 1 + include/proto/backend.h | 11 +- include/proto/lb_fwlc.h | 39 ++ include/proto/lb_fwrr.h | 39 ++ include/proto/lb_map.h | 42 ++ src/backend.c | 1074 +-------------------------------------- src/cfgparse.c | 3 + src/lb_fwlc.c | 316 ++++++++++++ src/lb_fwrr.c | 576 +++++++++++++++++++++ src/lb_map.c | 262 ++++++++++ 12 files changed, 1290 insertions(+), 1075 deletions(-) create mode 100644 include/proto/lb_fwlc.h create mode 100644 include/proto/lb_fwrr.h create mode 100644 include/proto/lb_map.h create mode 100644 src/lb_fwlc.c create mode 100644 src/lb_fwrr.c create mode 100644 src/lb_map.c diff --git a/Makefile b/Makefile index da5bee503..ed8cb27c9 100644 --- a/Makefile +++ b/Makefile @@ -459,6 +459,7 @@ OBJS = src/haproxy.o src/sessionhash.o src/base64.o src/protocols.o \ src/time.o src/fd.o src/pipe.o src/regex.o src/cfgparse.o src/server.o \ src/checks.o src/queue.o src/client.o src/proxy.o src/proto_uxst.o \ src/proto_http.o src/stream_sock.o src/appsession.o src/backend.o \ + src/lb_fwlc.o src/lb_fwrr.o src/lb_map.o \ src/stream_interface.o src/dumpstats.o src/proto_tcp.o \ src/session.o src/hdr_idx.o src/ev_select.o src/signal.o \ src/acl.o src/memory.o src/freq_ctr.o \ diff --git a/Makefile.bsd b/Makefile.bsd index 5d57725b6..eb2b6b085 100644 --- a/Makefile.bsd +++ b/Makefile.bsd @@ -105,6 +105,7 @@ OBJS = src/haproxy.o src/sessionhash.o src/base64.o src/protocols.o \ src/proto_http.o src/stream_sock.o src/appsession.o src/backend.o \ src/stream_interface.o src/dumpstats.o src/proto_tcp.o \ src/session.o 
src/hdr_idx.o src/ev_select.o src/signal.o \ + src/lb_fwlc.o src/lb_fwrr.o src/lb_map.o \ src/ev_poll.o src/ev_kqueue.o \ src/acl.o src/memory.o src/freq_ctr.o \ src/ebtree.o src/eb32tree.o diff --git a/Makefile.osx b/Makefile.osx index 23494a4d1..9ba146b27 100644 --- a/Makefile.osx +++ b/Makefile.osx @@ -102,6 +102,7 @@ OBJS = src/haproxy.o src/sessionhash.o src/base64.o src/protocols.o \ src/proto_http.o src/stream_sock.o src/appsession.o src/backend.o \ src/stream_interface.o src/dumpstats.o src/proto_tcp.o \ src/session.o src/hdr_idx.o src/ev_select.o src/signal.o \ + src/lb_fwlc.o src/lb_fwrr.o src/lb_map.o \ src/ev_poll.o \ src/acl.o src/memory.o src/freq_ctr.o \ src/ebtree.o src/eb32tree.o diff --git a/include/proto/backend.h b/include/proto/backend.h index 157850ae3..7a248da1f 100644 --- a/include/proto/backend.h +++ b/include/proto/backend.h @@ -2,7 +2,7 @@ include/proto/backend.h Functions prototypes for the backend. - Copyright (C) 2000-2008 Willy Tarreau - w@1wt.eu + Copyright (C) 2000-2009 Willy Tarreau - w@1wt.eu This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -25,10 +25,10 @@ #include #include +#include +#include #include -#include - int assign_server(struct session *s); int assign_server_address(struct session *s); int assign_server_and_queue(struct session *s); @@ -37,14 +37,9 @@ int srv_redispatch_connect(struct session *t); int backend_parse_balance(const char **args, char *err, int errlen, struct proxy *curproxy); -void recalc_server_map(struct proxy *px); int be_downtime(struct proxy *px); -void init_server_map(struct proxy *p); -void fwrr_init_server_groups(struct proxy *p); -void fwlc_init_server_tree(struct proxy *p); void recount_servers(struct proxy *px); void update_backend_weight(struct proxy *px); -struct server *get_server_rr_with_conns(struct proxy *px, struct server *srvtoavoid); struct server *get_server_sh(struct proxy *px, const char *addr, int len); 
struct server *get_server_uh(struct proxy *px, char *uri, int uri_len); diff --git a/include/proto/lb_fwlc.h b/include/proto/lb_fwlc.h new file mode 100644 index 000000000..8c5674656 --- /dev/null +++ b/include/proto/lb_fwlc.h @@ -0,0 +1,39 @@ +/* + * include/proto/lb_fwlc.h + * Fast Weighted Least Connection load balancing algorithm. + * + * Copyright (C) 2000-2009 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _PROTO_LB_FWLC_H +#define _PROTO_LB_FWLC_H + +#include +#include +#include + +struct server *fwlc_get_next_server(struct proxy *p, struct server *srvtoavoid); +void fwlc_init_server_tree(struct proxy *p); + +#endif /* _PROTO_LB_FWLC_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/proto/lb_fwrr.h b/include/proto/lb_fwrr.h new file mode 100644 index 000000000..1189c3888 --- /dev/null +++ b/include/proto/lb_fwrr.h @@ -0,0 +1,39 @@ +/* + * include/proto/lb_fwrr.h + * Fast Weighted Round Robin load balancing algorithm. + * + * Copyright (C) 2000-2009 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _PROTO_LB_FWRR_H +#define _PROTO_LB_FWRR_H + +#include +#include +#include + +void fwrr_init_server_groups(struct proxy *p); +struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid); + +#endif /* _PROTO_LB_FWRR_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/proto/lb_map.h b/include/proto/lb_map.h new file mode 100644 index 000000000..11fd60e2f --- /dev/null +++ b/include/proto/lb_map.h @@ -0,0 +1,42 @@ +/* + * include/proto/lb_map.h + * Map-based load-balancing (RR and HASH) + * + * Copyright (C) 2000-2009 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _PROTO_LB_MAP_H +#define _PROTO_LB_MAP_H + +#include +#include +#include + +void map_set_server_status_down(struct server *srv); +void map_set_server_status_up(struct server *srv); +void recalc_server_map(struct proxy *px); +void init_server_map(struct proxy *p); +struct server *map_get_server_rr(struct proxy *px, struct server *srvtoavoid); + +#endif /* _PROTO_LB_MAP_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/src/backend.c b/src/backend.c index e7ee3e4c6..d8d7e2e03 100644 --- a/src/backend.c +++ b/src/backend.c @@ -1,7 +1,7 @@ /* * Backend variables and functions. * - * Copyright 2000-2008 Willy Tarreau + * Copyright 2000-2009 Willy Tarreau * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -22,7 +22,6 @@ #include #include #include -#include #include #include @@ -31,6 +30,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -38,12 +40,6 @@ #include #include -static inline void fwrr_remove_from_tree(struct server *s); -static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s); -static inline void fwrr_dequeue_srv(struct server *s); -static void fwrr_get_srv(struct server *s); -static void fwrr_queue_srv(struct server *s); - /* * This function recounts the number of usable active and backup servers for * proxy

. These numbers are returned into the p->srv_act and p->srv_bck. @@ -96,1015 +92,6 @@ void update_backend_weight(struct proxy *px) } } -/* this function updates the map according to server 's new state */ -static void map_set_server_status_down(struct server *srv) -{ - struct proxy *p = srv->proxy; - - if (srv->state == srv->prev_state && - srv->eweight == srv->prev_eweight) - return; - - if (srv_is_usable(srv->state, srv->eweight)) - goto out_update_state; - - /* FIXME: could be optimized since we know what changed */ - recount_servers(p); - update_backend_weight(p); - p->lbprm.map.state |= PR_MAP_RECALC; - out_update_state: - srv->prev_state = srv->state; - srv->prev_eweight = srv->eweight; -} - -/* This function updates the map according to server 's new state */ -static void map_set_server_status_up(struct server *srv) -{ - struct proxy *p = srv->proxy; - - if (srv->state == srv->prev_state && - srv->eweight == srv->prev_eweight) - return; - - if (!srv_is_usable(srv->state, srv->eweight)) - goto out_update_state; - - /* FIXME: could be optimized since we know what changed */ - recount_servers(p); - update_backend_weight(p); - p->lbprm.map.state |= PR_MAP_RECALC; - out_update_state: - srv->prev_state = srv->state; - srv->prev_eweight = srv->eweight; -} - -/* This function recomputes the server map for proxy px. It relies on - * px->lbprm.tot_wact, tot_wbck, tot_used, tot_weight, so it must be - * called after recount_servers(). It also expects px->lbprm.map.srv - * to be allocated with the largest size needed. It updates tot_weight. 
- */ -void recalc_server_map(struct proxy *px) -{ - int o, tot, flag; - struct server *cur, *best; - - switch (px->lbprm.tot_used) { - case 0: /* no server */ - px->lbprm.map.state &= ~PR_MAP_RECALC; - return; - case 1: /* only one server, just fill first entry */ - tot = 1; - break; - default: - tot = px->lbprm.tot_weight; - break; - } - - /* here we *know* that we have some servers */ - if (px->srv_act) - flag = SRV_RUNNING; - else - flag = SRV_RUNNING | SRV_BACKUP; - - /* this algorithm gives priority to the first server, which means that - * it will respect the declaration order for equivalent weights, and - * that whatever the weights, the first server called will always be - * the first declared. This is an important asumption for the backup - * case, where we want the first server only. - */ - for (cur = px->srv; cur; cur = cur->next) - cur->wscore = 0; - - for (o = 0; o < tot; o++) { - int max = 0; - best = NULL; - for (cur = px->srv; cur; cur = cur->next) { - if (cur->eweight && - flag == (cur->state & - (SRV_RUNNING | SRV_GOINGDOWN | SRV_BACKUP))) { - int v; - - /* If we are forced to return only one server, we don't want to - * go further, because we would return the wrong one due to - * divide overflow. - */ - if (tot == 1) { - best = cur; - /* note that best->wscore will be wrong but we don't care */ - break; - } - - cur->wscore += cur->eweight; - v = (cur->wscore + tot) / tot; /* result between 0 and 3 */ - if (best == NULL || v > max) { - max = v; - best = cur; - } - } - } - px->lbprm.map.srv[o] = best; - best->wscore -= tot; - } - px->lbprm.map.state &= ~PR_MAP_RECALC; -} - -/* This function is responsible of building the server MAP for map-based LB - * algorithms, allocating the map, and setting p->lbprm.wmult to the GCD of the - * weights if applicable. It should be called only once per proxy, at config - * time. 
- */ -void init_server_map(struct proxy *p) -{ - struct server *srv; - int pgcd; - int act, bck; - - p->lbprm.set_server_status_up = map_set_server_status_up; - p->lbprm.set_server_status_down = map_set_server_status_down; - p->lbprm.update_server_eweight = NULL; - - if (!p->srv) - return; - - /* We will factor the weights to reduce the table, - * using Euclide's largest common divisor algorithm. - * Since we may have zero weights, we have to first - * find a non-zero weight server. - */ - pgcd = 1; - srv = p->srv; - while (srv && !srv->uweight) - srv = srv->next; - - if (srv) { - pgcd = srv->uweight; /* note: cannot be zero */ - while (pgcd > 1 && (srv = srv->next)) { - int w = srv->uweight; - while (w) { - int t = pgcd % w; - pgcd = w; - w = t; - } - } - } - - /* It is sometimes useful to know what factor to apply - * to the backend's effective weight to know its real - * weight. - */ - p->lbprm.wmult = pgcd; - - act = bck = 0; - for (srv = p->srv; srv; srv = srv->next) { - srv->eweight = srv->uweight / pgcd; - srv->prev_eweight = srv->eweight; - srv->prev_state = srv->state; - if (srv->state & SRV_BACKUP) - bck += srv->eweight; - else - act += srv->eweight; - } - - /* this is the largest map we will ever need for this servers list */ - if (act < bck) - act = bck; - - if (!act) - act = 1; - - p->lbprm.map.srv = (struct server **)calloc(act, sizeof(struct server *)); - /* recounts servers and their weights */ - p->lbprm.map.state = PR_MAP_RECALC; - recount_servers(p); - update_backend_weight(p); - recalc_server_map(p); -} - -/* This function updates the server trees according to server 's new - * state. It should be called when server 's status changes to down. - * It is not important whether the server was already down or not. It is not - * important either that the new state is completely down (the caller may not - * know all the variables of a server's state). 
- */ -static void fwrr_set_server_status_down(struct server *srv) -{ - struct proxy *p = srv->proxy; - struct fwrr_group *grp; - - if (srv->state == srv->prev_state && - srv->eweight == srv->prev_eweight) - return; - - if (srv_is_usable(srv->state, srv->eweight)) - goto out_update_state; - - if (!srv_is_usable(srv->prev_state, srv->prev_eweight)) - /* server was already down */ - goto out_update_backend; - - grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act; - grp->next_weight -= srv->prev_eweight; - - if (srv->state & SRV_BACKUP) { - p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight; - p->srv_bck--; - - if (srv == p->lbprm.fbck) { - /* we lost the first backup server in a single-backup - * configuration, we must search another one. - */ - struct server *srv2 = p->lbprm.fbck; - do { - srv2 = srv2->next; - } while (srv2 && - !((srv2->state & SRV_BACKUP) && - srv_is_usable(srv2->state, srv2->eweight))); - p->lbprm.fbck = srv2; - } - } else { - p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight; - p->srv_act--; - } - - fwrr_dequeue_srv(srv); - fwrr_remove_from_tree(srv); - -out_update_backend: - /* check/update tot_used, tot_weight */ - update_backend_weight(p); - out_update_state: - srv->prev_state = srv->state; - srv->prev_eweight = srv->eweight; -} - -/* This function updates the server trees according to server 's new - * state. It should be called when server 's status changes to up. - * It is not important whether the server was already down or not. It is not - * important either that the new state is completely UP (the caller may not - * know all the variables of a server's state). This function will not change - * the weight of a server which was already up. 
- */ -static void fwrr_set_server_status_up(struct server *srv) -{ - struct proxy *p = srv->proxy; - struct fwrr_group *grp; - - if (srv->state == srv->prev_state && - srv->eweight == srv->prev_eweight) - return; - - if (!srv_is_usable(srv->state, srv->eweight)) - goto out_update_state; - - if (srv_is_usable(srv->prev_state, srv->prev_eweight)) - /* server was already up */ - goto out_update_backend; - - grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act; - grp->next_weight += srv->eweight; - - if (srv->state & SRV_BACKUP) { - p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight; - p->srv_bck++; - - if (!(p->options & PR_O_USE_ALL_BK)) { - if (!p->lbprm.fbck) { - /* there was no backup server anymore */ - p->lbprm.fbck = srv; - } else { - /* we may have restored a backup server prior to fbck, - * in which case it should replace it. - */ - struct server *srv2 = srv; - do { - srv2 = srv2->next; - } while (srv2 && (srv2 != p->lbprm.fbck)); - if (srv2) - p->lbprm.fbck = srv; - } - } - } else { - p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight; - p->srv_act++; - } - - /* note that eweight cannot be 0 here */ - fwrr_get_srv(srv); - srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight; - fwrr_queue_srv(srv); - -out_update_backend: - /* check/update tot_used, tot_weight */ - update_backend_weight(p); - out_update_state: - srv->prev_state = srv->state; - srv->prev_eweight = srv->eweight; -} - -/* This function must be called after an update to server 's effective - * weight. It may be called after a state change too. - */ -static void fwrr_update_server_weight(struct server *srv) -{ - int old_state, new_state; - struct proxy *p = srv->proxy; - struct fwrr_group *grp; - - if (srv->state == srv->prev_state && - srv->eweight == srv->prev_eweight) - return; - - /* If changing the server's weight changes its state, we simply apply - * the procedures we already have for status change. 
If the state - * remains down, the server is not in any tree, so it's as easy as - * updating its values. If the state remains up with different weights, - * there are some computations to perform to find a new place and - * possibly a new tree for this server. - */ - - old_state = srv_is_usable(srv->prev_state, srv->prev_eweight); - new_state = srv_is_usable(srv->state, srv->eweight); - - if (!old_state && !new_state) { - srv->prev_state = srv->state; - srv->prev_eweight = srv->eweight; - return; - } - else if (!old_state && new_state) { - fwrr_set_server_status_up(srv); - return; - } - else if (old_state && !new_state) { - fwrr_set_server_status_down(srv); - return; - } - - grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act; - grp->next_weight = grp->next_weight - srv->prev_eweight + srv->eweight; - - p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight; - p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight; - - if (srv->lb_tree == grp->init) { - fwrr_dequeue_srv(srv); - fwrr_queue_by_weight(grp->init, srv); - } - else if (!srv->lb_tree) { - /* FIXME: server was down. This is not possible right now but - * may be needed soon for slowstart or graceful shutdown. - */ - fwrr_dequeue_srv(srv); - fwrr_get_srv(srv); - srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight; - fwrr_queue_srv(srv); - } else { - /* The server is either active or in the next queue. If it's - * still in the active queue and it has not consumed all of its - * places, let's adjust its next position. 
- */ - fwrr_get_srv(srv); - - if (srv->eweight > 0) { - int prev_next = srv->npos; - int step = grp->next_weight / srv->eweight; - - srv->npos = srv->lpos + step; - srv->rweight = 0; - - if (srv->npos > prev_next) - srv->npos = prev_next; - if (srv->npos < grp->curr_pos + 2) - srv->npos = grp->curr_pos + step; - } else { - /* push it into the next tree */ - srv->npos = grp->curr_pos + grp->curr_weight; - } - - fwrr_dequeue_srv(srv); - fwrr_queue_srv(srv); - } - - update_backend_weight(p); - srv->prev_state = srv->state; - srv->prev_eweight = srv->eweight; -} - -/* Remove a server from a tree. It must have previously been dequeued. This - * function is meant to be called when a server is going down or has its - * weight disabled. - */ -static inline void fwrr_remove_from_tree(struct server *s) -{ - s->lb_tree = NULL; -} - -/* Queue a server in the weight tree , assuming the weight is >0. - * We want to sort them by inverted weights, because we need to place - * heavy servers first in order to get a smooth distribution. - */ -static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s) -{ - s->lb_node.key = SRV_EWGHT_MAX - s->eweight; - eb32_insert(root, &s->lb_node); - s->lb_tree = root; -} - -/* This function is responsible for building the weight trees in case of fast - * weighted round-robin. It also sets p->lbprm.wdiv to the eweight to uweight - * ratio. Both active and backup groups are initialized. 
- */ -void fwrr_init_server_groups(struct proxy *p) -{ - struct server *srv; - struct eb_root init_head = EB_ROOT; - - p->lbprm.set_server_status_up = fwrr_set_server_status_up; - p->lbprm.set_server_status_down = fwrr_set_server_status_down; - p->lbprm.update_server_eweight = fwrr_update_server_weight; - - p->lbprm.wdiv = BE_WEIGHT_SCALE; - for (srv = p->srv; srv; srv = srv->next) { - srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE; - srv->prev_state = srv->state; - } - - recount_servers(p); - update_backend_weight(p); - - /* prepare the active servers group */ - p->lbprm.fwrr.act.curr_pos = p->lbprm.fwrr.act.curr_weight = - p->lbprm.fwrr.act.next_weight = p->lbprm.tot_wact; - p->lbprm.fwrr.act.curr = p->lbprm.fwrr.act.t0 = - p->lbprm.fwrr.act.t1 = init_head; - p->lbprm.fwrr.act.init = &p->lbprm.fwrr.act.t0; - p->lbprm.fwrr.act.next = &p->lbprm.fwrr.act.t1; - - /* prepare the backup servers group */ - p->lbprm.fwrr.bck.curr_pos = p->lbprm.fwrr.bck.curr_weight = - p->lbprm.fwrr.bck.next_weight = p->lbprm.tot_wbck; - p->lbprm.fwrr.bck.curr = p->lbprm.fwrr.bck.t0 = - p->lbprm.fwrr.bck.t1 = init_head; - p->lbprm.fwrr.bck.init = &p->lbprm.fwrr.bck.t0; - p->lbprm.fwrr.bck.next = &p->lbprm.fwrr.bck.t1; - - /* queue active and backup servers in two distinct groups */ - for (srv = p->srv; srv; srv = srv->next) { - if (!srv_is_usable(srv->state, srv->eweight)) - continue; - fwrr_queue_by_weight((srv->state & SRV_BACKUP) ? - p->lbprm.fwrr.bck.init : - p->lbprm.fwrr.act.init, - srv); - } -} - -/* simply removes a server from a weight tree */ -static inline void fwrr_dequeue_srv(struct server *s) -{ - eb32_delete(&s->lb_node); -} - -/* queues a server into the appropriate group and tree depending on its - * backup status, and ->npos. If the server is disabled, simply assign - * it to the NULL tree. - */ -static void fwrr_queue_srv(struct server *s) -{ - struct proxy *p = s->proxy; - struct fwrr_group *grp; - - grp = (s->state & SRV_BACKUP) ? 
&p->lbprm.fwrr.bck : &p->lbprm.fwrr.act; - - /* Delay everything which does not fit into the window and everything - * which does not fit into the theorical new window. - */ - if (!srv_is_usable(s->state, s->eweight)) { - fwrr_remove_from_tree(s); - } - else if (s->eweight <= 0 || - s->npos >= 2 * grp->curr_weight || - s->npos >= grp->curr_weight + grp->next_weight) { - /* put into next tree, and readjust npos in case we could - * finally take this back to current. */ - s->npos -= grp->curr_weight; - fwrr_queue_by_weight(grp->next, s); - } - else { - /* The sorting key is stored in units of s->npos * user_weight - * in order to avoid overflows. As stated in backend.h, the - * lower the scale, the rougher the weights modulation, and the - * higher the scale, the lower the number of servers without - * overflow. With this formula, the result is always positive, - * so we can use eb3é_insert(). - */ - s->lb_node.key = SRV_UWGHT_RANGE * s->npos + - (unsigned)(SRV_EWGHT_MAX + s->rweight - s->eweight) / BE_WEIGHT_SCALE; - - eb32_insert(&grp->curr, &s->lb_node); - s->lb_tree = &grp->curr; - } -} - -/* prepares a server when extracting it from the "init" tree */ -static inline void fwrr_get_srv_init(struct server *s) -{ - s->npos = s->rweight = 0; -} - -/* prepares a server when extracting it from the "next" tree */ -static inline void fwrr_get_srv_next(struct server *s) -{ - struct fwrr_group *grp = (s->state & SRV_BACKUP) ? - &s->proxy->lbprm.fwrr.bck : - &s->proxy->lbprm.fwrr.act; - - s->npos += grp->curr_weight; -} - -/* prepares a server when it was marked down */ -static inline void fwrr_get_srv_down(struct server *s) -{ - struct fwrr_group *grp = (s->state & SRV_BACKUP) ? - &s->proxy->lbprm.fwrr.bck : - &s->proxy->lbprm.fwrr.act; - - s->npos = grp->curr_pos; -} - -/* prepares a server when extracting it from its tree */ -static void fwrr_get_srv(struct server *s) -{ - struct proxy *p = s->proxy; - struct fwrr_group *grp = (s->state & SRV_BACKUP) ? 
- &p->lbprm.fwrr.bck : - &p->lbprm.fwrr.act; - - if (s->lb_tree == grp->init) { - fwrr_get_srv_init(s); - } - else if (s->lb_tree == grp->next) { - fwrr_get_srv_next(s); - } - else if (s->lb_tree == NULL) { - fwrr_get_srv_down(s); - } -} - -/* switches trees "init" and "next" for FWRR group . "init" should be empty - * when this happens, and "next" filled with servers sorted by weights. - */ -static inline void fwrr_switch_trees(struct fwrr_group *grp) -{ - struct eb_root *swap; - swap = grp->init; - grp->init = grp->next; - grp->next = swap; - grp->curr_weight = grp->next_weight; - grp->curr_pos = grp->curr_weight; -} - -/* return next server from the current tree in FWRR group , or a server - * from the "init" tree if appropriate. If both trees are empty, return NULL. - */ -static struct server *fwrr_get_server_from_group(struct fwrr_group *grp) -{ - struct eb32_node *node; - struct server *s; - - node = eb32_first(&grp->curr); - s = eb32_entry(node, struct server, lb_node); - - if (!node || s->npos > grp->curr_pos) { - /* either we have no server left, or we have a hole */ - struct eb32_node *node2; - node2 = eb32_first(grp->init); - if (node2) { - node = node2; - s = eb32_entry(node, struct server, lb_node); - fwrr_get_srv_init(s); - if (s->eweight == 0) /* FIXME: is it possible at all ? */ - node = NULL; - } - } - if (node) - return s; - else - return NULL; -} - -/* Computes next position of server in the group. It is mandatory for - * to have a non-zero, positive eweight. 
-*/ -static inline void fwrr_update_position(struct fwrr_group *grp, struct server *s) -{ - if (!s->npos) { - /* first time ever for this server */ - s->lpos = grp->curr_pos; - s->npos = grp->curr_pos + grp->next_weight / s->eweight; - s->rweight += grp->next_weight % s->eweight; - - if (s->rweight >= s->eweight) { - s->rweight -= s->eweight; - s->npos++; - } - } else { - s->lpos = s->npos; - s->npos += grp->next_weight / s->eweight; - s->rweight += grp->next_weight % s->eweight; - - if (s->rweight >= s->eweight) { - s->rweight -= s->eweight; - s->npos++; - } - } -} - -/* Return next server from the current tree in backend

, or a server from - * the init tree if appropriate. If both trees are empty, return NULL. - * Saturated servers are skipped and requeued. - */ -static struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid) -{ - struct server *srv, *full, *avoided; - struct fwrr_group *grp; - int switched; - - if (p->srv_act) - grp = &p->lbprm.fwrr.act; - else if (p->lbprm.fbck) - return p->lbprm.fbck; - else if (p->srv_bck) - grp = &p->lbprm.fwrr.bck; - else - return NULL; - - switched = 0; - avoided = NULL; - full = NULL; /* NULL-terminated list of saturated servers */ - while (1) { - /* if we see an empty group, let's first try to collect weights - * which might have recently changed. - */ - if (!grp->curr_weight) - grp->curr_pos = grp->curr_weight = grp->next_weight; - - /* get first server from the "current" tree. When the end of - * the tree is reached, we may have to switch, but only once. - */ - while (1) { - srv = fwrr_get_server_from_group(grp); - if (srv) - break; - if (switched) { - if (avoided) { - srv = avoided; - break; - } - goto requeue_servers; - } - switched = 1; - fwrr_switch_trees(grp); - - } - - /* OK, we have a server. However, it may be saturated, in which - * case we don't want to reconsider it for now. We'll update - * its position and dequeue it anyway, so that we can move it - * to a better place afterwards. - */ - fwrr_update_position(grp, srv); - fwrr_dequeue_srv(srv); - grp->curr_pos++; - if (!srv->maxconn || (!srv->nbpend && srv->served < srv_dynamic_maxconn(srv))) { - /* make sure it is not the server we are trying to exclude... */ - if (srv != srvtoavoid || avoided) - break; - - avoided = srv; /* ...but remember that is was selected yet avoided */ - } - - /* the server is saturated or avoided, let's chain it for later reinsertion */ - srv->next_full = full; - full = srv; - } - - /* OK, we got the best server, let's update it */ - fwrr_queue_srv(srv); - - requeue_servers: - /* Requeue all extracted servers. 
If full==srv then it was - * avoided (unsucessfully) and chained, omit it now. - */ - if (unlikely(full != NULL)) { - if (switched) { - /* the tree has switched, requeue all extracted servers - * into "init", because their place was lost, and only - * their weight matters. - */ - do { - if (likely(full != srv)) - fwrr_queue_by_weight(grp->init, full); - full = full->next_full; - } while (full); - } else { - /* requeue all extracted servers just as if they were consumed - * so that they regain their expected place. - */ - do { - if (likely(full != srv)) - fwrr_queue_srv(full); - full = full->next_full; - } while (full); - } - } - return srv; -} - -/* Remove a server from a tree. It must have previously been dequeued. This - * function is meant to be called when a server is going down or has its - * weight disabled. - */ -static inline void fwlc_remove_from_tree(struct server *s) -{ - s->lb_tree = NULL; -} - -/* simply removes a server from a tree */ -static inline void fwlc_dequeue_srv(struct server *s) -{ - eb32_delete(&s->lb_node); -} - -/* Queue a server in its associated tree, assuming the weight is >0. - * Servers are sorted by #conns/weight. To ensure maximum accuracy, - * we use #conns*SRV_EWGHT_MAX/eweight as the sorting key. - */ -static inline void fwlc_queue_srv(struct server *s) -{ - s->lb_node.key = s->served * SRV_EWGHT_MAX / s->eweight; - eb32_insert(s->lb_tree, &s->lb_node); -} - -/* Re-position the server in the FWLC tree after it has been assigned one - * connection or after it has released one. Note that it is possible that - * the server has been moved out of the tree due to failed health-checks. - */ -static void fwlc_srv_reposition(struct server *s) -{ - if (!s->lb_tree) - return; - fwlc_dequeue_srv(s); - fwlc_queue_srv(s); -} - -/* This function updates the server trees according to server 's new - * state. It should be called when server 's status changes to down. - * It is not important whether the server was already down or not. 
It is not - * important either that the new state is completely down (the caller may not - * know all the variables of a server's state). - */ -static void fwlc_set_server_status_down(struct server *srv) -{ - struct proxy *p = srv->proxy; - - if (srv->state == srv->prev_state && - srv->eweight == srv->prev_eweight) - return; - - if (srv_is_usable(srv->state, srv->eweight)) - goto out_update_state; - - if (!srv_is_usable(srv->prev_state, srv->prev_eweight)) - /* server was already down */ - goto out_update_backend; - - if (srv->state & SRV_BACKUP) { - p->lbprm.tot_wbck -= srv->prev_eweight; - p->srv_bck--; - - if (srv == p->lbprm.fbck) { - /* we lost the first backup server in a single-backup - * configuration, we must search another one. - */ - struct server *srv2 = p->lbprm.fbck; - do { - srv2 = srv2->next; - } while (srv2 && - !((srv2->state & SRV_BACKUP) && - srv_is_usable(srv2->state, srv2->eweight))); - p->lbprm.fbck = srv2; - } - } else { - p->lbprm.tot_wact -= srv->prev_eweight; - p->srv_act--; - } - - fwlc_dequeue_srv(srv); - fwlc_remove_from_tree(srv); - -out_update_backend: - /* check/update tot_used, tot_weight */ - update_backend_weight(p); - out_update_state: - srv->prev_state = srv->state; - srv->prev_eweight = srv->eweight; -} - -/* This function updates the server trees according to server 's new - * state. It should be called when server 's status changes to up. - * It is not important whether the server was already down or not. It is not - * important either that the new state is completely UP (the caller may not - * know all the variables of a server's state). This function will not change - * the weight of a server which was already up. 
- */ -static void fwlc_set_server_status_up(struct server *srv) -{ - struct proxy *p = srv->proxy; - - if (srv->state == srv->prev_state && - srv->eweight == srv->prev_eweight) - return; - - if (!srv_is_usable(srv->state, srv->eweight)) - goto out_update_state; - - if (srv_is_usable(srv->prev_state, srv->prev_eweight)) - /* server was already up */ - goto out_update_backend; - - if (srv->state & SRV_BACKUP) { - srv->lb_tree = &p->lbprm.fwlc.bck; - p->lbprm.tot_wbck += srv->eweight; - p->srv_bck++; - - if (!(p->options & PR_O_USE_ALL_BK)) { - if (!p->lbprm.fbck) { - /* there was no backup server anymore */ - p->lbprm.fbck = srv; - } else { - /* we may have restored a backup server prior to fbck, - * in which case it should replace it. - */ - struct server *srv2 = srv; - do { - srv2 = srv2->next; - } while (srv2 && (srv2 != p->lbprm.fbck)); - if (srv2) - p->lbprm.fbck = srv; - } - } - } else { - srv->lb_tree = &p->lbprm.fwlc.act; - p->lbprm.tot_wact += srv->eweight; - p->srv_act++; - } - - /* note that eweight cannot be 0 here */ - fwlc_queue_srv(srv); - - out_update_backend: - /* check/update tot_used, tot_weight */ - update_backend_weight(p); - out_update_state: - srv->prev_state = srv->state; - srv->prev_eweight = srv->eweight; -} - -/* This function must be called after an update to server 's effective - * weight. It may be called after a state change too. - */ -static void fwlc_update_server_weight(struct server *srv) -{ - int old_state, new_state; - struct proxy *p = srv->proxy; - - if (srv->state == srv->prev_state && - srv->eweight == srv->prev_eweight) - return; - - /* If changing the server's weight changes its state, we simply apply - * the procedures we already have for status change. If the state - * remains down, the server is not in any tree, so it's as easy as - * updating its values. If the state remains up with different weights, - * there are some computations to perform to find a new place and - * possibly a new tree for this server. 
- */ - - old_state = srv_is_usable(srv->prev_state, srv->prev_eweight); - new_state = srv_is_usable(srv->state, srv->eweight); - - if (!old_state && !new_state) { - srv->prev_state = srv->state; - srv->prev_eweight = srv->eweight; - return; - } - else if (!old_state && new_state) { - fwlc_set_server_status_up(srv); - return; - } - else if (old_state && !new_state) { - fwlc_set_server_status_down(srv); - return; - } - - if (srv->lb_tree) - fwlc_dequeue_srv(srv); - - if (srv->state & SRV_BACKUP) { - p->lbprm.tot_wbck += srv->eweight - srv->prev_eweight; - srv->lb_tree = &p->lbprm.fwlc.bck; - } else { - p->lbprm.tot_wact += srv->eweight - srv->prev_eweight; - srv->lb_tree = &p->lbprm.fwlc.act; - } - - fwlc_queue_srv(srv); - - update_backend_weight(p); - srv->prev_state = srv->state; - srv->prev_eweight = srv->eweight; -} - -/* This function is responsible for building the trees in case of fast - * weighted least-conns. It also sets p->lbprm.wdiv to the eweight to - * uweight ratio. Both active and backup groups are initialized. - */ -void fwlc_init_server_tree(struct proxy *p) -{ - struct server *srv; - struct eb_root init_head = EB_ROOT; - - p->lbprm.set_server_status_up = fwlc_set_server_status_up; - p->lbprm.set_server_status_down = fwlc_set_server_status_down; - p->lbprm.update_server_eweight = fwlc_update_server_weight; - p->lbprm.server_take_conn = fwlc_srv_reposition; - p->lbprm.server_drop_conn = fwlc_srv_reposition; - - p->lbprm.wdiv = BE_WEIGHT_SCALE; - for (srv = p->srv; srv; srv = srv->next) { - srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE; - srv->prev_state = srv->state; - } - - recount_servers(p); - update_backend_weight(p); - - p->lbprm.fwlc.act = init_head; - p->lbprm.fwlc.bck = init_head; - - /* queue active and backup servers in two distinct groups */ - for (srv = p->srv; srv; srv = srv->next) { - if (!srv_is_usable(srv->state, srv->eweight)) - continue; - srv->lb_tree = (srv->state & SRV_BACKUP) ? 
&p->lbprm.fwlc.bck : &p->lbprm.fwlc.act; - fwlc_queue_srv(srv); - } -} - -/* Return next server from the FWLC tree in backend <p>. If the tree is empty, - * return NULL. Saturated servers are skipped. - */ -static struct server *fwlc_get_next_server(struct proxy *p, struct server *srvtoavoid) -{ - struct server *srv, *avoided; - struct eb32_node *node; - - srv = avoided = NULL; - - if (p->srv_act) - node = eb32_first(&p->lbprm.fwlc.act); - else if (p->lbprm.fbck) - return p->lbprm.fbck; - else if (p->srv_bck) - node = eb32_first(&p->lbprm.fwlc.bck); - else - return NULL; - - while (node) { - /* OK, we have a server. However, it may be saturated, in which - * case we don't want to reconsider it for now, so we'll simply - * skip it. Same if it's the server we try to avoid, in which - * case we simply remember it for later use if needed. - */ - struct server *s; - - s = eb32_entry(node, struct server, lb_node); - if (!s->maxconn || (!s->nbpend && s->served < srv_dynamic_maxconn(s))) { - if (s != srvtoavoid) { - srv = s; - break; - } - avoided = s; - } - node = eb32_next(node); - } - - if (!srv) - srv = avoided; - - return srv; -} - /* * This function tries to find a running server for the proxy following * the URL parameter hash method.
It looks for a specific parameter in the @@ -1495,7 +482,7 @@ int assign_server(struct session *s) if (!s->srv) { /* parameter not found, fall back to round robin on the map */ - s->srv = get_server_rr_with_conns(s->be, s->prev_srv); + s->srv = map_get_server_rr(s->be, s->prev_srv); if (!s->srv) { err = SRV_STATUS_FULL; goto out; @@ -1508,7 +495,7 @@ int assign_server(struct session *s) if (!s->srv) { /* parameter not found, fall back to round robin on the map */ - s->srv = get_server_rr_with_conns(s->be, s->prev_srv); + s->srv = map_get_server_rr(s->be, s->prev_srv); if (!s->srv) { err = SRV_STATUS_FULL; goto out; @@ -1521,7 +508,7 @@ int assign_server(struct session *s) if (!s->srv) { /* parameter not found, fall back to round robin on the map */ - s->srv = get_server_rr_with_conns(s->be, s->prev_srv); + s->srv = map_get_server_rr(s->be, s->prev_srv); if (!s->srv) { err = SRV_STATUS_FULL; goto out; @@ -2165,53 +1152,6 @@ static void __backend_init(void) acl_register_keywords(&acl_kws); } -/* - * This function tries to find a running server with free connection slots for - * the proxy following the round-robin method. - * If any server is found, it will be returned and px->lbprm.map.rr_idx will be updated - * to point to the next server. If no valid server is found, NULL is returned. - */ -struct server *get_server_rr_with_conns(struct proxy *px, struct server *srvtoavoid) -{ - int newidx, avoididx; - struct server *srv, *avoided; - - if (px->lbprm.tot_weight == 0) - return NULL; - - if (px->lbprm.map.state & PR_MAP_RECALC) - recalc_server_map(px); - - if (px->lbprm.map.rr_idx < 0 || px->lbprm.map.rr_idx >= px->lbprm.tot_weight) - px->lbprm.map.rr_idx = 0; - newidx = px->lbprm.map.rr_idx; - - avoided = NULL; - avoididx = 0; /* shut a gcc warning */ - do { - srv = px->lbprm.map.srv[newidx++]; - if (!srv->maxconn || srv->cur_sess < srv_dynamic_maxconn(srv)) { - /* make sure it is not the server we are try to exclude... 
*/ - if (srv != srvtoavoid) { - px->lbprm.map.rr_idx = newidx; - return srv; - } - - avoided = srv; /* ...but remember that is was selected yet avoided */ - avoididx = newidx; - } - if (newidx == px->lbprm.tot_weight) - newidx = 0; - } while (newidx != px->lbprm.map.rr_idx); - - if (avoided) - px->lbprm.map.rr_idx = avoididx; - - /* return NULL or srvtoavoid if found */ - return avoided; -} - - /* * This function tries to find a running server for the proxy following * the source hash method. Depending on the number of active/backup servers, diff --git a/src/cfgparse.c b/src/cfgparse.c index fd273b085..793865478 100644 --- a/src/cfgparse.c +++ b/src/cfgparse.c @@ -42,6 +42,9 @@ #include #include #include +#include +#include +#include #include #include #include diff --git a/src/lb_fwlc.c b/src/lb_fwlc.c new file mode 100644 index 000000000..5d1a31be2 --- /dev/null +++ b/src/lb_fwlc.c @@ -0,0 +1,316 @@ +/* + * Fast Weighted Least Connection load balancing algorithm. + * + * Copyright 2000-2009 Willy Tarreau + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include + +#include +#include + +#include +#include + + +/* Remove a server from a tree. It must have previously been dequeued. This + * function is meant to be called when a server is going down or has its + * weight disabled. + */ +static inline void fwlc_remove_from_tree(struct server *s) +{ + s->lb_tree = NULL; +} + +/* simply removes a server from a tree */ +static inline void fwlc_dequeue_srv(struct server *s) +{ + eb32_delete(&s->lb_node); +} + +/* Queue a server in its associated tree, assuming the weight is >0. + * Servers are sorted by #conns/weight. To ensure maximum accuracy, + * we use #conns*SRV_EWGHT_MAX/eweight as the sorting key. 
+ */ +static inline void fwlc_queue_srv(struct server *s) +{ + s->lb_node.key = s->served * SRV_EWGHT_MAX / s->eweight; + eb32_insert(s->lb_tree, &s->lb_node); +} + +/* Re-position the server in the FWLC tree after it has been assigned one + * connection or after it has released one. Note that it is possible that + * the server has been moved out of the tree due to failed health-checks. + */ +static void fwlc_srv_reposition(struct server *s) +{ + if (!s->lb_tree) + return; + fwlc_dequeue_srv(s); + fwlc_queue_srv(s); +} + +/* This function updates the server trees according to server 's new + * state. It should be called when server 's status changes to down. + * It is not important whether the server was already down or not. It is not + * important either that the new state is completely down (the caller may not + * know all the variables of a server's state). + */ +static void fwlc_set_server_status_down(struct server *srv) +{ + struct proxy *p = srv->proxy; + + if (srv->state == srv->prev_state && + srv->eweight == srv->prev_eweight) + return; + + if (srv_is_usable(srv->state, srv->eweight)) + goto out_update_state; + + if (!srv_is_usable(srv->prev_state, srv->prev_eweight)) + /* server was already down */ + goto out_update_backend; + + if (srv->state & SRV_BACKUP) { + p->lbprm.tot_wbck -= srv->prev_eweight; + p->srv_bck--; + + if (srv == p->lbprm.fbck) { + /* we lost the first backup server in a single-backup + * configuration, we must search another one. 
+ */ + struct server *srv2 = p->lbprm.fbck; + do { + srv2 = srv2->next; + } while (srv2 && + !((srv2->state & SRV_BACKUP) && + srv_is_usable(srv2->state, srv2->eweight))); + p->lbprm.fbck = srv2; + } + } else { + p->lbprm.tot_wact -= srv->prev_eweight; + p->srv_act--; + } + + fwlc_dequeue_srv(srv); + fwlc_remove_from_tree(srv); + +out_update_backend: + /* check/update tot_used, tot_weight */ + update_backend_weight(p); + out_update_state: + srv->prev_state = srv->state; + srv->prev_eweight = srv->eweight; +} + +/* This function updates the server trees according to server 's new + * state. It should be called when server 's status changes to up. + * It is not important whether the server was already down or not. It is not + * important either that the new state is completely UP (the caller may not + * know all the variables of a server's state). This function will not change + * the weight of a server which was already up. + */ +static void fwlc_set_server_status_up(struct server *srv) +{ + struct proxy *p = srv->proxy; + + if (srv->state == srv->prev_state && + srv->eweight == srv->prev_eweight) + return; + + if (!srv_is_usable(srv->state, srv->eweight)) + goto out_update_state; + + if (srv_is_usable(srv->prev_state, srv->prev_eweight)) + /* server was already up */ + goto out_update_backend; + + if (srv->state & SRV_BACKUP) { + srv->lb_tree = &p->lbprm.fwlc.bck; + p->lbprm.tot_wbck += srv->eweight; + p->srv_bck++; + + if (!(p->options & PR_O_USE_ALL_BK)) { + if (!p->lbprm.fbck) { + /* there was no backup server anymore */ + p->lbprm.fbck = srv; + } else { + /* we may have restored a backup server prior to fbck, + * in which case it should replace it. 
+ */ + struct server *srv2 = srv; + do { + srv2 = srv2->next; + } while (srv2 && (srv2 != p->lbprm.fbck)); + if (srv2) + p->lbprm.fbck = srv; + } + } + } else { + srv->lb_tree = &p->lbprm.fwlc.act; + p->lbprm.tot_wact += srv->eweight; + p->srv_act++; + } + + /* note that eweight cannot be 0 here */ + fwlc_queue_srv(srv); + + out_update_backend: + /* check/update tot_used, tot_weight */ + update_backend_weight(p); + out_update_state: + srv->prev_state = srv->state; + srv->prev_eweight = srv->eweight; +} + +/* This function must be called after an update to server 's effective + * weight. It may be called after a state change too. + */ +static void fwlc_update_server_weight(struct server *srv) +{ + int old_state, new_state; + struct proxy *p = srv->proxy; + + if (srv->state == srv->prev_state && + srv->eweight == srv->prev_eweight) + return; + + /* If changing the server's weight changes its state, we simply apply + * the procedures we already have for status change. If the state + * remains down, the server is not in any tree, so it's as easy as + * updating its values. If the state remains up with different weights, + * there are some computations to perform to find a new place and + * possibly a new tree for this server. 
+ */ + + old_state = srv_is_usable(srv->prev_state, srv->prev_eweight); + new_state = srv_is_usable(srv->state, srv->eweight); + + if (!old_state && !new_state) { + srv->prev_state = srv->state; + srv->prev_eweight = srv->eweight; + return; + } + else if (!old_state && new_state) { + fwlc_set_server_status_up(srv); + return; + } + else if (old_state && !new_state) { + fwlc_set_server_status_down(srv); + return; + } + + if (srv->lb_tree) + fwlc_dequeue_srv(srv); + + if (srv->state & SRV_BACKUP) { + p->lbprm.tot_wbck += srv->eweight - srv->prev_eweight; + srv->lb_tree = &p->lbprm.fwlc.bck; + } else { + p->lbprm.tot_wact += srv->eweight - srv->prev_eweight; + srv->lb_tree = &p->lbprm.fwlc.act; + } + + fwlc_queue_srv(srv); + + update_backend_weight(p); + srv->prev_state = srv->state; + srv->prev_eweight = srv->eweight; +} + +/* This function is responsible for building the trees in case of fast + * weighted least-conns. It also sets p->lbprm.wdiv to the eweight to + * uweight ratio. Both active and backup groups are initialized. + */ +void fwlc_init_server_tree(struct proxy *p) +{ + struct server *srv; + struct eb_root init_head = EB_ROOT; + + p->lbprm.set_server_status_up = fwlc_set_server_status_up; + p->lbprm.set_server_status_down = fwlc_set_server_status_down; + p->lbprm.update_server_eweight = fwlc_update_server_weight; + p->lbprm.server_take_conn = fwlc_srv_reposition; + p->lbprm.server_drop_conn = fwlc_srv_reposition; + + p->lbprm.wdiv = BE_WEIGHT_SCALE; + for (srv = p->srv; srv; srv = srv->next) { + srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE; + srv->prev_state = srv->state; + } + + recount_servers(p); + update_backend_weight(p); + + p->lbprm.fwlc.act = init_head; + p->lbprm.fwlc.bck = init_head; + + /* queue active and backup servers in two distinct groups */ + for (srv = p->srv; srv; srv = srv->next) { + if (!srv_is_usable(srv->state, srv->eweight)) + continue; + srv->lb_tree = (srv->state & SRV_BACKUP) ? 
&p->lbprm.fwlc.bck : &p->lbprm.fwlc.act; + fwlc_queue_srv(srv); + } +} + +/* Return next server from the FWLC tree in backend <p>. If the tree is empty, + * return NULL. Saturated servers are skipped. + */ +struct server *fwlc_get_next_server(struct proxy *p, struct server *srvtoavoid) +{ + struct server *srv, *avoided; + struct eb32_node *node; + + srv = avoided = NULL; + + if (p->srv_act) + node = eb32_first(&p->lbprm.fwlc.act); + else if (p->lbprm.fbck) + return p->lbprm.fbck; + else if (p->srv_bck) + node = eb32_first(&p->lbprm.fwlc.bck); + else + return NULL; + + while (node) { + /* OK, we have a server. However, it may be saturated, in which + * case we don't want to reconsider it for now, so we'll simply + * skip it. Same if it's the server we try to avoid, in which + * case we simply remember it for later use if needed. + */ + struct server *s; + + s = eb32_entry(node, struct server, lb_node); + if (!s->maxconn || (!s->nbpend && s->served < srv_dynamic_maxconn(s))) { + if (s != srvtoavoid) { + srv = s; + break; + } + avoided = s; + } + node = eb32_next(node); + } + + if (!srv) + srv = avoided; + + return srv; +} + + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/src/lb_fwrr.c b/src/lb_fwrr.c new file mode 100644 index 000000000..84d179da3 --- /dev/null +++ b/src/lb_fwrr.c @@ -0,0 +1,576 @@ +/* + * Fast Weighted Round Robin load balancing algorithm. + * + * Copyright 2000-2009 Willy Tarreau + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version.
+ * + */ + +#include +#include +#include +#include + +#include +#include + +#include +#include + +static inline void fwrr_remove_from_tree(struct server *s); +static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s); +static inline void fwrr_dequeue_srv(struct server *s); +static void fwrr_get_srv(struct server *s); +static void fwrr_queue_srv(struct server *s); + + +/* This function updates the server trees according to server 's new + * state. It should be called when server 's status changes to down. + * It is not important whether the server was already down or not. It is not + * important either that the new state is completely down (the caller may not + * know all the variables of a server's state). + */ +static void fwrr_set_server_status_down(struct server *srv) +{ + struct proxy *p = srv->proxy; + struct fwrr_group *grp; + + if (srv->state == srv->prev_state && + srv->eweight == srv->prev_eweight) + return; + + if (srv_is_usable(srv->state, srv->eweight)) + goto out_update_state; + + if (!srv_is_usable(srv->prev_state, srv->prev_eweight)) + /* server was already down */ + goto out_update_backend; + + grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act; + grp->next_weight -= srv->prev_eweight; + + if (srv->state & SRV_BACKUP) { + p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight; + p->srv_bck--; + + if (srv == p->lbprm.fbck) { + /* we lost the first backup server in a single-backup + * configuration, we must search another one. 
+ */ + struct server *srv2 = p->lbprm.fbck; + do { + srv2 = srv2->next; + } while (srv2 && + !((srv2->state & SRV_BACKUP) && + srv_is_usable(srv2->state, srv2->eweight))); + p->lbprm.fbck = srv2; + } + } else { + p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight; + p->srv_act--; + } + + fwrr_dequeue_srv(srv); + fwrr_remove_from_tree(srv); + +out_update_backend: + /* check/update tot_used, tot_weight */ + update_backend_weight(p); + out_update_state: + srv->prev_state = srv->state; + srv->prev_eweight = srv->eweight; +} + +/* This function updates the server trees according to server 's new + * state. It should be called when server 's status changes to up. + * It is not important whether the server was already down or not. It is not + * important either that the new state is completely UP (the caller may not + * know all the variables of a server's state). This function will not change + * the weight of a server which was already up. + */ +static void fwrr_set_server_status_up(struct server *srv) +{ + struct proxy *p = srv->proxy; + struct fwrr_group *grp; + + if (srv->state == srv->prev_state && + srv->eweight == srv->prev_eweight) + return; + + if (!srv_is_usable(srv->state, srv->eweight)) + goto out_update_state; + + if (srv_is_usable(srv->prev_state, srv->prev_eweight)) + /* server was already up */ + goto out_update_backend; + + grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act; + grp->next_weight += srv->eweight; + + if (srv->state & SRV_BACKUP) { + p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight; + p->srv_bck++; + + if (!(p->options & PR_O_USE_ALL_BK)) { + if (!p->lbprm.fbck) { + /* there was no backup server anymore */ + p->lbprm.fbck = srv; + } else { + /* we may have restored a backup server prior to fbck, + * in which case it should replace it. 
+ */ + struct server *srv2 = srv; + do { + srv2 = srv2->next; + } while (srv2 && (srv2 != p->lbprm.fbck)); + if (srv2) + p->lbprm.fbck = srv; + } + } + } else { + p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight; + p->srv_act++; + } + + /* note that eweight cannot be 0 here */ + fwrr_get_srv(srv); + srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight; + fwrr_queue_srv(srv); + +out_update_backend: + /* check/update tot_used, tot_weight */ + update_backend_weight(p); + out_update_state: + srv->prev_state = srv->state; + srv->prev_eweight = srv->eweight; +} + +/* This function must be called after an update to server 's effective + * weight. It may be called after a state change too. + */ +static void fwrr_update_server_weight(struct server *srv) +{ + int old_state, new_state; + struct proxy *p = srv->proxy; + struct fwrr_group *grp; + + if (srv->state == srv->prev_state && + srv->eweight == srv->prev_eweight) + return; + + /* If changing the server's weight changes its state, we simply apply + * the procedures we already have for status change. If the state + * remains down, the server is not in any tree, so it's as easy as + * updating its values. If the state remains up with different weights, + * there are some computations to perform to find a new place and + * possibly a new tree for this server. + */ + + old_state = srv_is_usable(srv->prev_state, srv->prev_eweight); + new_state = srv_is_usable(srv->state, srv->eweight); + + if (!old_state && !new_state) { + srv->prev_state = srv->state; + srv->prev_eweight = srv->eweight; + return; + } + else if (!old_state && new_state) { + fwrr_set_server_status_up(srv); + return; + } + else if (old_state && !new_state) { + fwrr_set_server_status_down(srv); + return; + } + + grp = (srv->state & SRV_BACKUP) ? 
&p->lbprm.fwrr.bck : &p->lbprm.fwrr.act; + grp->next_weight = grp->next_weight - srv->prev_eweight + srv->eweight; + + p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight; + p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight; + + if (srv->lb_tree == grp->init) { + fwrr_dequeue_srv(srv); + fwrr_queue_by_weight(grp->init, srv); + } + else if (!srv->lb_tree) { + /* FIXME: server was down. This is not possible right now but + * may be needed soon for slowstart or graceful shutdown. + */ + fwrr_dequeue_srv(srv); + fwrr_get_srv(srv); + srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight; + fwrr_queue_srv(srv); + } else { + /* The server is either active or in the next queue. If it's + * still in the active queue and it has not consumed all of its + * places, let's adjust its next position. + */ + fwrr_get_srv(srv); + + if (srv->eweight > 0) { + int prev_next = srv->npos; + int step = grp->next_weight / srv->eweight; + + srv->npos = srv->lpos + step; + srv->rweight = 0; + + if (srv->npos > prev_next) + srv->npos = prev_next; + if (srv->npos < grp->curr_pos + 2) + srv->npos = grp->curr_pos + step; + } else { + /* push it into the next tree */ + srv->npos = grp->curr_pos + grp->curr_weight; + } + + fwrr_dequeue_srv(srv); + fwrr_queue_srv(srv); + } + + update_backend_weight(p); + srv->prev_state = srv->state; + srv->prev_eweight = srv->eweight; +} + +/* Remove a server from a tree. It must have previously been dequeued. This + * function is meant to be called when a server is going down or has its + * weight disabled. + */ +static inline void fwrr_remove_from_tree(struct server *s) +{ + s->lb_tree = NULL; +} + +/* Queue a server in the weight tree , assuming the weight is >0. + * We want to sort them by inverted weights, because we need to place + * heavy servers first in order to get a smooth distribution. 
+ */ +static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s) +{ + s->lb_node.key = SRV_EWGHT_MAX - s->eweight; + eb32_insert(root, &s->lb_node); + s->lb_tree = root; +} + +/* This function is responsible for building the weight trees in case of fast + * weighted round-robin. It also sets p->lbprm.wdiv to the eweight to uweight + * ratio. Both active and backup groups are initialized. + */ +void fwrr_init_server_groups(struct proxy *p) +{ + struct server *srv; + struct eb_root init_head = EB_ROOT; + + p->lbprm.set_server_status_up = fwrr_set_server_status_up; + p->lbprm.set_server_status_down = fwrr_set_server_status_down; + p->lbprm.update_server_eweight = fwrr_update_server_weight; + + p->lbprm.wdiv = BE_WEIGHT_SCALE; + for (srv = p->srv; srv; srv = srv->next) { + srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE; + srv->prev_state = srv->state; + } + + recount_servers(p); + update_backend_weight(p); + + /* prepare the active servers group */ + p->lbprm.fwrr.act.curr_pos = p->lbprm.fwrr.act.curr_weight = + p->lbprm.fwrr.act.next_weight = p->lbprm.tot_wact; + p->lbprm.fwrr.act.curr = p->lbprm.fwrr.act.t0 = + p->lbprm.fwrr.act.t1 = init_head; + p->lbprm.fwrr.act.init = &p->lbprm.fwrr.act.t0; + p->lbprm.fwrr.act.next = &p->lbprm.fwrr.act.t1; + + /* prepare the backup servers group */ + p->lbprm.fwrr.bck.curr_pos = p->lbprm.fwrr.bck.curr_weight = + p->lbprm.fwrr.bck.next_weight = p->lbprm.tot_wbck; + p->lbprm.fwrr.bck.curr = p->lbprm.fwrr.bck.t0 = + p->lbprm.fwrr.bck.t1 = init_head; + p->lbprm.fwrr.bck.init = &p->lbprm.fwrr.bck.t0; + p->lbprm.fwrr.bck.next = &p->lbprm.fwrr.bck.t1; + + /* queue active and backup servers in two distinct groups */ + for (srv = p->srv; srv; srv = srv->next) { + if (!srv_is_usable(srv->state, srv->eweight)) + continue; + fwrr_queue_by_weight((srv->state & SRV_BACKUP) ? 
+ p->lbprm.fwrr.bck.init : + p->lbprm.fwrr.act.init, + srv); + } +} + +/* simply removes a server from a weight tree */ +static inline void fwrr_dequeue_srv(struct server *s) +{ + eb32_delete(&s->lb_node); +} + +/* queues a server into the appropriate group and tree depending on its + * backup status, and ->npos. If the server is disabled, simply assign + * it to the NULL tree. + */ +static void fwrr_queue_srv(struct server *s) +{ + struct proxy *p = s->proxy; + struct fwrr_group *grp; + + grp = (s->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act; + + /* Delay everything which does not fit into the window and everything + * which does not fit into the theorical new window. + */ + if (!srv_is_usable(s->state, s->eweight)) { + fwrr_remove_from_tree(s); + } + else if (s->eweight <= 0 || + s->npos >= 2 * grp->curr_weight || + s->npos >= grp->curr_weight + grp->next_weight) { + /* put into next tree, and readjust npos in case we could + * finally take this back to current. */ + s->npos -= grp->curr_weight; + fwrr_queue_by_weight(grp->next, s); + } + else { + /* The sorting key is stored in units of s->npos * user_weight + * in order to avoid overflows. As stated in backend.h, the + * lower the scale, the rougher the weights modulation, and the + * higher the scale, the lower the number of servers without + * overflow. With this formula, the result is always positive, + * so we can use eb3é_insert(). + */ + s->lb_node.key = SRV_UWGHT_RANGE * s->npos + + (unsigned)(SRV_EWGHT_MAX + s->rweight - s->eweight) / BE_WEIGHT_SCALE; + + eb32_insert(&grp->curr, &s->lb_node); + s->lb_tree = &grp->curr; + } +} + +/* prepares a server when extracting it from the "init" tree */ +static inline void fwrr_get_srv_init(struct server *s) +{ + s->npos = s->rweight = 0; +} + +/* prepares a server when extracting it from the "next" tree */ +static inline void fwrr_get_srv_next(struct server *s) +{ + struct fwrr_group *grp = (s->state & SRV_BACKUP) ? 
+ &s->proxy->lbprm.fwrr.bck : + &s->proxy->lbprm.fwrr.act; + + s->npos += grp->curr_weight; +} + +/* prepares a server when it was marked down */ +static inline void fwrr_get_srv_down(struct server *s) +{ + struct fwrr_group *grp = (s->state & SRV_BACKUP) ? + &s->proxy->lbprm.fwrr.bck : + &s->proxy->lbprm.fwrr.act; + + s->npos = grp->curr_pos; +} + +/* prepares a server when extracting it from its tree */ +static void fwrr_get_srv(struct server *s) +{ + struct proxy *p = s->proxy; + struct fwrr_group *grp = (s->state & SRV_BACKUP) ? + &p->lbprm.fwrr.bck : + &p->lbprm.fwrr.act; + + if (s->lb_tree == grp->init) { + fwrr_get_srv_init(s); + } + else if (s->lb_tree == grp->next) { + fwrr_get_srv_next(s); + } + else if (s->lb_tree == NULL) { + fwrr_get_srv_down(s); + } +} + +/* switches trees "init" and "next" for FWRR group . "init" should be empty + * when this happens, and "next" filled with servers sorted by weights. + */ +static inline void fwrr_switch_trees(struct fwrr_group *grp) +{ + struct eb_root *swap; + swap = grp->init; + grp->init = grp->next; + grp->next = swap; + grp->curr_weight = grp->next_weight; + grp->curr_pos = grp->curr_weight; +} + +/* return next server from the current tree in FWRR group , or a server + * from the "init" tree if appropriate. If both trees are empty, return NULL. + */ +static struct server *fwrr_get_server_from_group(struct fwrr_group *grp) +{ + struct eb32_node *node; + struct server *s; + + node = eb32_first(&grp->curr); + s = eb32_entry(node, struct server, lb_node); + + if (!node || s->npos > grp->curr_pos) { + /* either we have no server left, or we have a hole */ + struct eb32_node *node2; + node2 = eb32_first(grp->init); + if (node2) { + node = node2; + s = eb32_entry(node, struct server, lb_node); + fwrr_get_srv_init(s); + if (s->eweight == 0) /* FIXME: is it possible at all ? */ + node = NULL; + } + } + if (node) + return s; + else + return NULL; +} + +/* Computes next position of server in the group. 
It is mandatory for <s> + * to have a non-zero, positive eweight. +*/ +static inline void fwrr_update_position(struct fwrr_group *grp, struct server *s) +{ + if (!s->npos) { + /* first time ever for this server */ + s->lpos = grp->curr_pos; + s->npos = grp->curr_pos + grp->next_weight / s->eweight; + s->rweight += grp->next_weight % s->eweight; + + if (s->rweight >= s->eweight) { + s->rweight -= s->eweight; + s->npos++; + } + } else { + s->lpos = s->npos; + s->npos += grp->next_weight / s->eweight; + s->rweight += grp->next_weight % s->eweight; + + if (s->rweight >= s->eweight) { + s->rweight -= s->eweight; + s->npos++; + } + } +} + +/* Return next server from the current tree in backend <p>, or a server from + * the init tree if appropriate. If both trees are empty, return NULL. + * Saturated servers are skipped and requeued. + */ +struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid) +{ + struct server *srv, *full, *avoided; + struct fwrr_group *grp; + int switched; + + if (p->srv_act) + grp = &p->lbprm.fwrr.act; + else if (p->lbprm.fbck) + return p->lbprm.fbck; + else if (p->srv_bck) + grp = &p->lbprm.fwrr.bck; + else + return NULL; + + switched = 0; + avoided = NULL; + full = NULL; /* NULL-terminated list of saturated servers */ + while (1) { + /* if we see an empty group, let's first try to collect weights + * which might have recently changed. + */ + if (!grp->curr_weight) + grp->curr_pos = grp->curr_weight = grp->next_weight; + + /* get first server from the "current" tree. When the end of + * the tree is reached, we may have to switch, but only once. + */ + while (1) { + srv = fwrr_get_server_from_group(grp); + if (srv) + break; + if (switched) { + if (avoided) { + srv = avoided; + break; + } + goto requeue_servers; + } + switched = 1; + fwrr_switch_trees(grp); + + } + + /* OK, we have a server. However, it may be saturated, in which + * case we don't want to reconsider it for now. We'll update + * its position and dequeue it anyway, so that we can move it + * to a better place afterwards. + */ + fwrr_update_position(grp, srv); + fwrr_dequeue_srv(srv); + grp->curr_pos++; + if (!srv->maxconn || (!srv->nbpend && srv->served < srv_dynamic_maxconn(srv))) { + /* make sure it is not the server we are trying to exclude... */ + if (srv != srvtoavoid || avoided) + break; + + avoided = srv; /* ...but remember that is was selected yet avoided */ + } + + /* the server is saturated or avoided, let's chain it for later reinsertion */ + srv->next_full = full; + full = srv; + } + + /* OK, we got the best server, let's update it */ + fwrr_queue_srv(srv); + + requeue_servers: + /* Requeue all extracted servers.
If full==srv then it was + * avoided (unsucessfully) and chained, omit it now. + */ + if (unlikely(full != NULL)) { + if (switched) { + /* the tree has switched, requeue all extracted servers + * into "init", because their place was lost, and only + * their weight matters. + */ + do { + if (likely(full != srv)) + fwrr_queue_by_weight(grp->init, full); + full = full->next_full; + } while (full); + } else { + /* requeue all extracted servers just as if they were consumed + * so that they regain their expected place. + */ + do { + if (likely(full != srv)) + fwrr_queue_srv(full); + full = full->next_full; + } while (full); + } + } + return srv; +} + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/src/lb_map.c b/src/lb_map.c new file mode 100644 index 000000000..6850a0a01 --- /dev/null +++ b/src/lb_map.c @@ -0,0 +1,262 @@ +/* + * Map-based load-balancing (RR and HASH) + * + * Copyright 2000-2009 Willy Tarreau + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +/* this function updates the map according to server 's new state */ +static void map_set_server_status_down(struct server *srv) +{ + struct proxy *p = srv->proxy; + + if (srv->state == srv->prev_state && + srv->eweight == srv->prev_eweight) + return; + + if (srv_is_usable(srv->state, srv->eweight)) + goto out_update_state; + + /* FIXME: could be optimized since we know what changed */ + recount_servers(p); + update_backend_weight(p); + p->lbprm.map.state |= PR_MAP_RECALC; + out_update_state: + srv->prev_state = srv->state; + srv->prev_eweight = srv->eweight; +} + +/* This function updates the map according to server 's new state */ +static void map_set_server_status_up(struct server *srv) +{ + struct proxy *p = srv->proxy; + + if (srv->state == srv->prev_state && + srv->eweight == srv->prev_eweight) + return; + + if (!srv_is_usable(srv->state, srv->eweight)) + goto out_update_state; + + /* FIXME: could be optimized since we know what changed */ + recount_servers(p); + update_backend_weight(p); + p->lbprm.map.state |= PR_MAP_RECALC; + out_update_state: + srv->prev_state = srv->state; + srv->prev_eweight = srv->eweight; +} + +/* This function recomputes the server map for proxy px. It relies on + * px->lbprm.tot_wact, tot_wbck, tot_used, tot_weight, so it must be + * called after recount_servers(). It also expects px->lbprm.map.srv + * to be allocated with the largest size needed. It updates tot_weight. 
+ */
+void recalc_server_map(struct proxy *px)
+{
+	int o, tot, flag;
+	struct server *cur, *best;
+
+	/* <tot> is the number of map entries to fill */
+	switch (px->lbprm.tot_used) {
+	case 0:	/* no server */
+		px->lbprm.map.state &= ~PR_MAP_RECALC;
+		return;
+	case 1: /* only one server, just fill first entry */
+		/* NOTE(review): only map.srv[0] is written in this case while
+		 * map_get_server_rr() walks indexes up to tot_weight; confirm
+		 * that tot_weight cannot exceed 1 when tot_used is 1.
+		 */
+		tot = 1;
+		break;
+	default:
+		tot = px->lbprm.tot_weight;
+		break;
+	}
+
+	/* tot_used is non-zero here. Only servers whose RUNNING, GOINGDOWN
+	 * and BACKUP bits are exactly <flag> are eligible below: running
+	 * non-backup servers when px->srv_act is set, running backup servers
+	 * otherwise. A server with SRV_GOINGDOWN set never matches.
+	 */
+	if (px->srv_act)
+		flag = SRV_RUNNING;
+	else
+		flag = SRV_RUNNING | SRV_BACKUP;
+
+	/* this algorithm gives priority to the first server, which means that
+	 * it will respect the declaration order for equivalent weights, and
+	 * that whatever the weights, the first server called will always be
+	 * the first declared. This is an important assumption for the backup
+	 * case, where we want the first server only.
+	 */
+	for (cur = px->srv; cur; cur = cur->next)
+		cur->wscore = 0;
+
+	for (o = 0; o < tot; o++) {
+		int max = 0;
+		best = NULL;
+		for (cur = px->srv; cur; cur = cur->next) {
+			if (cur->eweight &&
+			    flag == (cur->state &
+				     (SRV_RUNNING | SRV_GOINGDOWN | SRV_BACKUP))) {
+				int v;
+
+				/* If we are forced to return only one server, we don't want to
+				 * go further, because we would return the wrong one due to
+				 * divide overflow.
+				 */
+				if (tot == 1) {
+					best = cur;
+					/* note that best->wscore will be wrong but we don't care */
+					break;
+				}
+
+				cur->wscore += cur->eweight;
+				v = (cur->wscore + tot) / tot; /* result between 0 and 3 */
+				if (best == NULL || v > max) {
+					max = v;
+					best = cur;
+				}
+			}
+		}
+		px->lbprm.map.srv[o] = best;
+		/* <best> is still NULL when no server matched <flag> during
+		 * this pass. The entry is then left empty and there is no
+		 * score to adjust; dereferencing it would crash.
+		 */
+		if (best)
+			best->wscore -= tot;
+	}
+	px->lbprm.map.state &= ~PR_MAP_RECALC;
+}
+
+/* This function is responsible for building the server MAP for map-based LB
+ * algorithms, allocating the map, and setting p->lbprm.wmult to the GCD of the
+ * weights if applicable. It should be called only once per proxy, at config
+ * time.
+ */
+void init_server_map(struct proxy *p)
+{
+	struct server *srv;
+	int gcd = 1;
+	int wact = 0, wbck = 0;
+	int slots;
+
+	/* install the map-specific callbacks, there is none for eweight updates */
+	p->lbprm.set_server_status_up   = map_set_server_status_up;
+	p->lbprm.set_server_status_down = map_set_server_status_down;
+	p->lbprm.update_server_eweight  = NULL;
+
+	if (!p->srv)
+		return;
+
+	/* The weights are divided by their greatest common divisor (Euclid's
+	 * algorithm) in order to reduce the table. Since zero weights are
+	 * possible, the computation starts from the first non-zero one.
+	 */
+	for (srv = p->srv; srv; srv = srv->next)
+		if (srv->uweight)
+			break;
+
+	if (srv) {
+		gcd = srv->uweight; /* note: cannot be zero */
+		for (srv = srv->next; srv && gcd > 1; srv = srv->next) {
+			int rem = srv->uweight;
+
+			while (rem) {
+				int next = gcd % rem;
+
+				gcd = rem;
+				rem = next;
+			}
+		}
+	}
+
+	/* Keep the factor which converts the backend's effective weight
+	 * back to its real weight.
+	 */
+	p->lbprm.wmult = gcd;
+
+	/* assign effective weights, record the current state, and sum the
+	 * weights of backup and non-backup servers separately.
+	 */
+	for (srv = p->srv; srv; srv = srv->next) {
+		srv->eweight      = srv->uweight / gcd;
+		srv->prev_eweight = srv->eweight;
+		srv->prev_state   = srv->state;
+
+		if (srv->state & SRV_BACKUP)
+			wbck += srv->eweight;
+		else
+			wact += srv->eweight;
+	}
+
+	/* the largest map ever needed for this servers list is the larger of
+	 * both sums, with a minimum of one entry.
+	 */
+	slots = (wact < wbck) ? wbck : wact;
+	if (!slots)
+		slots = 1;
+
+	p->lbprm.map.srv = calloc(slots, sizeof(struct server *));
+	/* recounts servers and their weights */
+	p->lbprm.map.state = PR_MAP_RECALC;
+	recount_servers(p);
+	update_backend_weight(p);
+	recalc_server_map(p);
+}
+
+/*
+ * This function tries to find a running server with free connection slots for
+ * the proxy <px> following the round-robin method.
+ * If any server is found, it will be returned and px->lbprm.map.rr_idx will be updated
+ * to point to the next server. If no valid server is found, NULL is returned.
+ */
+struct server *map_get_server_rr(struct proxy *px, struct server *srvtoavoid)
+{
+	struct server *cand;
+	struct server *fallback = NULL;
+	int fallback_idx = 0; /* shut a gcc warning */
+	int idx;
+
+	if (px->lbprm.tot_weight == 0)
+		return NULL;
+
+	if (px->lbprm.map.state & PR_MAP_RECALC)
+		recalc_server_map(px);
+
+	/* restart from the first entry when the index is out of the map */
+	if (px->lbprm.map.rr_idx < 0 || px->lbprm.map.rr_idx >= px->lbprm.tot_weight)
+		px->lbprm.map.rr_idx = 0;
+
+	/* walk the whole map once, starting from the round-robin index */
+	idx = px->lbprm.map.rr_idx;
+	do {
+		cand = px->lbprm.map.srv[idx];
+		idx++;
+
+		if (cand->maxconn && cand->cur_sess >= srv_dynamic_maxconn(cand)) {
+			/* no free connection slot on this one, skip it */
+		}
+		else if (cand != srvtoavoid) {
+			/* usable and not excluded: resume after it next time */
+			px->lbprm.map.rr_idx = idx;
+			return cand;
+		}
+		else {
+			/* usable but excluded: keep it in case nothing else fits */
+			fallback     = cand;
+			fallback_idx = idx;
+		}
+
+		if (idx == px->lbprm.tot_weight)
+			idx = 0;
+	} while (idx != px->lbprm.map.rr_idx);
+
+	if (fallback)
+		px->lbprm.map.rr_idx = fallback_idx;
+
+	/* return NULL or srvtoavoid if found */
+	return fallback;
+}
+
+
+/*
+ * Local variables:
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ * End:
+ */