[MAJOR] implement the Fast Weighted Round Robin (FWRR) algo

This round robin algorithm was written from trees, so that we
do not have to recompute any table when changing server weights.
This solution allows on-the-fly weight adjustments with immediate
effect on the load distribution.

There is still a limitation due to 32-bit computations, to about
2000 servers at full scale (weight 255), or more servers with
lower weights. Basically, sum(srv.weight)*4096 must be below 2^31.

Test configurations and an example program used to develop the
tree will be added next.

Many changes have been brought to the weights computations and
variables in order to accommodate the possibility of a server to
be running but disabled from load balancing due to a null weight.
This commit is contained in:
Willy Tarreau 2007-11-26 01:15:43 +01:00
parent 5dc2fa660c
commit b625a085d8
7 changed files with 658 additions and 43 deletions

View File

@ -39,10 +39,10 @@ int srv_redispatch_connect(struct session *t);
int backend_parse_balance(const char **args, char *err,
int errlen, struct proxy *curproxy);
void recount_servers(struct proxy *px);
void recalc_server_map(struct proxy *px);
int be_downtime(struct proxy *px);
void init_server_map(struct proxy *p);
void fwrr_init_server_groups(struct proxy *p);
/*
* This function tries to find a running server with free connection slots for
@ -118,7 +118,9 @@ static inline struct server *get_server_sh(struct proxy *px,
recalc_server_map(px);
l = h = 0;
if (px->srv_act > 1 || (px->srv_act == 0 && px->srv_bck > 1)) {
/* note: we won't hash if there's only one server left */
if (px->lbprm.tot_used > 1) {
while ((l + sizeof (int)) <= len) {
h ^= ntohl(*(unsigned int *)(&addr[l]));
l += sizeof (int);

View File

@ -71,6 +71,10 @@
#define PR_O_CONTSTATS 0x80000000 /* continuous counters */
/* various constants */
#define BE_WEIGHT_SCALE 256 /* scale between user weight and effective weight */
#endif /* _TYPES_BACKEND_H */
/*

View File

@ -29,6 +29,7 @@
#include <common/appsession.h>
#include <common/config.h>
#include <common/ebtree.h>
#include <common/mini-clist.h>
#include <common/regex.h>
#include <common/sessionhash.h>
@ -64,6 +65,17 @@
#define PR_CAP_RS 0x0004
#define PR_CAP_LISTEN (PR_CAP_FE|PR_CAP_BE|PR_CAP_RS)
/* This structure is used to apply fast weighted round robin on a server group.
 * Servers are stored in ebtrees; t0/t1 are the storage roots while init/next
 * point at whichever of the two currently plays each role (they are swapped
 * when a run completes, see fwrr_switch_trees()).
 */
struct fwrr_group {
	struct eb_root curr; /* tree for servers in "current" time range */
	struct eb_root t0, t1; /* "init" and "next" servers (storage for the two pointers below) */
	struct eb_root *init; /* servers waiting to be placed (points to t0 or t1) */
	struct eb_root *next; /* servers to be placed at next run (points to the other one) */
	int curr_pos; /* current position in the tree */
	int curr_weight; /* total weight of the current time range */
	int next_weight; /* total weight of the next time range */
};
struct proxy {
struct listener *listen; /* the listen addresses and sockets */
struct in_addr mon_net, mon_mask; /* don't forward connections from this net (network order) FIXME: should support IPv6 */
@ -79,7 +91,7 @@ struct proxy {
struct list block_cond; /* early blocking conditions (chained) */
struct list switching_rules; /* content switching rules (chained) */
struct server *srv; /* known servers */
int srv_act, srv_bck; /* # of running servers */
int srv_act, srv_bck; /* # of servers eligible for LB (UP|!checked) AND (enabled+weight!=0) */
struct {
int tot_wact, tot_wbck; /* total effective weights of active and backup servers */
@ -87,11 +99,19 @@ struct proxy {
int tot_used; /* total number of servers used for LB */
int wmult; /* ratio between user weight and effective weight */
int wdiv; /* ratio between effective weight and user weight */
struct server *fbck; /* first backup server when !PR_O_USE_ALL_BK, or NULL */
struct {
struct server **srv; /* the server map used to apply weights */
int rr_idx; /* next server to be elected in round robin mode */
int state; /* PR_MAP_RECALC */
} map; /* LB parameters for map-based algorithms */
struct {
struct fwrr_group act; /* weighted round robin on the active servers */
struct fwrr_group bck; /* weighted round robin on the backup servers */
} fwrr;
void (*update_server_eweight)(struct server *);/* if non-NULL, to be called after eweight change */
void (*set_server_status_up)(struct server *);/* to be called after status changes to UP */
void (*set_server_status_down)(struct server *);/* to be called after status changes to DOWN */
} lbprm; /* LB parameters for all algorithms */
char *cookie_name; /* name of the cookie to look for */

View File

@ -2,7 +2,7 @@
include/types/server.h
This file defines everything related to servers.
Copyright (C) 2000-2006 Willy Tarreau - w@1wt.eu
Copyright (C) 2000-2007 Willy Tarreau - w@1wt.eu
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@ -26,6 +26,7 @@
#include <arpa/inet.h>
#include <common/config.h>
#include <common/eb32tree.h>
#include <common/mini-clist.h>
#include <types/buffers.h>
@ -57,6 +58,7 @@
struct server {
struct server *next;
int state; /* server state (SRV_*) */
int prev_state; /* server state before last change (SRV_*) */
int cklen; /* the len of the cookie, to speed up checks */
char *cookie; /* the id set in the cookie */
@ -85,6 +87,12 @@ struct server {
char *id; /* just for identification */
unsigned uweight, eweight; /* user-specified weight, and effective weight */
unsigned wscore; /* weight score, used during srv map computation */
unsigned prev_eweight; /* eweight before last change */
unsigned rweight; /* remainder of weight in the current LB tree */
unsigned npos, lpos; /* next and last positions in the LB tree */
struct eb32_node lb_node; /* node used for tree-based load balancing */
struct eb_root *lb_tree; /* we want to know in what tree the server is */
struct server *next_full; /* next server in the temporary full list */
unsigned failed_checks, down_trans; /* failed checks and up-down transitions */
unsigned down_time; /* total time the server was down */

View File

@ -19,6 +19,7 @@
#include <common/compat.h>
#include <common/config.h>
#include <common/eb32tree.h>
#include <common/time.h>
#include <types/buffers.h>
@ -46,51 +47,109 @@
#include <libtcpsplice.h>
#endif
static inline void fwrr_remove_from_tree(struct server *s);
static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s);
static inline void fwrr_dequeue_srv(struct server *s);
static void fwrr_get_srv(struct server *s);
static void fwrr_queue_srv(struct server *s);
/* This function returns non-zero if a server with the given weight and state
 * is usable for LB, otherwise zero.
 */
static inline int srv_is_usable(int state, int weight)
{
	/* a server takes part in load balancing only when it has a non-null
	 * weight and is running.
	 */
	return weight != 0 && (state & SRV_RUNNING) != 0;
}
/*
* This function recounts the number of usable active and backup servers for
* proxy <p>. These numbers are returned into the p->srv_act and p->srv_bck.
* This function also recomputes the total active and backup weights.
* This function also recomputes the total active and backup weights. However,
* it does nout update tot_weight nor tot_used. Use update_backend_weight() for
* this.
*/
void recount_servers(struct proxy *px)
static void recount_servers(struct proxy *px)
{
struct server *srv;
int first_bkw = 0;
px->srv_act = px->srv_bck = 0;
px->lbprm.tot_wact = px->lbprm.tot_wbck = 0;
px->lbprm.fbck = NULL;
for (srv = px->srv; srv != NULL; srv = srv->next) {
if (srv->state & SRV_RUNNING) {
if (srv->state & SRV_BACKUP) {
px->srv_bck++;
px->lbprm.tot_wbck += srv->eweight;
if (px->srv_bck == 1)
first_bkw = srv->eweight;
} else {
px->srv_act++;
px->lbprm.tot_wact += srv->eweight;
}
if (!srv_is_usable(srv->state, srv->eweight))
continue;
if (srv->state & SRV_BACKUP) {
if (!px->srv_bck &&
!(px->options & PR_O_USE_ALL_BK))
px->lbprm.fbck = srv;
px->srv_bck++;
px->lbprm.tot_wbck += srv->eweight;
} else {
px->srv_act++;
px->lbprm.tot_wact += srv->eweight;
}
}
}
/* This function simply updates the backend's tot_weight and tot_used values
 * after servers weights have been updated. It is designed to be used after
 * recount_servers() or equivalent.
 */
static void update_backend_weight(struct proxy *px)
{
	if (px->srv_act) {
		/* at least one active server is usable */
		px->lbprm.tot_weight = px->lbprm.tot_wact;
		px->lbprm.tot_used = px->srv_act;
	}
	else if (px->lbprm.fbck) {
		/* use only the first backup server */
		px->lbprm.tot_weight = px->lbprm.fbck->eweight;
		px->lbprm.tot_used = 1;
	}
	else {
		/* all usable backup servers, possibly none */
		px->lbprm.tot_weight = px->lbprm.tot_wbck;
		px->lbprm.tot_used = px->srv_bck;
	}
}
/* this function updates the map according to server <srv>'s new state */
static void map_set_server_status_down(struct server *srv)
{
	struct proxy *p = srv->proxy;

	/* only act when the state or the effective weight actually changed */
	if (srv->state != srv->prev_state ||
	    srv->eweight != srv->prev_eweight) {
		/* FIXME: could be optimized since we know what changed */
		recount_servers(p);
		update_backend_weight(p);
		srv->prev_state = srv->state;
		srv->prev_eweight = srv->eweight;
		p->lbprm.map.state |= PR_MAP_RECALC;
	}
}
/* this function updates the map according to server <srv>'s new state */
static void map_set_server_status_up(struct server *srv)
{
	struct proxy *p = srv->proxy;

	/* only act when the state or the effective weight actually changed */
	if (srv->state != srv->prev_state ||
	    srv->eweight != srv->prev_eweight) {
		/* FIXME: could be optimized since we know what changed */
		recount_servers(p);
		update_backend_weight(p);
		srv->prev_state = srv->state;
		srv->prev_eweight = srv->eweight;
		p->lbprm.map.state |= PR_MAP_RECALC;
	}
}
/* This function recomputes the server map for proxy px. It relies on
@ -172,6 +231,10 @@ void init_server_map(struct proxy *p)
int pgcd;
int act, bck;
p->lbprm.set_server_status_up = map_set_server_status_up;
p->lbprm.set_server_status_down = map_set_server_status_down;
p->lbprm.update_server_eweight = NULL;
if (!p->srv)
return;
@ -197,6 +260,8 @@ void init_server_map(struct proxy *p)
act = bck = 0;
for (srv = p->srv; srv; srv = srv->next) {
srv->eweight = srv->uweight / pgcd;
srv->prev_eweight = srv->eweight;
srv->prev_state = srv->state;
if (srv->state & SRV_BACKUP)
bck += srv->eweight;
else
@ -211,9 +276,522 @@ void init_server_map(struct proxy *p)
/* recounts servers and their weights */
p->lbprm.map.state = PR_MAP_RECALC;
recount_servers(p);
update_backend_weight(p);
recalc_server_map(p);
}
/* This function updates the server trees according to server <srv>'s new
 * state. It should be called when server <srv>'s status changes to down.
 * It is not important whether the server was already down or not. However,
 * it is mandatory that the new state be down.
 */
static void fwrr_set_server_status_down(struct server *srv)
{
	struct proxy *p = srv->proxy;
	struct fwrr_group *grp;

	/* nothing changed since the last recorded state: nothing to do */
	if (srv->state == srv->prev_state &&
	    srv->eweight == srv->prev_eweight)
		return;

	if (!srv_is_usable(srv->prev_state, srv->prev_eweight))
		/* server was already down */
		goto out_update_backend;

	/* remove the server's weight from its group's future total */
	grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
	grp->next_weight -= srv->prev_eweight;

	if (srv->state & SRV_BACKUP) {
		p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
		p->srv_bck--;

		if (srv == p->lbprm.fbck) {
			/* we lost the first backup server in a single-backup
			 * configuration, we must search another one.
			 */
			struct server *srv2 = p->lbprm.fbck;
			do {
				srv2 = srv2->next;
			} while (srv2 &&
				 !((srv2->state & SRV_BACKUP) &&
				   srv_is_usable(srv2->state, srv2->eweight)));
			p->lbprm.fbck = srv2; /* may be NULL if none is left */
		}
	} else {
		p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
		p->srv_act--;
	}

	/* unlink the node from its tree, then mark the server as belonging
	 * to no tree at all.
	 */
	fwrr_dequeue_srv(srv);
	fwrr_remove_from_tree(srv);

out_update_backend:
	/* check/update tot_used, tot_weight */
	update_backend_weight(p);
	srv->prev_state = srv->state;
	srv->prev_eweight = srv->eweight;
}
/* This function updates the server trees according to server <srv>'s new
 * state. It should be called when server <srv>'s status changes to up.
 * It is not important whether the server was already down or not. However,
 * it is mandatory that the new state be up. This function will not change
 * the weight of a server which was already up.
 */
static void fwrr_set_server_status_up(struct server *srv)
{
	struct proxy *p = srv->proxy;
	struct fwrr_group *grp;

	/* nothing changed since the last recorded state: nothing to do */
	if (srv->state == srv->prev_state &&
	    srv->eweight == srv->prev_eweight)
		return;

	if (srv_is_usable(srv->prev_state, srv->prev_eweight))
		/* server was already up */
		goto out_update_backend;

	/* add the server's weight to its group's future total */
	grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
	grp->next_weight += srv->eweight;

	if (srv->state & SRV_BACKUP) {
		p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
		p->srv_bck++;

		if (p->lbprm.fbck) {
			/* we may have restored a backup server prior to fbck,
			 * in which case it should replace it.
			 */
			struct server *srv2 = srv;
			do {
				srv2 = srv2->next;
			} while (srv2 && (srv2 != p->lbprm.fbck));
			if (srv2)
				p->lbprm.fbck = srv;
		}
	} else {
		p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
		p->srv_act++;
	}

	/* note that eweight cannot be 0 here */
	fwrr_get_srv(srv);
	/* compute the server's next position from the current one, spread
	 * over the group's weights according to its own weight.
	 */
	srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;
	fwrr_queue_srv(srv);

out_update_backend:
	/* check/update tot_used, tot_weight */
	update_backend_weight(p);
	srv->prev_state = srv->state;
	srv->prev_eweight = srv->eweight;
}
/* This function must be called after an update to server <srv>'s effective
 * weight. It may be called after a state change too.
 */
static void fwrr_update_server_weight(struct server *srv)
{
	int old_state, new_state;
	struct proxy *p = srv->proxy;
	struct fwrr_group *grp;

	/* nothing changed since the last recorded state: nothing to do */
	if (srv->state == srv->prev_state &&
	    srv->eweight == srv->prev_eweight)
		return;

	/* If changing the server's weight changes its state, we simply apply
	 * the procedures we already have for status change. If the state
	 * remains down, the server is not in any tree, so it's as easy as
	 * updating its values. If the state remains up with different weights,
	 * there are some computations to perform to find a new place and
	 * possibly a new tree for this server.
	 */

	old_state = srv_is_usable(srv->prev_state, srv->prev_eweight);
	new_state = srv_is_usable(srv->state, srv->eweight);

	if (!old_state && !new_state) {
		/* remains unusable: just resynchronize the bookkeeping values */
		srv->prev_state = srv->state;
		srv->prev_eweight = srv->eweight;
		return;
	}
	else if (!old_state && new_state) {
		fwrr_set_server_status_up(srv);
		return;
	}
	else if (old_state && !new_state) {
		fwrr_set_server_status_down(srv);
		return;
	}

	/* server remains usable with a new weight: fix up the group totals */
	grp = (srv->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
	grp->next_weight = grp->next_weight - srv->prev_eweight + srv->eweight;

	p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
	p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;

	if (srv->lb_tree == grp->init) {
		/* still waiting to be placed: requeue it by its new weight */
		fwrr_dequeue_srv(srv);
		fwrr_queue_by_weight(grp->init, srv);
	}
	else if (!srv->lb_tree) {
		/* FIXME: server was down. This is not possible right now but
		 * may be needed soon for slowstart or graceful shutdown.
		 */
		fwrr_dequeue_srv(srv);
		fwrr_get_srv(srv);
		srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->eweight;
		fwrr_queue_srv(srv);
	} else {
		/* The server is either active or in the next queue. If it's
		 * still in the active queue and it has not consumed all of its
		 * places, let's adjust its next position.
		 */
		fwrr_get_srv(srv);

		if (srv->eweight > 0) {
			int prev_next = srv->npos;
			int step = grp->next_weight / srv->eweight;

			/* take a new step from the last position, but never
			 * move the server later than it already was, nor
			 * closer than two slots from the current position.
			 */
			srv->npos = srv->lpos + step;
			srv->rweight = 0;

			if (srv->npos > prev_next)
				srv->npos = prev_next;
			if (srv->npos < grp->curr_pos + 2)
				srv->npos = grp->curr_pos + step;
		} else {
			/* push it into the next tree */
			srv->npos = grp->curr_pos + grp->curr_weight;
		}

		fwrr_dequeue_srv(srv);
		fwrr_queue_srv(srv);
	}

	update_backend_weight(p);
	srv->prev_state = srv->state;
	srv->prev_eweight = srv->eweight;
}
/* Remove a server from a tree. It must have previously been dequeued. This
 * function is meant to be called when a server is going down or has its
 * weight disabled.
 */
static inline void fwrr_remove_from_tree(struct server *s)
{
	/* only the back-pointer is cleared here; the node itself must already
	 * have been unlinked with fwrr_dequeue_srv().
	 */
	s->lb_tree = NULL;
}
/* Queue a server in the weight tree <root>, assuming the weight is >0.
 * We want to sort them by inverted weights, because we need to place
 * heavy servers first in order to get a smooth distribution.
 */
static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s)
{
	/* eweight can be as high as 256*255, so invert it against the
	 * maximum possible value to sort heaviest first.
	 */
	s->lb_node.key = BE_WEIGHT_SCALE * 255 - s->eweight;
	s->lb_tree = root;
	eb32_insert(root, &s->lb_node);
}
/* This function is responsible for building the weight trees in case of fast
 * weighted round-robin. It also sets p->lbprm.wdiv to the eweight to uweight
 * ratio. Both active and backup groups are initialized.
 */
void fwrr_init_server_groups(struct proxy *p)
{
	struct server *srv;
	struct eb_root init_head = EB_ROOT;

	/* register the FWRR callbacks for this proxy */
	p->lbprm.set_server_status_up = fwrr_set_server_status_up;
	p->lbprm.set_server_status_down = fwrr_set_server_status_down;
	p->lbprm.update_server_eweight = fwrr_update_server_weight;
	p->lbprm.wdiv = BE_WEIGHT_SCALE;

	/* scale user weights up by BE_WEIGHT_SCALE to get effective weights */
	for (srv = p->srv; srv; srv = srv->next) {
		srv->prev_eweight = srv->eweight = srv->uweight * BE_WEIGHT_SCALE;
		srv->prev_state = srv->state;
	}

	recount_servers(p);
	update_backend_weight(p);

	/* prepare the active servers group */
	p->lbprm.fwrr.act.curr_pos = p->lbprm.fwrr.act.curr_weight =
		p->lbprm.fwrr.act.next_weight = p->lbprm.tot_wact;
	p->lbprm.fwrr.act.curr = p->lbprm.fwrr.act.t0 =
		p->lbprm.fwrr.act.t1 = init_head;
	p->lbprm.fwrr.act.init = &p->lbprm.fwrr.act.t0;
	p->lbprm.fwrr.act.next = &p->lbprm.fwrr.act.t1;

	/* prepare the backup servers group */
	p->lbprm.fwrr.bck.curr_pos = p->lbprm.fwrr.bck.curr_weight =
		p->lbprm.fwrr.bck.next_weight = p->lbprm.tot_wbck;
	p->lbprm.fwrr.bck.curr = p->lbprm.fwrr.bck.t0 =
		p->lbprm.fwrr.bck.t1 = init_head;
	p->lbprm.fwrr.bck.init = &p->lbprm.fwrr.bck.t0;
	p->lbprm.fwrr.bck.next = &p->lbprm.fwrr.bck.t1;

	/* queue active and backup servers in two distinct groups */
	for (srv = p->srv; srv; srv = srv->next) {
		if (!srv_is_usable(srv->state, srv->eweight))
			continue;
		fwrr_queue_by_weight((srv->state & SRV_BACKUP) ?
				p->lbprm.fwrr.bck.init :
				p->lbprm.fwrr.act.init,
				srv);
	}
}
/* simply removes a server from a weight tree by unlinking its eb32 node */
static inline void fwrr_dequeue_srv(struct server *s)
{
	eb32_delete(&s->lb_node);
}
/* queues a server into the appropriate group and tree depending on its
 * backup status, and ->npos. If the server is disabled, simply assign
 * it to the NULL tree.
 */
static void fwrr_queue_srv(struct server *s)
{
	struct proxy *p = s->proxy;
	struct fwrr_group *grp;

	grp = (s->state & SRV_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;

	/* Delay everything which does not fit into the window and everything
	 * which does not fit into the theoretical new window.
	 */
	if (!srv_is_usable(s->state, s->eweight)) {
		fwrr_remove_from_tree(s);
	}
	else if (s->eweight <= 0 ||
		 s->npos >= 2 * grp->curr_weight ||
		 s->npos >= grp->curr_weight + grp->next_weight) {
		/* put into next tree, and readjust npos in case we could
		 * finally take this back to current. */
		s->npos -= grp->curr_weight;
		fwrr_queue_by_weight(grp->next, s);
	}
	else {
		/* the key sorts by position first, with a weight-derived
		 * tie-breaker scaled down to keep the result in 32 bits.
		 */
		/* FIXME: we want to multiply by a constant to avoid overrides
		 * after weight changes, but this can easily overflow on 32-bit
		 * values. We need to change this for a 64-bit tree, and keep
		 * the 65536 factor for optimal smoothness (both rweight and
		 * eweight are 16 bit entities). s->npos is bound by the number
		 * of servers times the maximum eweight (~= nsrv << 16).
		 */
		//s->lb_node.key = grp->curr_weight * s->npos + s->rweight - s->eweight;
		//s->lb_node.key = 65536 * s->npos + s->rweight - s->eweight;
		s->lb_node.key = 16 * s->npos + (s->rweight - s->eweight) / 4096;
		eb32i_insert(&grp->curr, &s->lb_node);
		s->lb_tree = &grp->curr;
	}
}
/* prepares a server when extracting it from the "init" tree */
static inline void fwrr_get_srv_init(struct server *s)
{
	/* a freshly placed server starts with no position and no remainder */
	s->npos = 0;
	s->rweight = 0;
}
/* prepares a server when extracting it from the "next" tree */
static inline void fwrr_get_srv_next(struct server *s)
{
struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
&s->proxy->lbprm.fwrr.bck :
&s->proxy->lbprm.fwrr.act;
s->npos += grp->curr_weight;
}
/* prepares a server when it was marked down */
static inline void fwrr_get_srv_down(struct server *s)
{
struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
&s->proxy->lbprm.fwrr.bck :
&s->proxy->lbprm.fwrr.act;
s->npos = grp->curr_pos;
}
/* prepares a server when extracting it from its tree, by dispatching to the
 * appropriate handler depending on which tree it currently belongs to.
 */
static void fwrr_get_srv(struct server *s)
{
	struct fwrr_group *grp = (s->state & SRV_BACKUP) ?
		&s->proxy->lbprm.fwrr.bck :
		&s->proxy->lbprm.fwrr.act;

	if (s->lb_tree == grp->init)
		fwrr_get_srv_init(s);
	else if (s->lb_tree == grp->next)
		fwrr_get_srv_next(s);
	else if (!s->lb_tree)
		fwrr_get_srv_down(s);
	/* a server already in the "curr" tree needs no preparation */
}
/* switches trees "init" and "next" for FWRR group <grp>. "init" should be empty
 * when this happens, and "next" filled with servers sorted by weights.
 */
static inline void fwrr_switch_trees(struct fwrr_group *grp)
{
	struct eb_root *tmp = grp->init;

	/* exchange the two tree roles */
	grp->init = grp->next;
	grp->next = tmp;

	/* the accumulated next weight becomes the current one, and the
	 * position restarts at the end of the new window.
	 */
	grp->curr_weight = grp->next_weight;
	grp->curr_pos = grp->curr_weight;
}
/* return next server from the current tree in FWRR group <grp>, or a server
 * from the "init" tree if appropriate. If both trees are empty, return NULL.
 */
static struct server *fwrr_get_server_from_group(struct fwrr_group *grp)
{
	struct eb32_node *node;
	struct server *s;

	node = eb32_first(&grp->curr);
	/* NOTE: <s> is computed even when <node> may be NULL; this relies on
	 * eb32_entry() performing only address arithmetic (container_of), and
	 * <node> is always checked before <s> is dereferenced.
	 */
	s = eb32_entry(node, struct server, lb_node);

	if (!node || s->npos > grp->curr_pos) {
		/* either we have no server left, or we have a hole: the next
		 * server's position is beyond the current one, so try to pull
		 * a not-yet-placed server from the "init" tree instead.
		 */
		struct eb32_node *node2;
		node2 = eb32_first(grp->init);
		if (node2) {
			node = node2;
			s = eb32_entry(node, struct server, lb_node);
			fwrr_get_srv_init(s);
			if (s->eweight == 0) /* FIXME: is it possible at all ? */
				node = NULL;
		}
	}

	if (node)
		return s;
	else
		return NULL;
}
/* Computes next position of server <s> in the group. It is mandatory for <s>
 * to have a non-zero, positive eweight. The integer part of the step is
 * added to npos while the remainder accumulates in rweight, carrying over
 * one extra slot each time it reaches eweight.
 */
static inline void fwrr_update_position(struct fwrr_group *grp, struct server *s)
{
	/* The two original branches only differed in the base position; the
	 * common step/remainder logic is factored out below.
	 */
	if (!s->npos) {
		/* first time ever for this server: start from the group's
		 * current position.
		 */
		s->lpos = grp->curr_pos;
		s->npos = grp->curr_pos;
	} else {
		s->lpos = s->npos;
	}

	s->npos += grp->next_weight / s->eweight;
	s->rweight += grp->next_weight % s->eweight;

	if (s->rweight >= s->eweight) {
		s->rweight -= s->eweight;
		s->npos++;
	}
}
/* Return next server from the current tree in backend <p>, or a server from
 * the init tree if appropriate. If both trees are empty, return NULL.
 * Saturated servers are skipped and requeued.
 */
static struct server *fwrr_get_next_server(struct proxy *p)
{
	struct server *srv;
	struct fwrr_group *grp;
	struct server *full;
	int switched;

	/* select the group: active servers first; else the designated first
	 * backup if any; else the backup group; else no server at all.
	 */
	if (p->srv_act)
		grp = &p->lbprm.fwrr.act;
	else if (p->lbprm.fbck)
		return p->lbprm.fbck;
	else if (p->srv_bck)
		grp = &p->lbprm.fwrr.bck;
	else
		return NULL;

	switched = 0;
	full = NULL; /* NULL-terminated list of saturated servers */
	while (1) {
		/* if we see an empty group, let's first try to collect weights
		 * which might have recently changed.
		 */
		if (!grp->curr_weight)
			grp->curr_pos = grp->curr_weight = grp->next_weight;

		/* get first server from the "current" tree. When the end of
		 * the tree is reached, we may have to switch, but only once.
		 */
		while (1) {
			srv = fwrr_get_server_from_group(grp);
			if (srv)
				break;
			if (switched)
				goto requeue_servers; /* both trees empty: srv is NULL */
			switched = 1;
			fwrr_switch_trees(grp);
		}

		/* OK, we have a server. However, it may be saturated, in which
		 * case we don't want to reconsider it for now. We'll update
		 * its position and dequeue it anyway, so that we can move it
		 * to a better place afterwards.
		 */
		fwrr_update_position(grp, srv);
		fwrr_dequeue_srv(srv);
		grp->curr_pos++;
		if (!srv->maxconn || srv->cur_sess < srv_dynamic_maxconn(srv))
			break;

		/* the server is saturated, let's chain it for later reinsertion */
		srv->next_full = full;
		full = srv;
	}

	/* OK, we got the best server, let's update it */
	fwrr_queue_srv(srv);

requeue_servers:
	if (unlikely(full)) {
		if (switched) {
			/* the tree has switched, requeue all extracted servers
			 * into "init", because their place was lost, and only
			 * their weight matters.
			 */
			do {
				fwrr_queue_by_weight(grp->init, full);
				full = full->next_full;
			} while (full);
		} else {
			/* requeue all extracted servers just as if they were consumed
			 * so that they regain their expected place.
			 */
			do {
				fwrr_queue_srv(full);
				full = full->next_full;
			} while (full);
		}
	}
	return srv;
}
/*
* This function tries to find a running server for the proxy <px> following
* the URL parameter hash method. It looks for a specific parameter in the
@ -314,12 +892,12 @@ int assign_server(struct session *s)
return SRV_STATUS_OK;
}
if (!s->be->srv_act && !s->be->srv_bck)
if (!s->be->lbprm.tot_weight)
return SRV_STATUS_NOSRV;
switch (s->be->options & PR_O_BALANCE) {
case PR_O_BALANCE_RR:
s->srv = get_server_rr_with_conns(s->be);
s->srv = fwrr_get_next_server(s->be);
if (!s->srv)
return SRV_STATUS_FULL;
break;
@ -347,7 +925,7 @@ int assign_server(struct session *s)
s->txn.req.sol + s->txn.req.sl.rq.u,
s->txn.req.sl.rq.u_l);
if (!s->srv) {
/* parameter not found, fall back to round robin */
/* parameter not found, fall back to round robin on the map */
s->srv = get_server_rr_with_conns(s->be);
if (!s->srv)
return SRV_STATUS_FULL;
@ -883,7 +1461,7 @@ int srv_redispatch_connect(struct session *t)
}
int be_downtime(struct proxy *px) {
if ((px->srv_act || px->srv_bck) && px->last_change < now.tv_sec) // ignore negative time
if (px->lbprm.tot_weight && px->last_change < now.tv_sec) // ignore negative time
return px->down_time;
return now.tv_sec - px->last_change + px->down_time;

View File

@ -1564,6 +1564,8 @@ int cfg_parse_listen(const char *file, int linenum, char **args)
curproxy->srv_bck++;
else
curproxy->srv_act++;
newsrv->prev_state = newsrv->state;
}
else if (!strcmp(args[0], "log")) { /* syslog server address */
struct sockaddr_in *sa;
@ -2655,7 +2657,11 @@ int readcfgfile(const char *file)
curproxy->lbprm.wmult = 1; /* default weight multiplier */
curproxy->lbprm.wdiv = 1; /* default weight divider */
init_server_map(curproxy);
/* round robin relies on a weight tree */
if ((curproxy->options & PR_O_BALANCE) == PR_O_BALANCE_RR)
fwrr_init_server_groups(curproxy);
else
init_server_map(curproxy);
if (curproxy->options & PR_O_LOGASAP)
curproxy->to_log &= ~LW_BYTES;

View File

@ -61,9 +61,7 @@ static void set_server_down(struct server *s)
s->last_change = now.tv_sec;
s->state &= ~SRV_RUNNING;
recount_servers(s->proxy);
s->proxy->lbprm.map.state |= PR_MAP_RECALC;
s->proxy->lbprm.set_server_status_down(s);
/* we might have sessions queued on this server and waiting for
* a connection. Those which are redispatchable will be queued
@ -467,19 +465,18 @@ void process_chk(struct task *t, struct timeval *next)
if (s->health == s->rise) {
int xferred;
if (s->last_change < now.tv_sec) // ignore negative times
s->down_time += now.tv_sec - s->last_change;
s->last_change = now.tv_sec;
s->state |= SRV_RUNNING;
if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) {
if (s->proxy->last_change < now.tv_sec) // ignore negative times
s->proxy->down_time += now.tv_sec - s->proxy->last_change;
s->proxy->last_change = now.tv_sec;
}
recount_servers(s->proxy);
s->proxy->lbprm.map.state |= PR_MAP_RECALC;
if (s->last_change < now.tv_sec) // ignore negative times
s->down_time += now.tv_sec - s->last_change;
s->last_change = now.tv_sec;
s->state |= SRV_RUNNING;
s->proxy->lbprm.set_server_status_up(s);
/* check if we can handle some connections queued at the proxy. We
* will take as many as we can handle.