mirror of
http://git.haproxy.org/git/haproxy.git/
synced 2025-01-10 16:00:08 +00:00
b698f0f4a2
Now we can compute the max place depending on the number of servers, maximum weight and weight scale. The formula has been stored as a comment so that it's easy to choose between smooth weight ramp up and high number of servers. The default scale has been set to 16, which permits 4000 servers with a granularity of 6% in the worst case (weight=1).
400 lines
9.5 KiB
C
400 lines
9.5 KiB
C
/*
|
|
* experimental weighted round robin scheduler - (c) 2007 willy tarreau.
|
|
*
|
|
* This filling algorithm is excellent at spreading the servers, as it also
|
|
* takes care of keeping the most uniform distance between occurences of each
|
|
* server, by maximizing this distance. It reduces the number of variables
|
|
* and expensive operations.
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include "eb32tree.h"
|
|
|
|
struct srv {
|
|
struct eb32_node node;
|
|
struct eb_root *tree; // we want to know where the server is
|
|
int num;
|
|
int w; /* weight */
|
|
int next, last;
|
|
int rem;
|
|
} *srv;
|
|
|
|
/* those trees represent a sliding window of 3 time frames */
|
|
struct eb_root tree_0 = EB_ROOT;
|
|
struct eb_root tree_1 = EB_ROOT;
|
|
struct eb_root tree_2 = EB_ROOT;
|
|
|
|
struct eb_root *init_tree; /* receives positions 0..sw-1 */
|
|
struct eb_root *next_tree; /* receives positions >= 2sw */
|
|
|
|
int nsrv; /* # of servers */
|
|
int nsw, sw; /* sum of weights */
|
|
int p; /* current position, between sw..2sw-1 */
|
|
|
|
/* queue a server in the weights tree */
|
|
void queue_by_weight(struct eb_root *root, struct srv *s) {
|
|
s->node.key = 255 - s->w;
|
|
eb32_insert(root, &s->node);
|
|
s->tree = root;
|
|
}
|
|
|
|
/* queue a server in the weight tree <root>, except if its weight is 0 */
|
|
void queue_by_weight_0(struct eb_root *root, struct srv *s) {
|
|
if (s->w) {
|
|
s->node.key = 255 - s->w;
|
|
eb32_insert(root, &s->node);
|
|
s->tree = root;
|
|
} else {
|
|
s->tree = NULL;
|
|
}
|
|
}
|
|
|
|
static inline void dequeue_srv(struct srv *s) {
|
|
eb32_delete(&s->node);
|
|
}
|
|
|
|
/* queues a server into the correct tree depending on ->next */
|
|
void put_srv(struct srv *s) {
|
|
if (s->w <= 0 ||
|
|
s->next >= 2*sw || /* delay everything which does not fit into the window */
|
|
s->next >= sw+nsw) { /* and everything which does not fit into the theorical new window */
|
|
/* put into next tree */
|
|
s->next -= sw; // readjust next in case we could finally take this back to current.
|
|
queue_by_weight_0(next_tree, s);
|
|
} else {
|
|
// The overflow problem is caused by the scale we want to apply to user weight
|
|
// to turn it into effective weight. Since this is only used to provide a smooth
|
|
// slowstart on very low weights (1), it is a pure waste. Thus, we just have to
|
|
// apply a small scaling factor and warn the user that slowstart is not very smooth
|
|
// on low weights.
|
|
// The max key is about ((scale*maxw)*(scale*maxw)*nbsrv)/ratio (where the ratio is
|
|
// the arbitrary divide we perform in the examples above). Assuming that ratio==scale,
|
|
// this translates to maxkey=scale*maxw^2*nbsrv, so
|
|
// max_nbsrv=2^32/255^2/scale ~= 66051/scale
|
|
// Using a scale of 16 is enough to support 4000 servers without overflow, providing
|
|
// 6% steps during slowstart.
|
|
|
|
s->node.key = 256 * s->next + (16*255 + s->rem - s->w) / 16;
|
|
|
|
/* check for overflows */
|
|
if ((int)s->node.key < 0)
|
|
printf(" OV: srv=%p w=%d rem=%d next=%d key=%d", s, s->w, s->rem, s->next, s->node.key);
|
|
eb32_insert(&tree_0, &s->node);
|
|
s->tree = &tree_0;
|
|
}
|
|
}
|
|
|
|
/* prepares a server when extracting it from the init tree */
|
|
static inline void get_srv_init(struct srv *s) {
|
|
s->next = s->rem = 0;
|
|
}
|
|
|
|
/* prepares a server when extracting it from the next tree */
|
|
static inline void get_srv_next(struct srv *s) {
|
|
s->next += sw;
|
|
}
|
|
|
|
/* prepares a server when extracting it from the next tree */
|
|
static inline void get_srv_down(struct srv *s) {
|
|
s->next = p;
|
|
}
|
|
|
|
/* prepares a server when extracting it from its tree */
|
|
void get_srv(struct srv *s) {
|
|
if (s->tree == init_tree) {
|
|
get_srv_init(s);
|
|
}
|
|
else if (s->tree == next_tree) {
|
|
get_srv_next(s);
|
|
}
|
|
else if (s->tree == NULL) {
|
|
get_srv_down(s);
|
|
}
|
|
}
|
|
|
|
|
|
/* return next server from the current tree, or a server from the init tree
|
|
* if appropriate. If both trees are empty, return NULL.
|
|
*/
|
|
struct srv *get_next_server() {
|
|
struct eb32_node *node;
|
|
struct srv *s;
|
|
|
|
node = eb32_first(&tree_0);
|
|
s = eb32_entry(node, struct srv, node);
|
|
|
|
if (!node || s->next > p) {
|
|
/* either we have no server left, or we have a hole */
|
|
struct eb32_node *node2;
|
|
node2 = eb32_first(init_tree);
|
|
if (node2) {
|
|
node = node2;
|
|
s = eb32_entry(node, struct srv, node);
|
|
get_srv_init(s);
|
|
if (s->w == 0)
|
|
node = NULL;
|
|
s->node.key = 0; // do not display random values
|
|
}
|
|
}
|
|
if (node)
|
|
return s;
|
|
else
|
|
return NULL;
|
|
}
|
|
|
|
void update_position(struct srv *s) {
|
|
//if (s->tree == init_tree) {
|
|
if (!s->next) {
|
|
// first time ever for this server
|
|
s->last = p;
|
|
s->next = p + nsw / s->w;
|
|
s->rem += nsw % s->w;
|
|
|
|
if (s->rem >= s->w) {
|
|
s->rem -= s->w;
|
|
s->next++;
|
|
}
|
|
} else {
|
|
s->last = s->next; // or p ?
|
|
//s->next += sw / s->w;
|
|
//s->rem += sw % s->w;
|
|
s->next += nsw / s->w;
|
|
s->rem += nsw % s->w;
|
|
|
|
if (s->rem >= s->w) {
|
|
s->rem -= s->w;
|
|
s->next++;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/* switches trees init_tree and next_tree. init_tree should be empty when
|
|
* this happens, and next_tree filled with servers sorted by weights.
|
|
*/
|
|
void switch_trees() {
|
|
struct eb_root *swap;
|
|
swap = init_tree;
|
|
init_tree = next_tree;
|
|
next_tree = swap;
|
|
sw = nsw;
|
|
p = sw;
|
|
}
|
|
|
|
main(int argc, char **argv) {
|
|
int conns;
|
|
int i;
|
|
|
|
struct srv *s;
|
|
|
|
argc--; argv++;
|
|
nsrv = argc;
|
|
|
|
if (!nsrv)
|
|
exit(1);
|
|
|
|
srv = (struct srv *)calloc(nsrv, sizeof(struct srv));
|
|
|
|
sw = 0;
|
|
for (i = 0; i < nsrv; i++) {
|
|
s = &srv[i];
|
|
s->num = i;
|
|
s->w = atol(argv[i]);
|
|
sw += s->w;
|
|
}
|
|
|
|
nsw = sw;
|
|
|
|
init_tree = &tree_1;
|
|
next_tree = &tree_2;
|
|
|
|
/* and insert all the servers in the PREV tree */
|
|
/* note that it is required to insert them according to
|
|
* the reverse order of their weights.
|
|
*/
|
|
printf("---------------:");
|
|
for (i = 0; i < nsrv; i++) {
|
|
s = &srv[i];
|
|
queue_by_weight_0(init_tree, s);
|
|
printf("%2d", s->w);
|
|
}
|
|
printf("\n");
|
|
|
|
p = sw; // time base of current tree
|
|
conns = 0;
|
|
while (1) {
|
|
struct eb32_node *node;
|
|
|
|
printf("%08d|%06d: ", conns, p);
|
|
|
|
/* if we have en empty tree, let's first try to collect weights
|
|
* which might have changed.
|
|
*/
|
|
if (!sw) {
|
|
if (nsw) {
|
|
sw = nsw;
|
|
p = sw;
|
|
/* do not switch trees, otherwise new servers (from init)
|
|
* would end up in next.
|
|
*/
|
|
//switch_trees();
|
|
//printf("bla\n");
|
|
}
|
|
else
|
|
goto next_iteration;
|
|
}
|
|
|
|
s = get_next_server();
|
|
if (!s) {
|
|
printf("----------- switch (empty) -- sw=%d -> %d ---------\n", sw, nsw);
|
|
switch_trees();
|
|
s = get_next_server();
|
|
printf("%08d|%06d: ", conns, p);
|
|
|
|
if (!s)
|
|
goto next_iteration;
|
|
}
|
|
else if (s->next >= 2*sw) {
|
|
printf("ARGGGGG! s[%d].next=%d, max=%d\n", s->num, s->next, 2*sw-1);
|
|
}
|
|
|
|
/* now we have THE server we want to put at this position */
|
|
for (i = 0; i < s->num; i++) {
|
|
if (srv[i].w > 0)
|
|
printf(". ");
|
|
else
|
|
printf("_ ");
|
|
}
|
|
printf("# ");
|
|
for (i = s->num + 1; i < nsrv; i++) {
|
|
if (srv[i].w > 0)
|
|
printf(". ");
|
|
else
|
|
printf("_ ");
|
|
}
|
|
printf(" : ");
|
|
|
|
printf("s=%02d v=%04d w=%03d n=%03d r=%03d ",
|
|
s->num, s->node.key, s->w, s->next, s->rem);
|
|
|
|
update_position(s);
|
|
printf(" | next=%03d, rem=%03d ", s->next, s->rem);
|
|
|
|
if (s->next >= sw * 2) {
|
|
dequeue_srv(s);
|
|
//queue_by_weight(next_tree, s);
|
|
put_srv(s);
|
|
printf(" => next (w=%d, n=%d) ", s->w, s->next);
|
|
}
|
|
else {
|
|
printf(" => curr ");
|
|
|
|
//s->node.key = s->next;
|
|
/* we want to ensure that in case of conflicts, servers with
|
|
* the highest weights will get served first. Also, we still
|
|
* have the remainder to see where the entry expected to be
|
|
* inserted.
|
|
*/
|
|
//s->node.key = 256 * s->next + 255 - s->w;
|
|
//s->node.key = sw * s->next + sw / s->w;
|
|
//s->node.key = sw * s->next + s->rem; /// seems best (check with filltab15) !
|
|
|
|
//s->node.key = (2 * sw * s->next) + s->rem + sw / s->w;
|
|
|
|
/* FIXME: must be optimized */
|
|
dequeue_srv(s);
|
|
put_srv(s);
|
|
//eb32i_insert(&tree_0, &s->node);
|
|
//s->tree = &tree_0;
|
|
}
|
|
|
|
next_iteration:
|
|
p++;
|
|
conns++;
|
|
if (/*conns == 30*/ /**/random()%100 == 0/**/) {
|
|
int w = /*20*//**/random()%4096/**/;
|
|
int num = /*1*//**/random()%nsrv/**/;
|
|
struct srv *s = &srv[num];
|
|
|
|
nsw = nsw - s->w + w;
|
|
//sw=nsw;
|
|
|
|
if (s->tree == init_tree) {
|
|
printf(" -- chgwght1(%d): %d->%d, n=%d --", s->num, s->w, w, s->next);
|
|
printf("(init)");
|
|
s->w = w;
|
|
dequeue_srv(s);
|
|
queue_by_weight_0(s->tree, s);
|
|
}
|
|
else if (s->tree == NULL) {
|
|
printf(" -- chgwght2(%d): %d->%d, n=%d --", s->num, s->w, w, s->next);
|
|
printf("(down)");
|
|
s->w = w;
|
|
dequeue_srv(s);
|
|
//queue_by_weight_0(init_tree, s);
|
|
get_srv(s);
|
|
s->next = p + (nsw + sw - p) / s->w;
|
|
put_srv(s);
|
|
}
|
|
else {
|
|
int oldnext;
|
|
|
|
/* the server is either active or in the next queue */
|
|
get_srv(s);
|
|
printf(" -- chgwght3(%d): %d->%d, n=%d, sw=%d, nsw=%d --", s->num, s->w, w, s->next, sw, nsw);
|
|
|
|
oldnext = s->next;
|
|
s->w = w;
|
|
|
|
/* we must measure how far we are from the end of the current window
|
|
* and try to fit their as many entries as should theorically be.
|
|
*/
|
|
|
|
//s->w = s->w * (2*sw - p) / sw;
|
|
if (s->w > 0) {
|
|
int step = (nsw /*+ sw - p*/) / s->w;
|
|
s->next = s->last + step;
|
|
s->rem = 0;
|
|
if (s->next > oldnext) {
|
|
s->next = oldnext;
|
|
printf(" aaaaaaa ");
|
|
}
|
|
|
|
if (s->next < p + 2) {
|
|
s->next = p + step;
|
|
printf(" bbbbbb ");
|
|
}
|
|
} else {
|
|
printf(" push -- ");
|
|
/* push it into the next tree */
|
|
s->w = 0;
|
|
s->next = p + sw;
|
|
}
|
|
|
|
|
|
dequeue_srv(s);
|
|
printf(" n=%d", s->next);
|
|
put_srv(s);
|
|
}
|
|
}
|
|
|
|
printf("\n");
|
|
|
|
if (0 && conns % 50000 == 0) {
|
|
printf("-------- %-5d : changing all weights ----\n", conns);
|
|
|
|
for (i = 0; i < nsrv; i++) {
|
|
int w = i + 1;
|
|
s = &srv[i];
|
|
nsw = nsw - s->w + w;
|
|
s->w = w;
|
|
dequeue_srv(s);
|
|
queue_by_weight_0(next_tree, s); // or init_tree ?
|
|
}
|
|
}
|
|
|
|
}
|
|
}
|
|
|