/* * experimental weighted round robin scheduler - (c) 2007 willy tarreau. * * This filling algorithm is excellent at spreading the servers, as it also * takes care of keeping the most uniform distance between occurences of each * server, by maximizing this distance. It reduces the number of variables * and expensive operations. */ #include #include #include "eb32tree.h" struct srv { struct eb32_node node; struct eb_root *tree; // we want to know where the server is int num; int w; /* weight */ int next, last; int rem; } *srv; /* those trees represent a sliding window of 3 time frames */ struct eb_root tree_0 = EB_ROOT; struct eb_root tree_1 = EB_ROOT; struct eb_root tree_2 = EB_ROOT; struct eb_root *init_tree; /* receives positions 0..sw-1 */ struct eb_root *next_tree; /* receives positions >= 2sw */ int nsrv; /* # of servers */ int nsw, sw; /* sum of weights */ int p; /* current position, between sw..2sw-1 */ /* queue a server in the weights tree */ void queue_by_weight(struct eb_root *root, struct srv *s) { s->node.key = 255 - s->w; eb32_insert(root, &s->node); s->tree = root; } /* queue a server in the weight tree , except if its weight is 0 */ void queue_by_weight_0(struct eb_root *root, struct srv *s) { if (s->w) { s->node.key = 255 - s->w; eb32_insert(root, &s->node); s->tree = root; } else { s->tree = NULL; } } static inline void dequeue_srv(struct srv *s) { eb32_delete(&s->node); } /* queues a server into the correct tree depending on ->next */ void put_srv(struct srv *s) { if (s->w <= 0 || s->next >= 2*sw || /* delay everything which does not fit into the window */ s->next >= sw+nsw) { /* and everything which does not fit into the theorical new window */ /* put into next tree */ s->next -= sw; // readjust next in case we could finally take this back to current. queue_by_weight_0(next_tree, s); } else { // The overflow problem is caused by the scale we want to apply to user weight // to turn it into effective weight. Since this is only used to provide a smooth // slowstart on very low weights (1), it is a pure waste. Thus, we just have to // apply a small scaling factor and warn the user that slowstart is not very smooth // on low weights. // The max key is about ((scale*maxw)*(scale*maxw)*nbsrv)/ratio (where the ratio is // the arbitrary divide we perform in the examples above). Assuming that ratio==scale, // this translates to maxkey=scale*maxw^2*nbsrv, so // max_nbsrv=2^32/255^2/scale ~= 66051/scale // Using a scale of 16 is enough to support 4000 servers without overflow, providing // 6% steps during slowstart. s->node.key = 256 * s->next + (16*255 + s->rem - s->w) / 16; /* check for overflows */ if ((int)s->node.key < 0) printf(" OV: srv=%p w=%d rem=%d next=%d key=%d", s, s->w, s->rem, s->next, s->node.key); eb32_insert(&tree_0, &s->node); s->tree = &tree_0; } } /* prepares a server when extracting it from the init tree */ static inline void get_srv_init(struct srv *s) { s->next = s->rem = 0; } /* prepares a server when extracting it from the next tree */ static inline void get_srv_next(struct srv *s) { s->next += sw; } /* prepares a server when extracting it from the next tree */ static inline void get_srv_down(struct srv *s) { s->next = p; } /* prepares a server when extracting it from its tree */ void get_srv(struct srv *s) { if (s->tree == init_tree) { get_srv_init(s); } else if (s->tree == next_tree) { get_srv_next(s); } else if (s->tree == NULL) { get_srv_down(s); } } /* return next server from the current tree, or a server from the init tree * if appropriate. If both trees are empty, return NULL. */ struct srv *get_next_server() { struct eb32_node *node; struct srv *s; node = eb32_first(&tree_0); s = eb32_entry(node, struct srv, node); if (!node || s->next > p) { /* either we have no server left, or we have a hole */ struct eb32_node *node2; node2 = eb32_first(init_tree); if (node2) { node = node2; s = eb32_entry(node, struct srv, node); get_srv_init(s); if (s->w == 0) node = NULL; s->node.key = 0; // do not display random values } } if (node) return s; else return NULL; } void update_position(struct srv *s) { //if (s->tree == init_tree) { if (!s->next) { // first time ever for this server s->last = p; s->next = p + nsw / s->w; s->rem += nsw % s->w; if (s->rem >= s->w) { s->rem -= s->w; s->next++; } } else { s->last = s->next; // or p ? //s->next += sw / s->w; //s->rem += sw % s->w; s->next += nsw / s->w; s->rem += nsw % s->w; if (s->rem >= s->w) { s->rem -= s->w; s->next++; } } } /* switches trees init_tree and next_tree. init_tree should be empty when * this happens, and next_tree filled with servers sorted by weights. */ void switch_trees() { struct eb_root *swap; swap = init_tree; init_tree = next_tree; next_tree = swap; sw = nsw; p = sw; } main(int argc, char **argv) { int conns; int i; struct srv *s; argc--; argv++; nsrv = argc; if (!nsrv) exit(1); srv = (struct srv *)calloc(nsrv, sizeof(struct srv)); sw = 0; for (i = 0; i < nsrv; i++) { s = &srv[i]; s->num = i; s->w = atol(argv[i]); sw += s->w; } nsw = sw; init_tree = &tree_1; next_tree = &tree_2; /* and insert all the servers in the PREV tree */ /* note that it is required to insert them according to * the reverse order of their weights. */ printf("---------------:"); for (i = 0; i < nsrv; i++) { s = &srv[i]; queue_by_weight_0(init_tree, s); printf("%2d", s->w); } printf("\n"); p = sw; // time base of current tree conns = 0; while (1) { struct eb32_node *node; printf("%08d|%06d: ", conns, p); /* if we have en empty tree, let's first try to collect weights * which might have changed. */ if (!sw) { if (nsw) { sw = nsw; p = sw; /* do not switch trees, otherwise new servers (from init) * would end up in next. */ //switch_trees(); //printf("bla\n"); } else goto next_iteration; } s = get_next_server(); if (!s) { printf("----------- switch (empty) -- sw=%d -> %d ---------\n", sw, nsw); switch_trees(); s = get_next_server(); printf("%08d|%06d: ", conns, p); if (!s) goto next_iteration; } else if (s->next >= 2*sw) { printf("ARGGGGG! s[%d].next=%d, max=%d\n", s->num, s->next, 2*sw-1); } /* now we have THE server we want to put at this position */ for (i = 0; i < s->num; i++) { if (srv[i].w > 0) printf(". "); else printf("_ "); } printf("# "); for (i = s->num + 1; i < nsrv; i++) { if (srv[i].w > 0) printf(". "); else printf("_ "); } printf(" : "); printf("s=%02d v=%04d w=%03d n=%03d r=%03d ", s->num, s->node.key, s->w, s->next, s->rem); update_position(s); printf(" | next=%03d, rem=%03d ", s->next, s->rem); if (s->next >= sw * 2) { dequeue_srv(s); //queue_by_weight(next_tree, s); put_srv(s); printf(" => next (w=%d, n=%d) ", s->w, s->next); } else { printf(" => curr "); //s->node.key = s->next; /* we want to ensure that in case of conflicts, servers with * the highest weights will get served first. Also, we still * have the remainder to see where the entry expected to be * inserted. */ //s->node.key = 256 * s->next + 255 - s->w; //s->node.key = sw * s->next + sw / s->w; //s->node.key = sw * s->next + s->rem; /// seems best (check with filltab15) ! //s->node.key = (2 * sw * s->next) + s->rem + sw / s->w; /* FIXME: must be optimized */ dequeue_srv(s); put_srv(s); //eb32i_insert(&tree_0, &s->node); //s->tree = &tree_0; } next_iteration: p++; conns++; if (/*conns == 30*/ /**/random()%100 == 0/**/) { int w = /*20*//**/random()%4096/**/; int num = /*1*//**/random()%nsrv/**/; struct srv *s = &srv[num]; nsw = nsw - s->w + w; //sw=nsw; if (s->tree == init_tree) { printf(" -- chgwght1(%d): %d->%d, n=%d --", s->num, s->w, w, s->next); printf("(init)"); s->w = w; dequeue_srv(s); queue_by_weight_0(s->tree, s); } else if (s->tree == NULL) { printf(" -- chgwght2(%d): %d->%d, n=%d --", s->num, s->w, w, s->next); printf("(down)"); s->w = w; dequeue_srv(s); //queue_by_weight_0(init_tree, s); get_srv(s); s->next = p + (nsw + sw - p) / s->w; put_srv(s); } else { int oldnext; /* the server is either active or in the next queue */ get_srv(s); printf(" -- chgwght3(%d): %d->%d, n=%d, sw=%d, nsw=%d --", s->num, s->w, w, s->next, sw, nsw); oldnext = s->next; s->w = w; /* we must measure how far we are from the end of the current window * and try to fit their as many entries as should theorically be. */ //s->w = s->w * (2*sw - p) / sw; if (s->w > 0) { int step = (nsw /*+ sw - p*/) / s->w; s->next = s->last + step; s->rem = 0; if (s->next > oldnext) { s->next = oldnext; printf(" aaaaaaa "); } if (s->next < p + 2) { s->next = p + step; printf(" bbbbbb "); } } else { printf(" push -- "); /* push it into the next tree */ s->w = 0; s->next = p + sw; } dequeue_srv(s); printf(" n=%d", s->next); put_srv(s); } } printf("\n"); if (0 && conns % 50000 == 0) { printf("-------- %-5d : changing all weights ----\n", conns); for (i = 0; i < nsrv; i++) { int w = i + 1; s = &srv[i]; nsw = nsw - s->w + w; s->w = w; dequeue_srv(s); queue_by_weight_0(next_tree, s); // or init_tree ? } } } }