diff --git a/include/common/ticks.h b/include/common/ticks.h index 0b3e102e13..f3c1a7de69 100644 --- a/include/common/ticks.h +++ b/include/common/ticks.h @@ -2,7 +2,7 @@ include/common/ticks.h Functions and macros for manipulation of expiration timers - Copyright (C) 2000-2008 Willy Tarreau - w@1wt.eu + Copyright (C) 2000-2009 Willy Tarreau - w@1wt.eu This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -22,20 +22,31 @@ /* * Using a mix of milliseconds and timeval for internal timers is expensive and * overkill, because we don't need such a precision to compute timeouts. - * So we're converting them to "ticks". Right now, one tick equals one - * millisecond, but that might change in the future. Ticks are stored as 32bit - * values, and sorted in four 30bit-wide rotating arrays, which means that any - * timer may be 2^30 ms in the future, or 12.4 days. The ticks are designed to - * wrap after they pass 2^32. That means that we cannot directly compare them, - * but we can check the sign of their difference. + * So we're converting them to "ticks". * + * A tick is a representation of a date relative to another one, and is + * measured in milliseconds. The natural usage is to represent an absolute date + * relative to the current date. Since it is not practical to update all values + * each time the current date changes, instead we use the absolute date rounded + * down to fit in a tick. We then have to compare a tick to the current date to + * know whether it is in the future or in the past. If a tick is below the + * current date, it is in the past. If it is above, it is in the future. The + * values will wrap so we can't compare that easily, instead we check the sign + * of the difference between a tick and the current date. + * + * Proceeding like this allows us to manipulate dates that are stored in + * scalars with enough precision and range. For this reason, we store ticks in + * 32-bit integers. This is enough to handle dates that are between 24.85 days + * in the past and as much in the future. + * * We must both support absolute dates (well in fact, dates relative to now+/- * 12 days), and intervals (for timeouts). Both types need an "eternity" magic * value. For optimal code generation, we'll use zero as the magic value * indicating that an expiration timer or a timeout is not set. We have to * check that we don't return this value when adding timeouts to . If a * computation returns 0, we must increase it to 1 (which will push the timeout - * 1 ms further). + * 1 ms further). For this reason, timeouts must not be added by hand but via + * the dedicated tick_add() function. */ #ifndef _COMMON_TICKS_H diff --git a/include/proto/task.h b/include/proto/task.h index 1db30805c0..67eb92499c 100644 --- a/include/proto/task.h +++ b/include/proto/task.h @@ -29,16 +29,124 @@ #include #include #include +#include #include +/* Principle of the wait queue. + * + * We want to be able to tell whether an expiration date is before of after the + * current time . We KNOW that expiration dates are never too far apart, + * because they are measured in ticks (milliseconds). We also know that almost + * all dates will be in the future, and that a very small part of them will be + * in the past, they are the ones which have expired since last time we checked + * them. Using ticks, we know if a date is in the future or in the past, but we + * cannot use that to store sorted information because that reference changes + * all the time. + * + * So we cut the time in 3 ranges, only one of which can be. The base of + * the range holding serves as a reference for as long as remains + * in this range : + * - previous : those ones are expired by definition (all before ) + * - current : some are expired, some are not (holds ) + * - next : none are expired (all after ) + * + * We use the higher two bits of the timers expressed in ticks to determine + * which range a timer is in, compared to : + * + * now previous current next0 next1 + * [31:30] [31:30] [31:30] [31:30] [31:30] + * 00 11 00 01 10 + * 01 00 01 10 11 + * 10 01 10 11 00 + * 11 10 11 00 01 + * + * By definition, is the range containing as well as all timers + * which have the same 2 high bits as , is the range just + * before, which contains all timers whose high bits equal those of minus + * 1. Last, is composed of the two remaining ranges. + * + * For ease of implementation, the timers will then be stored into 4 queues 0-3 + * determined by the 2 higher bits of the timer. The expiration algorithm is + * very simple : + * - expire everything in =queue[((now>>30)-1)&3] + * - expire from =queue[(now>>30)&3] everything where timer >= now + * + * With this algorithm, it's possible to queue tasks meant to expire 24.8 days + * in the future, and still be able to detect events remaining unprocessed for + * the last 12.4 days! Note that the principle might be extended to any number + * of higher bits as long as there is only one range for expired tasks. For + * instance, using the 8 higher bits to index the range, we would have one past + * range of 4.6 hours (24 bits in ms), and 254 ranges in the future totalizing + * 49.3 days. This would eat more memory for very little added benefit though. + * + * Also, in order to maintain the ability to perform time comparisons, it is + * preferable to avoid using the range above, as values in this range + * may not easily be compared to outside of these functions as it is the + * opposite of the range, and - may randomly be positive + * or negative. That means we're left with +/- 12.4 days timers. + * + * To keep timers ordered, we use 4 ebtrees [0..3]. We could have used instead + * of ticks, (seconds*1024)+milliseconds, as well as 1024th of seconds, but + * that makes comparisons with ticks more difficult, so in the end it's better + * to stick to the ticks. + * + * Another nice optimisation is to allow a timer to stay at an old place in the + * queue as long as it's not further than the real expiration date. That way, + * we use the tree as a place holder for a minorant of the real expiration + * date. Since we have a very low chance of hitting a timeout anyway, we can + * bounce the nodes to their right place when we scan the tree if we encounter + * a misplaced node once in a while. This even allows us not to remove the + * infinite timers from the wait queue. + * + * So, to summarize, we have : + * - node->key always defines current position in the wait queue + * - timer is the real expiration date (possibly infinite) + * - node->key <= timer + * + * The run queue works similarly to the wait queue except that the current date + * is replaced by an insertion counter which can also wrap without any problem. + */ + +/* the timers are stored as 32-bit values in the queues */ +#define TIMER_TICK_BITS 32 +#define TIMER_TREE_BITS 2 +#define TIMER_TREES (1 << TIMER_TREE_BITS) +#define TIMER_TREE_SHIFT (TIMER_TICK_BITS - TIMER_TREE_BITS) +#define TIMER_TREE_MASK (TIMER_TREES - 1) +#define TIMER_TICK_MASK ((1U << (TIMER_TICK_BITS-1)) * 2 - 1) +#define TIMER_SIGN_BIT (1 << (TIMER_TICK_BITS - 1)) + +/* a few exported variables */ extern unsigned int run_queue; /* run queue size */ extern unsigned int niced_tasks; /* number of niced tasks in the run queue */ extern struct pool_head *pool2_task; extern struct task *last_timer; /* optimization: last queued timer */ -/* perform minimal initializations, report 0 in case of error, 1 if OK. */ -int init_task(); +/* Convert ticks to timers. Must not be called with TICK_ETERNITY, which is not + * a problem inside tree scanning functions. Note that ticks are signed while + * timers are not. + */ +static inline unsigned int tick_to_timer(int tick) +{ + return tick & TIMER_TICK_MASK; +} + +/* Convert timer to ticks. This operation will be correct only as long as + * timers are stored on a minimum of 32-bit. We take care of not returning zero + * which would mean "eternity" for a tick. Also note that ticks are signed and + * timers are not. + */ +static inline int timer_to_tick(unsigned int timer) +{ + return timer ? timer : 1; +} + +/* returns a tree number based on a ticks value */ +static inline unsigned int timer_to_tree(unsigned int timer) +{ + return (timer >> TIMER_TREE_SHIFT) & TIMER_TREE_MASK; +} /* return 0 if task is in run queue, otherwise non-zero */ static inline int task_in_rq(struct task *t) @@ -158,6 +266,8 @@ void process_runnable_tasks(int *next); */ void wake_expired_tasks(int *next); +/* Perform minimal initializations, report 0 in case of error, 1 if OK. */ +int init_task(); #endif /* _PROTO_TASK_H */ diff --git a/include/types/task.h b/include/types/task.h index 1cc12a9f8f..13e2aacd1f 100644 --- a/include/types/task.h +++ b/include/types/task.h @@ -49,7 +49,7 @@ struct task { struct eb32_node wq; /* ebtree node used to hold the task in the wait queue */ struct eb32_node rq; /* ebtree node used to hold the task in the run queue */ int state; /* task state : bit field of TASK_* */ - unsigned int expire; /* next expiration time for this task */ + int expire; /* next expiration date for this task, in ticks */ struct task * (*process)(struct task *t); /* the function which processes the task */ void *context; /* the task's context */ int nice; /* the task's current nice value from -1024 to +1024 */ diff --git a/src/session.c b/src/session.c index 7d7b4b6d4b..34ed663a6f 100644 --- a/src/session.c +++ b/src/session.c @@ -1032,7 +1032,7 @@ struct task *process_session(struct task *t) #ifdef DEBUG_DEV /* this may only happen when no timeout is set or in case of an FSM bug */ - if (!t->expire) + if (!tick_isset(t->expire)) ABORT_NOW(); #endif return t; /* nothing more to do */ diff --git a/src/task.c b/src/task.c index 844862e3d4..f054e31707 100644 --- a/src/task.c +++ b/src/task.c @@ -26,109 +26,10 @@ unsigned int run_queue = 0; unsigned int niced_tasks = 0; /* number of niced tasks in the run queue */ struct task *last_timer = NULL; /* optimization: last queued timer */ -/* Principle of the wait queue. - * - * We want to be able to tell whether an expiration date is before of after the - * current time . We KNOW that expiration dates are never too far apart, - * because they are already computed by adding integer numbers of milliseconds - * to the current date. - * We also know that almost all dates will be in the future, and that a very - * small part of them will be in the past, they are the ones which have expired - * since last time we checked them. - * - * The current implementation uses a wrapping time cut into 3 ranges : - * - previous : those ones are expired by definition - * - current : some are expired, some are not - * - next : none are expired - * - * We use the higher two bits of the timers expressed in ticks (milliseconds) - * to determine which range a timer is in, compared to : - * - * now previous current next0 next1 - * [31:30] [31:30] [31:30] [31:30] [31:30] - * 00 11 00 01 10 - * 01 00 01 10 11 - * 10 01 10 11 00 - * 11 10 11 00 01 - * - * By definition, is the range containing as well as all timers - * which have the same 2 high bits as , is the range just - * before, which contains all timers whose high bits equal those of minus - * 1. Last, is composed of the two remaining ranges. - * - * For ease of implementation, the timers will then be stored into 4 queues 0-3 - * determined by the 2 higher bits of the timer. The expiration algorithm is - * very simple : - * - expire everything in =queue[((now>>30)-1)&3] - * - expire from =queue[(now>>30)&3] everything where timer >= now - * - * With this algorithm, it's possible to queue tasks meant to expire 24.8 days - * in the future, and still be able to detect events remaining unprocessed for - * the last 12.4 days! Note that the principle might be extended to any number - * of higher bits as long as there is only one range for expired tasks. For - * instance, using the 8 higher bits to index the range, we would have one past - * range of 4.6 hours (24 bits in ms), and 254 ranges in the future totalizing - * 49.3 days. This would eat more memory for a very little added benefit. - * - * Also, in order to maintain the ability to perform time comparisons, it is - * recommended to avoid using the range above, as values in this range - * may not easily be compared to outside of these functions as it is the - * opposite of the range, and - may randomly be positive - * or negative. That means we're left with +/- 12 days timers. - * - * To keep timers ordered, we use 4 ebtrees [0..3]. To keep computation low, we - * may use (seconds*1024)+milliseconds, which preserves ordering eventhough we - * can't do real computations on it. Future evolutions could make use of 1024th - * of seconds instead of milliseconds, with the special value 0 avoided (and - * replaced with 1), so that zero indicates the timer is not set. - * - * Another nice optimisation is to allow a timer to stay at an old place in the - * queue as long as it's not further than the real expected timeout. We really - * use the tree as a place holder for a minorant of the real expiration date. - * Since we have very low chance of hitting a timeout anyway, we can bounce the - * nodes to their right place when we scan the tree and encounter a misplaced - * node once in a while. This even allows us not to remove the infinite timers. - * - * So, to summarize, we have : - * - node->key always defines current position in the tree - * - timer is the real expiration date (possibly infinite) - * - node->key <= timer - */ - -#define TIMER_TICK_BITS 32 -#define TIMER_TREE_BITS 2 -#define TIMER_TREES (1 << TIMER_TREE_BITS) -#define TIMER_TREE_SHIFT (TIMER_TICK_BITS - TIMER_TREE_BITS) -#define TIMER_TREE_MASK (TIMER_TREES - 1) -#define TIMER_TICK_MASK ((1U << (TIMER_TICK_BITS-1)) * 2 - 1) -#define TIMER_SIGN_BIT (1 << (TIMER_TICK_BITS - 1)) - static struct eb_root timers[TIMER_TREES]; /* trees with MSB 00, 01, 10 and 11 */ static struct eb_root rqueue[TIMER_TREES]; /* trees constituting the run queue */ static unsigned int rqueue_ticks; /* insertion count */ -/* returns an ordered key based on an expiration date. */ -static inline unsigned int timeval_to_ticks(const struct timeval *t) -{ - unsigned int key; - - key = ((unsigned int)t->tv_sec * 1000) + ((unsigned int)t->tv_usec / 1000); - key &= TIMER_TICK_MASK; - return key; -} - -/* returns a tree number based on a ticks value */ -static inline unsigned int ticks_to_tree(unsigned int ticks) -{ - return (ticks >> TIMER_TREE_SHIFT) & TIMER_TREE_MASK; -} - -/* returns a tree number based on an expiration date. */ -static inline unsigned int timeval_to_tree(const struct timeval *t) -{ - return ticks_to_tree(timeval_to_ticks(t)); -} - /* Puts the task in run queue at a position depending on t->nice. is * returned. The nice value assigns boosts in 32th of the run queue size. A * nice value of -1024 sets the task to -run_queue*32, while a nice value of @@ -156,7 +57,7 @@ struct task *__task_wakeup(struct task *t) /* clear state flags at the same time */ t->state &= ~TASK_WOKEN_ANY; - eb32_insert(&rqueue[ticks_to_tree(t->rq.key)], &t->rq); + eb32_insert(&rqueue[timer_to_tree(t->rq.key)], &t->rq); return t; } @@ -184,18 +85,19 @@ void task_queue(struct task *task) * rely on wake_expired_tasks() to catch the node and move it to the * proper place should it ever happen. */ - if (!task->expire || ((task->wq.key - task->expire) & TIMER_SIGN_BIT)) + if (!tick_isset(task->expire) || + ((task->wq.key - tick_to_timer(task->expire)) & TIMER_SIGN_BIT)) return; __task_unlink_wq(task); } /* the task is not in the queue now */ - if (unlikely(!task->expire)) + if (unlikely(!tick_isset(task->expire))) return; - task->wq.key = task->expire; + task->wq.key = tick_to_timer(task->expire); #ifdef DEBUG_CHECK_INVALID_EXPIRATION_DATES - if ((task->wq.key - now_ms) & TIMER_SIGN_BIT) + if ((task->wq.key - tick_to_timer(now_ms)) & TIMER_SIGN_BIT) /* we're queuing too far away or in the past (most likely) */ return; #endif @@ -211,7 +113,7 @@ void task_queue(struct task *task) eb_insert_dup(&last_timer->wq.node, &task->wq.node); return; } - eb32_insert(&timers[ticks_to_tree(task->wq.key)], &task->wq); + eb32_insert(&timers[timer_to_tree(task->wq.key)], &task->wq); if (task->wq.node.bit == -1) last_timer = task; /* we only want dup a tree's root */ return; @@ -237,17 +139,17 @@ void wake_expired_tasks(int *next) * non-expired entry. */ - now_tree = ticks_to_tree(now_ms); + now_tree = timer_to_tree(tick_to_timer(now_ms)); tree = (now_tree - 1) & TIMER_TREE_MASK; do { eb = eb32_first(&timers[tree]); while (eb) { task = eb32_entry(eb, struct task, wq); - if ((now_ms - eb->key) & TIMER_SIGN_BIT) { + if ((tick_to_timer(now_ms) - eb->key) & TIMER_SIGN_BIT) { /* note that we don't need this check for the * tree, but it's cheaper than duplicating the code. */ - *next = eb->key; /* when we want to revisit the tree */ + *next = timer_to_tick(eb->key); return; } @@ -311,7 +213,7 @@ void process_runnable_tasks(int *next) if (likely(niced_tasks)) max_processed /= 4; - tree = ticks_to_tree(rqueue_ticks); + tree = timer_to_tree(rqueue_ticks); stop = (tree + TIMER_TREES / 2) & TIMER_TREE_MASK; tree = (tree - 1) & TIMER_TREE_MASK;