MINOR: sched: have one runqueue ticks counter per thread
The runqueue_ticks counts the number of task wakeups and is used to position new tasks in the run queue, but since we've had per-thread run queues, the values there are not very relevant anymore and the nice value doesn't apply well if some threads are more loaded than others. In addition, letting all threads compete over a shared counter is not smart as this may cause some excessive contention.

Let's move this index close to the run queues themselves, i.e. one per thread and a global one. In addition to improving fairness, this has increased global performance by 2% on 16 threads thanks to the lower contention on rqueue_ticks.

Fairness issues were not observed, but if any were to be, this patch could be backported as far as 2.0 to address them.
parent 4d77bbf856
commit c6ba9a0b9b
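To make the contention argument above concrete, here is a minimal standalone sketch (not HAProxy code; queue_ctx, enqueue_key_shared and enqueue_key_local are made-up names) contrasting the old scheme, where every wakeup performs an atomic add on one shared counter, with the new one, where each thread bumps its own counter with a plain increment:

#include <stdatomic.h>

/* Before: one insertion counter shared by all threads. Every wakeup does an
 * atomic add, so all threads bounce the same cache line. */
static _Atomic unsigned int shared_ticks;

static unsigned int enqueue_key_shared(void)
{
        return atomic_fetch_add_explicit(&shared_ticks, 1, memory_order_relaxed) + 1;
}

/* After: one insertion counter per thread. Only the owning thread ever touches
 * it, so no atomic operation and no cross-thread contention. */
struct queue_ctx {
        unsigned int rqueue_ticks;  /* insertion counter for this thread's run queue */
};

static __thread struct queue_ctx local_ctx;

static unsigned int enqueue_key_local(void)
{
        return ++local_ctx.rqueue_ticks;
}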
@@ -76,10 +76,11 @@ struct task_per_thread {
 	struct eb_root rqueue;              /* tree constituting the per-thread run queue */
 	struct mt_list shared_tasklet_list; /* Tasklet to be run, woken up by other threads */
 	struct list tasklets[TL_CLASSES];   /* tasklets (and/or tasks) to run, by class */
+	unsigned int rqueue_ticks;          /* Insertion counter for the run queue */
 	int task_list_size;                 /* Number of tasks among the tasklets */
 	int rqueue_size;                    /* Number of elements in the per-thread run queue */
-	struct task *current;               /* current task (not tasklet) */
 	int current_queue;                  /* points to current tasklet list being run, -1 if none */
+	struct task *current;               /* current task (not tasklet) */
 	uint8_t tl_class_mask;              /* bit mask of non-empty tasklets classes */
 	__attribute__((aligned(64))) char end[0];
 };
src/task.c: 15 lines changed
@@ -48,11 +48,11 @@ __decl_aligned_spinlock(rq_lock); /* spin lock related to run queue */
 __decl_aligned_rwlock(wq_lock);   /* RW lock related to the wait queue */
 
 #ifdef USE_THREAD
-struct eb_root timers;      /* sorted timers tree, global */
-struct eb_root rqueue;      /* tree constituting the run queue */
+struct eb_root timers;      /* sorted timers tree, global, accessed under wq_lock */
+struct eb_root rqueue;      /* tree constituting the global run queue, accessed under rq_lock */
+static unsigned int global_rqueue_ticks;  /* insertion count in the grq, use rq_lock */
 #endif
 
-static unsigned int rqueue_ticks;  /* insertion count */
 
 struct task_per_thread task_per_thread[MAX_THREADS];
 
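A side note on the new global_rqueue_ticks above: as its comment says, it is only used for the global run queue and only under rq_lock, which is why a plain unsigned int is enough. A rough sketch of that discipline, assuming illustrative names (grq_lock, grq_ticks, global_enqueue_key) and a pthread spinlock instead of HAProxy's own locking macros:

#include <pthread.h>

static pthread_spinlock_t grq_lock;   /* stands in for rq_lock */
static unsigned int grq_ticks;        /* only read or written while grq_lock is held */

static void grq_init(void)
{
        pthread_spin_init(&grq_lock, PTHREAD_PROCESS_PRIVATE);
}

static unsigned int global_enqueue_key(void)
{
        unsigned int key;

        pthread_spin_lock(&grq_lock);
        key = ++grq_ticks;            /* plain increment; the lock serializes it */
        pthread_spin_unlock(&grq_lock);
        return key;
}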
@@ -130,10 +130,11 @@ void __task_wakeup(struct task *t, struct eb_root *root)
 #ifdef USE_THREAD
 	if (root == &rqueue) {
 		global_tasks_mask |= t->thread_mask;
+		t->rq.key = ++global_rqueue_ticks;
 		__ha_barrier_store();
-	}
+	} else
 #endif
-	t->rq.key = _HA_ATOMIC_ADD(&rqueue_ticks, 1);
+		t->rq.key = ++sched->rqueue_ticks;
 
 	if (likely(t->nice)) {
 		int offset;
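The hunk above ends right where the nice value is applied. The exact scaling is not shown here, but the principle the commit message relies on is that nice works by shifting the task's insertion key, so it only reorders the task relative to tasks keyed from the same counter; with one counter per thread, that now means tasks queued on the same thread. An illustrative sketch (fake_task, fake_enqueue and the *16 scaling are made up):

struct fake_task {
        unsigned int key;  /* position in the run queue, lower runs sooner */
        int nice;          /* negative = higher priority, as in the Unix convention */
};

static unsigned int thread_ticks;  /* this thread's insertion counter */

static void fake_enqueue(struct fake_task *t)
{
        t->key = ++thread_ticks;
        if (t->nice)
                t->key += (unsigned int)(t->nice * 16);  /* illustrative scaling only */
}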
@@ -643,7 +644,7 @@ void process_runnable_tasks()
 	if ((global_tasks_mask & tid_bit) && !grq) {
 #ifdef USE_THREAD
 		HA_SPIN_LOCK(TASK_RQ_LOCK, &rq_lock);
-		grq = eb32sc_lookup_ge(&rqueue, rqueue_ticks - TIMER_LOOK_BACK, tid_bit);
+		grq = eb32sc_lookup_ge(&rqueue, global_rqueue_ticks - TIMER_LOOK_BACK, tid_bit);
 		if (unlikely(!grq)) {
 			grq = eb32sc_first(&rqueue, tid_bit);
 			if (!grq) {
@@ -659,7 +660,7 @@ void process_runnable_tasks()
 	 */
 
 	if (!lrq) {
-		lrq = eb32sc_lookup_ge(&tt->rqueue, rqueue_ticks - TIMER_LOOK_BACK, tid_bit);
+		lrq = eb32sc_lookup_ge(&tt->rqueue, tt->rqueue_ticks - TIMER_LOOK_BACK, tid_bit);
 		if (unlikely(!lrq))
 			lrq = eb32sc_first(&tt->rqueue, tid_bit);
 	}
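Both lookups above keep the same pattern around the renamed counters: start searching from (ticks - TIMER_LOOK_BACK) and, if nothing is found, fall back to the first entry, which covers the case where the 32-bit counter has wrapped. A standalone analogue using a sorted array instead of the eb32sc tree (LOOK_BACK, find_ge and the sample keys are made up for illustration):

#include <stddef.h>
#include <stdio.h>

#define LOOK_BACK (1U << 31)   /* assumed half-range look-back, as an example */

/* return the index of the first key >= bound, or -1 if none; stands in for a
 * "lookup greater-or-equal" over the sorted run-queue keys */
static int find_ge(const unsigned int *keys, size_t n, unsigned int bound)
{
        for (size_t i = 0; i < n; i++)
                if (keys[i] >= bound)
                        return (int)i;
        return -1;
}

int main(void)
{
        /* queued keys, sorted; the insertion counter recently wrapped past 0 */
        unsigned int keys[] = { 3, 7, 12 };
        unsigned int ticks  = 12;
        int idx;

        idx = find_ge(keys, 3, ticks - LOOK_BACK);
        if (idx < 0)
                idx = 0;  /* nothing >= bound: fall back to the smallest key */

        printf("next key to run: %u\n", keys[idx]);
        return 0;
}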