MINOR: tasks: split wake_expired_tasks() in two parts to avoid useless wakeups

We used to have wake_expired_tasks() wake up tasks and return the next
expiration delay. The problem this causes is that we have to call it just
before poll() in order to consider latest timers, but this also means that
we don't wake up all newly expired tasks upon return from poll(), which
thus systematically requires a second poll() round.

This is visible when running any scheduled task like a health check, as there
are systematically two poll() calls, one with the interval, nothing is done
after it, and another one with a zero delay, and the task is called:

  listen test
    bind *:8001
    server s1 127.0.0.1:1111 check

  09:37:38.200959 clock_gettime(CLOCK_THREAD_CPUTIME_ID, {tv_sec=0, tv_nsec=8696843}) = 0
  09:37:38.200967 epoll_wait(3, [], 200, 1000) = 0
  09:37:39.202459 clock_gettime(CLOCK_THREAD_CPUTIME_ID, {tv_sec=0, tv_nsec=8712467}) = 0
>> nothing run here, as the expired task was not woken up yet.
  09:37:39.202497 clock_gettime(CLOCK_THREAD_CPUTIME_ID, {tv_sec=0, tv_nsec=8715766}) = 0
  09:37:39.202505 epoll_wait(3, [], 200, 0) = 0
  09:37:39.202513 clock_gettime(CLOCK_THREAD_CPUTIME_ID, {tv_sec=0, tv_nsec=8719064}) = 0
>> now the expired task was woken up
  09:37:39.202522 socket(AF_INET, SOCK_STREAM, IPPROTO_TCP) = 7
  09:37:39.202537 fcntl(7, F_SETFL, O_RDONLY|O_NONBLOCK) = 0
  09:37:39.202565 setsockopt(7, SOL_TCP, TCP_NODELAY, [1], 4) = 0
  09:37:39.202577 setsockopt(7, SOL_TCP, TCP_QUICKACK, [0], 4) = 0
  09:37:39.202585 connect(7, {sa_family=AF_INET, sin_port=htons(1111), sin_addr=inet_addr("127.0.0.1")}, 16) = -1 EINPROGRESS (Operation now in progress)
  09:37:39.202659 epoll_ctl(3, EPOLL_CTL_ADD, 7, {EPOLLOUT, {u32=7, u64=7}}) = 0
  09:37:39.202673 clock_gettime(CLOCK_THREAD_CPUTIME_ID, {tv_sec=0, tv_nsec=8814713}) = 0
  09:37:39.202683 epoll_wait(3, [{EPOLLOUT|EPOLLERR|EPOLLHUP, {u32=7, u64=7}}], 200, 1000) = 1
  09:37:39.202693 clock_gettime(CLOCK_THREAD_CPUTIME_ID, {tv_sec=0, tv_nsec=8818617}) = 0
  09:37:39.202701 getsockopt(7, SOL_SOCKET, SO_ERROR, [111], [4]) = 0
  09:37:39.202715 close(7)                = 0

Let's instead split the function in two parts:
  - the first part, wake_expired_tasks(), called just before
    process_runnable_tasks(), wakes up all expired tasks; it doesn't
    compute any timeout.
  - the second part, next_timer_expiry(), called just before poll(),
    only computes the next timeout for the current thread.

Thanks to this, all expired tasks are properly woken up when leaving
poll, and each poll call's timeout remains up to date:

  09:41:16.270449 clock_gettime(CLOCK_THREAD_CPUTIME_ID, {tv_sec=0, tv_nsec=10223556}) = 0
  09:41:16.270457 epoll_wait(3, [], 200, 999) = 0
  09:41:17.270130 clock_gettime(CLOCK_THREAD_CPUTIME_ID, {tv_sec=0, tv_nsec=10238572}) = 0
  09:41:17.270157 socket(AF_INET, SOCK_STREAM, IPPROTO_TCP) = 7
  09:41:17.270194 fcntl(7, F_SETFL, O_RDONLY|O_NONBLOCK) = 0
  09:41:17.270204 setsockopt(7, SOL_TCP, TCP_NODELAY, [1], 4) = 0
  09:41:17.270216 setsockopt(7, SOL_TCP, TCP_QUICKACK, [0], 4) = 0
  09:41:17.270224 connect(7, {sa_family=AF_INET, sin_port=htons(1111), sin_addr=inet_addr("127.0.0.1")}, 16) = -1 EINPROGRESS (Operation now in progress)
  09:41:17.270299 epoll_ctl(3, EPOLL_CTL_ADD, 7, {EPOLLOUT, {u32=7, u64=7}}) = 0
  09:41:17.270314 clock_gettime(CLOCK_THREAD_CPUTIME_ID, {tv_sec=0, tv_nsec=10337841}) = 0
  09:41:17.270323 epoll_wait(3, [{EPOLLOUT|EPOLLERR|EPOLLHUP, {u32=7, u64=7}}], 200, 1000) = 1
  09:41:17.270332 clock_gettime(CLOCK_THREAD_CPUTIME_ID, {tv_sec=0, tv_nsec=10341860}) = 0
  09:41:17.270340 getsockopt(7, SOL_SOCKET, SO_ERROR, [111], [4]) = 0
  09:41:17.270367 close(7)                = 0

This may be backported to 2.1 and 2.0 though it's unlikely to bring any
user-visible improvement except to clarify debugging.
This commit is contained in:
Willy Tarreau 2019-12-11 08:12:23 +01:00
parent 440d09b244
commit c49ba52524
3 changed files with 58 additions and 20 deletions

View File

@ -599,9 +599,15 @@ void process_runnable_tasks();
/*
* Extract all expired timers from the timer queue, and wakes up all
* associated tasks. Returns the date of next event (or eternity).
* associated tasks.
*/
int wake_expired_tasks();
void wake_expired_tasks();
/* Checks the next timer for the current thread by looking into its own timer
* list and the global one. It may return TICK_ETERNITY if no timer is present.
* Note that the next timer might very well be slighly in the past.
*/
int next_timer_expiry();
/*
* Delete every tasks before running the master polling loop

View File

@ -2616,6 +2616,8 @@ static void run_poll_loop()
tv_update_date(0,1);
while (1) {
wake_expired_tasks();
/* Process a few tasks */
process_runnable_tasks();
@ -2624,9 +2626,6 @@ static void run_poll_loop()
if (tid == 0)
signal_process_queue();
/* Check if we can expire some tasks */
next = wake_expired_tasks();
/* stop when there's nothing left to do */
if ((jobs - unstoppable_jobs) == 0)
break;
@ -2651,6 +2650,9 @@ static void run_poll_loop()
wake = 0;
}
/* If we have to sleep, measure how long */
next = wake ? TICK_ETERNITY : next_timer_expiry();
/* The poller will ensure it returns around <next> */
cur_poller.poll(&cur_poller, next, wake);

View File

@ -155,14 +155,13 @@ void __task_queue(struct task *task, struct eb_root *wq)
/*
* Extract all expired timers from the timer queue, and wakes up all
* associated tasks. Returns the date of next event (or eternity).
* associated tasks.
*/
int wake_expired_tasks()
void wake_expired_tasks()
{
struct task_per_thread * const tt = sched; // thread's tasks
struct task *task;
struct eb32_node *eb;
int ret = TICK_ETERNITY;
__decl_hathreads(int key);
while (1) {
@ -178,11 +177,8 @@ int wake_expired_tasks()
break;
}
if (tick_is_lt(now_ms, eb->key)) {
/* timer not expired yet, revisit it later */
ret = eb->key;
if (tick_is_lt(now_ms, eb->key))
break;
}
/* timer looks expired, detach it from the queue */
task = eb32_entry(eb, struct task, wq);
@ -225,11 +221,8 @@ int wake_expired_tasks()
key = eb->key;
HA_RWLOCK_RDUNLOCK(TASK_WQ_LOCK, &wq_lock);
if (tick_is_lt(now_ms, key)) {
/* timer not expired yet, revisit it later */
ret = tick_first(ret, key);
if (tick_is_lt(now_ms, key))
goto leave;
}
/* There's really something of interest here, let's visit the queue */
@ -247,11 +240,8 @@ int wake_expired_tasks()
break;
}
if (tick_is_lt(now_ms, eb->key)) {
/* timer not expired yet, revisit it later */
ret = tick_first(ret, eb->key);
if (tick_is_lt(now_ms, eb->key))
break;
}
/* timer looks expired, detach it from the queue */
task = eb32_entry(eb, struct task, wq);
@ -282,6 +272,46 @@ int wake_expired_tasks()
HA_RWLOCK_WRUNLOCK(TASK_WQ_LOCK, &wq_lock);
#endif
leave:
return;
}
/* Checks the next timer for the current thread by looking into its own timer
* list and the global one. It may return TICK_ETERNITY if no timer is present.
* Note that the next timer might very well be slighly in the past.
*/
int next_timer_expiry()
{
struct task_per_thread * const tt = sched; // thread's tasks
struct eb32_node *eb;
int ret = TICK_ETERNITY;
__decl_hathreads(int key);
/* first check in the thread-local timers */
eb = eb32_lookup_ge(&tt->timers, now_ms - TIMER_LOOK_BACK);
if (!eb) {
/* we might have reached the end of the tree, typically because
* <now_ms> is in the first half and we're first scanning the last
* half. Let's loop back to the beginning of the tree now.
*/
eb = eb32_first(&tt->timers);
}
if (eb)
ret = eb->key;
#ifdef USE_THREAD
if (!eb_is_empty(&timers)) {
HA_RWLOCK_RDLOCK(TASK_WQ_LOCK, &wq_lock);
eb = eb32_lookup_ge(&timers, now_ms - TIMER_LOOK_BACK);
if (!eb)
eb = eb32_first(&timers);
if (eb)
key = eb->key;
HA_RWLOCK_RDUNLOCK(TASK_WQ_LOCK, &wq_lock);
if (eb)
ret = tick_first(ret, key);
}
#endif
return ret;
}