REORG: clock: move the updates of cpu/mono time to clock.c

The entering_poll/leaving_poll/measure_idle functions, which were hard
to classify and kept being moved between various locations, have now
been placed in clock.c since that file is precisely about time-keeping.
The functions were renamed to clock_*. The samp_time and idle_time
variables are now static since there is no reason for them to be read
from outside.
Willy Tarreau 2021-10-08 10:43:59 +02:00
parent 5554264f31
commit f9d5e1079c
10 changed files with 107 additions and 112 deletions
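
For illustration, the pollers touched below all end up calling the clock API in the same order. The sketch that follows is not a file from this commit: poll_loop_sketch() and the placeholder status are made up, and activity_count_runtime() plus the thread_idle/thread_harmless calls of the real pollers are omitted; only the clock_* calls and their ordering follow the changed call sites (seen in full in the poll() and select() pollers below).

#include <haproxy/clock.h>

/* Minimal sketch of a poller's event loop around the relocated clock_*
 * calls. <wait_time> is the poll timeout in milliseconds and <status>
 * stands in for the poller's return value (number of ready events).
 */
static void poll_loop_sketch(int wait_time)
{
        int status;

        clock_entering_poll();                 /* record before_poll, detect stolen time */
        status = 0;                            /* placeholder for poll()/epoll_wait()/kevent() */
        clock_update_date(wait_time, status);  /* refresh the cached date after waking up */
        clock_leaving_poll(wait_time, status); /* update idle accounting and prev cpu/mono time */
}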

View File

@@ -41,5 +41,8 @@ void clock_update_date(int max_wait, int interrupted);
void clock_init_process_date(void);
void clock_init_thread_date(void);
char *timeofday_as_iso_us(int pad);
uint clock_report_idle(void);
void clock_leaving_poll(int timeout, int interrupted);
void clock_entering_poll(void);
#endif

View File

@@ -93,9 +93,6 @@ extern volatile unsigned long global_tasks_mask; /* Mask of threads with tasks i
extern unsigned int grq_total; /* total number of entries in the global run queue, atomic */
extern unsigned int niced_tasks; /* number of niced tasks in the run queue */
extern THREAD_LOCAL unsigned int samp_time; /* total elapsed time over current sample */
extern THREAD_LOCAL unsigned int idle_time; /* total idle time over current sample */
extern struct pool_head *pool_head_task;
extern struct pool_head *pool_head_tasklet;
extern struct pool_head *pool_head_notification;
@@ -117,10 +114,6 @@ void tasklet_kill(struct tasklet *t);
void __task_wakeup(struct task *t);
void __task_queue(struct task *task, struct eb_root *wq);
uint sched_report_idle();
void sched_leaving_poll(int timeout, int interrupted);
void sched_entering_poll();
unsigned int run_tasks_from_lists(unsigned int budgets[]);
/*

View File

@@ -14,6 +14,7 @@
#include <time.h>
#include <haproxy/api.h>
#include <haproxy/activity.h>
#include <haproxy/clock.h>
#include <haproxy/time.h>
#include <haproxy/tinfo-t.h>
@@ -31,6 +32,8 @@ THREAD_LOCAL struct timeval date; /* the real current date (wall
THREAD_LOCAL struct timeval before_poll; /* system date before calling poll() */
THREAD_LOCAL struct timeval after_poll; /* system date after leaving poll() */
static THREAD_LOCAL unsigned int samp_time; /* total elapsed time over current sample */
static THREAD_LOCAL unsigned int idle_time; /* total idle time over current sample */
static THREAD_LOCAL unsigned int iso_time_sec; /* last iso time value for this thread */
static THREAD_LOCAL char iso_time_str[34]; /* ISO time representation of gettimeofday() */
@@ -212,6 +215,95 @@ void clock_init_thread_date(void)
clock_update_date(0, 1);
}
/* report the average CPU idle percentage over all running threads, between 0 and 100 */
uint clock_report_idle(void)
{
uint total = 0;
uint rthr = 0;
uint thr;
for (thr = 0; thr < MAX_THREADS; thr++) {
if (!(all_threads_mask & (1UL << thr)))
continue;
total += HA_ATOMIC_LOAD(&ha_thread_info[thr].idle_pct);
rthr++;
}
return rthr ? total / rthr : 0;
}
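/* Worked example (illustrative numbers, not part of the patch): with
 * all_threads_mask = 0x5, i.e. threads 0 and 2 running and publishing
 * idle_pct values of 80 and 40, the function returns (80 + 40) / 2 = 60,
 * so the process reports itself roughly 60% idle overall.
 */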
/* Update the idle time value twice a second, to be called after
* clock_update_date() when called after poll(), and currently called only by
* clock_leaving_poll() below. It relies on <before_poll> to be updated to
* the system time before calling poll().
*/
static inline void clock_measure_idle(void)
{
/* Let's compute the idle to work ratio. We worked between after_poll
* and before_poll, and slept between before_poll and date. The idle_pct
* is updated at most twice every second. Note that the current second
* rarely changes so we avoid a multiply when not needed.
*/
int delta;
if ((delta = date.tv_sec - before_poll.tv_sec))
delta *= 1000000;
idle_time += delta + (date.tv_usec - before_poll.tv_usec);
if ((delta = date.tv_sec - after_poll.tv_sec))
delta *= 1000000;
samp_time += delta + (date.tv_usec - after_poll.tv_usec);
after_poll.tv_sec = date.tv_sec; after_poll.tv_usec = date.tv_usec;
if (samp_time < 500000)
return;
HA_ATOMIC_STORE(&ti->idle_pct, (100ULL * idle_time + samp_time / 2) / samp_time);
idle_time = samp_time = 0;
}
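/* Worked example (illustrative numbers, not part of the patch): suppose
 * the current sample has accumulated samp_time = 600000 us of wall-clock
 * time since <after_poll>, of which idle_time = 450000 us were spent
 * sleeping in poll(). Since samp_time >= 500000, the ratio is published:
 * idle_pct = (100 * 450000 + 600000/2) / 600000 = 75, i.e. the thread
 * declares itself 75% idle, then both counters restart from zero.
 */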
/* Collect date and time information after leaving poll(). <timeout> must be
* set to the maximum sleep time passed to poll (in milliseconds), and
* <interrupted> must be zero if the poller reached the timeout or non-zero
* otherwise, which generally is provided by the poller's return value.
*/
void clock_leaving_poll(int timeout, int interrupted)
{
clock_measure_idle();
ti->prev_cpu_time = now_cpu_time();
ti->prev_mono_time = now_mono_time();
}
/* Collect date and time information before calling poll(). This will be used
* to count the run time of the past loop and the sleep time of the next poll.
* It also compares the elapsed and cpu times during the activity period to
* estimate the amount of stolen time, which is reported if higher than half
* a millisecond.
*/
void clock_entering_poll(void)
{
uint64_t new_mono_time;
uint64_t new_cpu_time;
int64_t stolen;
gettimeofday(&before_poll, NULL);
new_cpu_time = now_cpu_time();
new_mono_time = now_mono_time();
if (ti->prev_cpu_time && ti->prev_mono_time) {
new_cpu_time -= ti->prev_cpu_time;
new_mono_time -= ti->prev_mono_time;
stolen = new_mono_time - new_cpu_time;
if (unlikely(stolen >= 500000)) {
stolen /= 500000;
/* more than half a millisecond difference might
* indicate an undesired preemption.
*/
report_stolen_time(stolen);
}
}
}
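/* Worked example (illustrative numbers, not part of the patch): if the
 * monotonic clock advanced by 3200000 ns since the previous call while
 * the thread's CPU clock only advanced by 1500000 ns, then stolen is
 * 1700000 ns, above the 500000 ns (0.5 ms) threshold; stolen /= 500000
 * yields 3 and report_stolen_time(3) records roughly three
 * half-milliseconds presumably lost to preemption or VM scheduling.
 */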
/* returns the current date as returned by gettimeofday() in ISO+microsecond
* format. It uses a thread-local static variable that the reader can consume
* for as long as it wants until next call. Thus, do not call it from a signal

View File

@@ -189,7 +189,7 @@ static void _do_poll(struct poller *p, int exp, int wake)
/* now let's wait for polled events */
wait_time = wake ? 0 : compute_poll_timeout(exp);
sched_entering_poll();
clock_entering_poll();
activity_count_runtime();
do {
int timeout = (global.tune.options & GTUNE_BUSY_POLLING) ? 0 : wait_time;
@@ -209,7 +209,7 @@ static void _do_poll(struct poller *p, int exp, int wake)
break;
} while (1);
sched_leaving_poll(wait_time, status);
clock_leaving_poll(wait_time, status);
thread_harmless_end();
thread_idle_end();

View File

@@ -159,7 +159,7 @@ static void _do_poll(struct poller *p, int exp, int wake)
* Determine how long to wait for events to materialise on the port.
*/
wait_time = wake ? 0 : compute_poll_timeout(exp);
sched_entering_poll();
clock_entering_poll();
activity_count_runtime();
do {
@@ -203,7 +203,7 @@ static void _do_poll(struct poller *p, int exp, int wake)
break;
} while(1);
sched_leaving_poll(wait_time, nevlist);
clock_leaving_poll(wait_time, nevlist);
thread_harmless_end();
thread_idle_end();

View File

@@ -146,7 +146,7 @@ static void _do_poll(struct poller *p, int exp, int wake)
/* now let's wait for events */
wait_time = wake ? 0 : compute_poll_timeout(exp);
fd = global.tune.maxpollevents;
sched_entering_poll();
clock_entering_poll();
activity_count_runtime();
do {
@@ -175,7 +175,7 @@ static void _do_poll(struct poller *p, int exp, int wake)
break;
} while (1);
sched_leaving_poll(wait_time, status);
clock_leaving_poll(wait_time, status);
thread_harmless_end();
thread_idle_end();

View File

@@ -202,11 +202,11 @@ static void _do_poll(struct poller *p, int exp, int wake)
/* now let's wait for events */
wait_time = wake ? 0 : compute_poll_timeout(exp);
sched_entering_poll();
clock_entering_poll();
activity_count_runtime();
status = poll(poll_events, nbfd, wait_time);
clock_update_date(wait_time, status);
sched_leaving_poll(wait_time, status);
clock_leaving_poll(wait_time, status);
thread_harmless_end();
thread_idle_end();

View File

@@ -173,7 +173,7 @@ static void _do_poll(struct poller *p, int exp, int wake)
delta_ms = wake ? 0 : compute_poll_timeout(exp);
delta.tv_sec = (delta_ms / 1000);
delta.tv_usec = (delta_ms % 1000) * 1000;
sched_entering_poll();
clock_entering_poll();
activity_count_runtime();
status = select(maxfd,
readnotnull ? tmp_evts[DIR_RD] : NULL,
@@ -181,7 +181,7 @@ static void _do_poll(struct poller *p, int exp, int wake)
NULL,
&delta);
clock_update_date(delta_ms, status);
sched_leaving_poll(delta_ms, status);
clock_leaving_poll(delta_ms, status);
thread_harmless_end();
thread_idle_end();

View File

@@ -3448,7 +3448,7 @@ static void stats_dump_html_info(struct stream_interface *si, struct uri_auth *u
actconn, pipes_used, pipes_used+pipes_free, read_freq_ctr(&global.conn_per_sec),
bps >= 1000000000UL ? (bps / 1000000000.0) : bps >= 1000000UL ? (bps / 1000000.0) : (bps / 1000.0),
bps >= 1000000000UL ? 'G' : bps >= 1000000UL ? 'M' : 'k',
total_run_queues(), total_allocated_tasks(), sched_report_idle()
total_run_queues(), total_allocated_tasks(), clock_report_idle()
);
/* scope_txt = search query, appctx->ctx.stats.scope_len is always <= STAT_SCOPE_TXT_MAXLEN */
@@ -4481,7 +4481,7 @@ int stats_fill_info(struct field *info, int len, uint flags)
#endif
info[INF_TASKS] = mkf_u32(0, total_allocated_tasks());
info[INF_RUN_QUEUE] = mkf_u32(0, total_run_queues());
info[INF_IDLE_PCT] = mkf_u32(FN_AVG, sched_report_idle());
info[INF_IDLE_PCT] = mkf_u32(FN_AVG, clock_report_idle());
info[INF_NODE] = mkf_str(FO_CONFIG|FN_OUTPUT|FS_SERVICE, global.node);
if (global.desc)
info[INF_DESCRIPTION] = mkf_str(FO_CONFIG|FN_OUTPUT|FS_SERVICE, global.desc);

View File

@@ -38,10 +38,6 @@ DECLARE_POOL(pool_head_notification, "notification", sizeof(struct notification)
volatile unsigned long global_tasks_mask = 0; /* Mask of threads with tasks in the global runqueue */
unsigned int niced_tasks = 0; /* number of niced tasks in the run queue */
/* used for idle time calculation */
THREAD_LOCAL unsigned int samp_time = 0; /* total elapsed time over current sample */
THREAD_LOCAL unsigned int idle_time = 0; /* total idle time over current sample */
THREAD_LOCAL struct task_per_thread *sched = &task_per_thread[0]; /* scheduler context for the current thread */
__decl_aligned_spinlock(rq_lock); /* spin lock related to run queue */
@@ -865,95 +861,6 @@ void process_runnable_tasks()
activity[tid].long_rq++;
}
/* report the average CPU idle percentage over all running threads, between 0 and 100 */
uint sched_report_idle()
{
uint total = 0;
uint rthr = 0;
uint thr;
for (thr = 0; thr < MAX_THREADS; thr++) {
if (!(all_threads_mask & (1UL << thr)))
continue;
total += HA_ATOMIC_LOAD(&ha_thread_info[thr].idle_pct);
rthr++;
}
return rthr ? total / rthr : 0;
}
/* Update the idle time value twice a second, to be called after
* clock_update_date() when called after poll(), and currently called only by
* sched_leaving_poll() below. It relies on <before_poll> to be updated to
* the system time before calling poll().
*/
static inline void sched_measure_idle()
{
/* Let's compute the idle to work ratio. We worked between after_poll
* and before_poll, and slept between before_poll and date. The idle_pct
* is updated at most twice every second. Note that the current second
* rarely changes so we avoid a multiply when not needed.
*/
int delta;
if ((delta = date.tv_sec - before_poll.tv_sec))
delta *= 1000000;
idle_time += delta + (date.tv_usec - before_poll.tv_usec);
if ((delta = date.tv_sec - after_poll.tv_sec))
delta *= 1000000;
samp_time += delta + (date.tv_usec - after_poll.tv_usec);
after_poll.tv_sec = date.tv_sec; after_poll.tv_usec = date.tv_usec;
if (samp_time < 500000)
return;
HA_ATOMIC_STORE(&ti->idle_pct, (100ULL * idle_time + samp_time / 2) / samp_time);
idle_time = samp_time = 0;
}
/* Collect date and time information after leaving poll(). <timeout> must be
* set to the maximum sleep time passed to poll (in milliseconds), and
* <interrupted> must be zero if the poller reached the timeout or non-zero
* otherwise, which generally is provided by the poller's return value.
*/
void sched_leaving_poll(int timeout, int interrupted)
{
sched_measure_idle();
ti->prev_cpu_time = now_cpu_time();
ti->prev_mono_time = now_mono_time();
}
/* Collect date and time information before calling poll(). This will be used
* to count the run time of the past loop and the sleep time of the next poll.
* It also compares the elapsed and cpu times during the activity period to
* estimate the amount of stolen time, which is reported if higher than half
* a millisecond.
*/
void sched_entering_poll()
{
uint64_t new_mono_time;
uint64_t new_cpu_time;
int64_t stolen;
gettimeofday(&before_poll, NULL);
new_cpu_time = now_cpu_time();
new_mono_time = now_mono_time();
if (ti->prev_cpu_time && ti->prev_mono_time) {
new_cpu_time -= ti->prev_cpu_time;
new_mono_time -= ti->prev_mono_time;
stolen = new_mono_time - new_cpu_time;
if (unlikely(stolen >= 500000)) {
stolen /= 500000;
/* more than half a millisecond difference might
* indicate an undesired preemption.
*/
report_stolen_time(stolen);
}
}
}
/*
* Delete every task before running the master polling loop
*/