MINOR: tasks: do not keep cpu and latency times in struct task

It was a mistake to put these two fields in the struct task. This
was added in 1.9 via commit 9efd7456e ("MEDIUM: tasks: collect per-task
CPU time and latency"). These fields are used solely by streams in
order to report the measurements via the lat_ns* and cpu_ns* sample
fetch functions when task profiling is enabled. For the rest of the
tasks, this is pure CPU waste when profiling is enabled, and memory
waste 100% of the time, as the point where these latencies and usages
are measured is in the profiling array.

Let's move the fields to the stream instead, and have process_stream()
retrieve the relevant info from the thread's context.

The struct task is now back to 120 bytes, i.e. almost two cache lines,
with 32 bits still available.
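
To make the change easier to follow before reading the hunks, here is a minimal
standalone sketch of the new accounting path. The thread-context fields and the
per-stream fields mirror the patch; the mock_* types and helpers are invented
for illustration only and are not HAProxy APIs:

#include <stdint.h>

/* mock of the thread-context fields the patch relies on (th_ctx in HAProxy) */
struct mock_thread_ctx {
        uint32_t sched_wake_date;  /* date of the task wakeup, 0 when not profiling */
        uint32_t sched_call_date;  /* date taken right before ->process was called */
};

/* the measurements now live on the stream, not on the task */
struct mock_stream {
        uint64_t lat_time;   /* total latency time experienced */
        uint64_t cpu_time;   /* total CPU time consumed */
        uint32_t task_calls; /* stands in for s->task->calls */
};

/* what process_stream() now does on entry (see stream_cond_update_cpu_latency()
 * in the diff below): derive the wakeup-to-call latency from the thread context
 * and accumulate it on the stream. */
static inline void mock_update_latency(struct mock_stream *s, const struct mock_thread_ctx *ctx)
{
        if (ctx->sched_wake_date)
                s->lat_time += ctx->sched_call_date - ctx->sched_wake_date;
}

/* what the lat_ns_avg sample fetch now reports */
static inline uint64_t mock_lat_ns_avg(const struct mock_stream *s)
{
        return s->task_calls ? s->lat_time / s->task_calls : 0;
}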
Author: Willy Tarreau
Date:   2022-09-07 09:17:45 +02:00
Commit: 6a28a30efa (parent beee600491)
6 changed files with 38 additions and 19 deletions

@@ -168,6 +168,8 @@ struct stream {
 struct list back_refs; /* list of users tracking this stream */
 struct buffer_wait buffer_wait; /* position in the list of objects waiting for a buffer */
+uint64_t lat_time; /* total latency time experienced */
+uint64_t cpu_time; /* total CPU time consumed */
 struct freq_ctr call_rate; /* stream task call rate without making progress */
 short store_count;

@@ -113,8 +113,6 @@ struct task {
 short nice; /* task prio from -1024 to +1024 */
 short tid; /* TID where it's allowed to run, <0 if anywhere */
 uint32_t wake_date; /* date of the last task wakeup */
-uint64_t lat_time; /* total latency time experienced */
-uint64_t cpu_time; /* total CPU time consumed */
 };
 
 /* lightweight tasks, without priority, mainly used for I/Os */

@@ -498,8 +498,6 @@ static inline struct task *task_init(struct task *t, int tid)
 t->nice = 0;
 t->calls = 0;
 t->wake_date = 0;
-t->cpu_time = 0;
-t->lat_time = 0;
 t->expire = TICK_ETERNITY;
 #ifdef DEBUG_TASK
 t->debug.caller_idx = 0;

@@ -4136,7 +4136,7 @@ smp_fetch_cpu_ns_avg(const struct arg *args, struct sample *smp, const char *kw,
 return 0;
 
 smp->data.type = SMP_T_SINT;
-smp->data.u.sint = smp->strm->task->calls ? smp->strm->task->cpu_time / smp->strm->task->calls : 0;
+smp->data.u.sint = smp->strm->task->calls ? smp->strm->cpu_time / smp->strm->task->calls : 0;
 return 1;
 }
@@ -4148,7 +4148,7 @@ smp_fetch_cpu_ns_tot(const struct arg *args, struct sample *smp, const char *kw,
 return 0;
 
 smp->data.type = SMP_T_SINT;
-smp->data.u.sint = smp->strm->task->cpu_time;
+smp->data.u.sint = smp->strm->cpu_time;
 return 1;
 }
@@ -4160,7 +4160,7 @@ smp_fetch_lat_ns_avg(const struct arg *args, struct sample *smp, const char *kw,
 return 0;
 
 smp->data.type = SMP_T_SINT;
-smp->data.u.sint = smp->strm->task->calls ? smp->strm->task->lat_time / smp->strm->task->calls : 0;
+smp->data.u.sint = smp->strm->task->calls ? smp->strm->lat_time / smp->strm->task->calls : 0;
 return 1;
 }
@@ -4172,7 +4172,7 @@ smp_fetch_lat_ns_tot(const struct arg *args, struct sample *smp, const char *kw,
 return 0;
 
 smp->data.type = SMP_T_SINT;
-smp->data.u.sint = smp->strm->task->lat_time;
+smp->data.u.sint = smp->strm->lat_time;
 return 1;
 }

@@ -407,6 +407,7 @@ struct stream *stream_new(struct session *sess, struct stconn *sc, struct buffer
 s->buffer_wait.target = s;
 s->buffer_wait.wakeup_cb = stream_buf_available;
+s->lat_time = s->cpu_time = 0;
 s->call_rate.curr_tick = s->call_rate.curr_ctr = s->call_rate.prev_ctr = 0;
 s->pcli_next_pid = 0;
 s->pcli_flags = 0;
@@ -1556,6 +1557,17 @@ static void stream_update_both_sc(struct stream *s)
 }
 }
+
+/* if the current task's wake_date was set, it's being profiled, thus we may
+ * report latencies and CPU usages in logs, so it's desirable to update the
+ * latency when entering process_stream().
+ */
+static void stream_cond_update_cpu_latency(struct stream *s)
+{
+uint32_t lat = th_ctx->sched_call_date - th_ctx->sched_wake_date;
+
+s->lat_time += lat;
+}
 /* if the current task's wake_date was set, it's being profiled, thus we may
 * report latencies and CPU usages in logs, so it's desirable to do that before
 * logging in order to report accurate CPU usage. In this case we count that
@@ -1573,11 +1585,23 @@ static void stream_cond_update_cpu_usage(struct stream *s)
 return;
 
 cpu = (uint32_t)now_mono_time() - th_ctx->sched_call_date;
-s->task->cpu_time += cpu;
+s->cpu_time += cpu;
 HA_ATOMIC_ADD(&th_ctx->sched_profile_entry->cpu_time, cpu);
 th_ctx->sched_wake_date = 0;
 }
+
+/* this function is called directly by the scheduler for tasks whose
+ * ->process points to process_stream(), and is used to keep latencies
+ * and CPU usage measurements accurate.
+ */
+void stream_update_timings(struct task *t, uint64_t lat, uint64_t cpu)
+{
+struct stream *s = t->context;
+
+s->lat_time += lat;
+s->cpu_time += cpu;
+}
 
 /* This macro is very specific to the function below. See the comments in
 * process_stream() below to understand the logic and the tests.
 */
@@ -1649,6 +1673,7 @@ struct task *process_stream(struct task *t, void *context, unsigned int state)
 DBG_TRACE_ENTER(STRM_EV_STRM_PROC, s);
 
 activity[tid].stream_calls++;
+stream_cond_update_cpu_latency(s);
 
 req = &s->req;
 res = &s->res;
@@ -3376,14 +3401,13 @@ static int stats_dump_full_strm_to_buffer(struct stconn *sc, struct stream *strm
 }
 else if ((tmpctx = sc_appctx(scf)) != NULL) {
 chunk_appendf(&trash,
-" app0=%p st0=%d st1=%d applet=%s tid=%d nice=%d calls=%u rate=%u cpu=%llu lat=%llu\n",
+" app0=%p st0=%d st1=%d applet=%s tid=%d nice=%d calls=%u rate=%u\n",
 tmpctx,
 tmpctx->st0,
 tmpctx->st1,
 tmpctx->applet->name,
 tmpctx->t->tid,
-tmpctx->t->nice, tmpctx->t->calls, read_freq_ctr(&tmpctx->call_rate),
-(unsigned long long)tmpctx->t->cpu_time, (unsigned long long)tmpctx->t->lat_time);
+tmpctx->t->nice, tmpctx->t->calls, read_freq_ctr(&tmpctx->call_rate));
 }
 
 scb = strm->scb;
@@ -3419,14 +3443,13 @@ static int stats_dump_full_strm_to_buffer(struct stconn *sc, struct stream *strm
 }
 else if ((tmpctx = sc_appctx(scb)) != NULL) {
 chunk_appendf(&trash,
-" app1=%p st0=%d st1=%d applet=%s tid=%d nice=%d calls=%u rate=%u cpu=%llu lat=%llu\n",
+" app1=%p st0=%d st1=%d applet=%s tid=%d nice=%d calls=%u rate=%u\n",
 tmpctx,
 tmpctx->st0,
 tmpctx->st1,
 tmpctx->applet->name,
 tmpctx->t->tid,
-tmpctx->t->nice, tmpctx->t->calls, read_freq_ctr(&tmpctx->call_rate),
-(unsigned long long)tmpctx->t->cpu_time, (unsigned long long)tmpctx->t->lat_time);
+tmpctx->t->nice, tmpctx->t->calls, read_freq_ctr(&tmpctx->call_rate));
 }
 
 chunk_appendf(&trash,
@@ -3682,7 +3705,7 @@ static int cli_io_handler_dump_sess(struct appctx *appctx)
 curr_strm->task->state, curr_strm->stream_epoch,
 human_time(now.tv_sec - curr_strm->logs.tv_accept.tv_sec, 1),
 curr_strm->task->calls, read_freq_ctr(&curr_strm->call_rate),
-(unsigned long long)curr_strm->task->cpu_time, (unsigned long long)curr_strm->task->lat_time);
+(unsigned long long)curr_strm->cpu_time, (unsigned long long)curr_strm->lat_time);
 
 chunk_appendf(&trash,
 " rq[f=%06xh,i=%u,an=%02xh,rx=%s",
@@ -25,6 +25,7 @@
 #include <haproxy/tools.h>
 
 extern struct task *process_stream(struct task *t, void *context, unsigned int state);
+extern void stream_update_timings(struct task *t, uint64_t lat, uint64_t cpu);
 
 DECLARE_POOL(pool_head_task, "task", sizeof(struct task));
 DECLARE_POOL(pool_head_tasklet, "tasklet", sizeof(struct tasklet));
@@ -611,7 +612,6 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
 uint32_t now_ns = now_mono_time();
 uint32_t lat = now_ns - t->wake_date;
 
-t->lat_time += lat;
 t->wake_date = 0;
 th_ctx->sched_call_date = now_ns;
 profile_entry = sched_activity_entry(sched_activity, t->process);
@@ -669,8 +669,6 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
 if (unlikely(th_ctx->sched_wake_date)) {
 uint32_t cpu = (uint32_t)now_mono_time() - th_ctx->sched_call_date;
 
-if (t)
-t->cpu_time += cpu;
 HA_ATOMIC_ADD(&profile_entry->cpu_time, cpu);
 }
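
For tasks other than streams, the aggregated numbers remain available only in the
per-function profiling entry updated above; streams keep their own copy via
stream_update_timings(). A rough standalone sketch of that split, with mock_*
types standing in for the scheduler-side structures (illustrative only, not the
actual HAProxy code):

#include <stdint.h>

struct mock_profile_entry { uint64_t lat_time; uint64_t cpu_time; }; /* one per ->process function */
struct mock_stream { uint64_t lat_time; uint64_t cpu_time; };        /* one per stream */

/* every profiled task still feeds the per-function aggregate... */
static void mock_account_profile(struct mock_profile_entry *ent, uint64_t lat, uint64_t cpu)
{
        ent->lat_time += lat;
        ent->cpu_time += cpu;
}

/* ...but only tasks whose ->process is process_stream() also forward the
 * numbers to their stream, mirroring stream_update_timings() above. */
static void mock_stream_update_timings(struct mock_stream *s, uint64_t lat, uint64_t cpu)
{
        s->lat_time += lat;
        s->cpu_time += cpu;
}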