From 6a28a30efaa8d6f6355cc4b1e9c8b5aeded661e4 Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w@1wt.eu>
Date: Wed, 7 Sep 2022 09:17:45 +0200
Subject: [PATCH] MINOR: tasks: do not keep cpu and latency times in struct
 task

It was a mistake to put these two fields in the struct task. This
was added in 1.9 via commit 9efd7456e ("MEDIUM: tasks: collect per-task
CPU time and latency"). These fields are used solely by streams in
order to report the measurements via the lat_ns* and cpu_ns* sample
fetch functions when task profiling is enabled. For the rest of the
tasks, this is pure CPU waste when profiling is enabled, and memory
waste 100% of the time, as the point where these latencies and usages
are measured is in the profiling array.

Let's move the fields to the stream instead, and have process_stream()
retrieve the relevant info from the thread's context.

The struct task is now back to 120 bytes, i.e. almost two cache lines,
with 32 bit still available.
---
 include/haproxy/stream-t.h |  2 ++
 include/haproxy/task-t.h   |  2 --
 include/haproxy/task.h     |  2 --
 src/sample.c               |  8 ++++----
 src/stream.c               | 39 ++++++++++++++++++++++++++++++--------
 src/task.c                 |  4 +---
 6 files changed, 38 insertions(+), 19 deletions(-)

diff --git a/include/haproxy/stream-t.h b/include/haproxy/stream-t.h
index a09e541cb7..a2928e5de9 100644
--- a/include/haproxy/stream-t.h
+++ b/include/haproxy/stream-t.h
@@ -168,6 +168,8 @@ struct stream {
 	struct list back_refs;          /* list of users tracking this stream */
 	struct buffer_wait buffer_wait; /* position in the list of objects waiting for a buffer */
 
+	uint64_t lat_time;		/* total latency time experienced */
+	uint64_t cpu_time;              /* total CPU time consumed */
 	struct freq_ctr call_rate;      /* stream task call rate without making progress */
 
 	short store_count;
diff --git a/include/haproxy/task-t.h b/include/haproxy/task-t.h
index 33a7f070ef..373ac297c3 100644
--- a/include/haproxy/task-t.h
+++ b/include/haproxy/task-t.h
@@ -113,8 +113,6 @@ struct task {
 	short nice;                     /* task prio from -1024 to +1024 */
 	short tid;                      /* TID where it's allowed to run, <0 if anywhere */
 	uint32_t wake_date;		/* date of the last task wakeup */
-	uint64_t lat_time;		/* total latency time experienced */
-	uint64_t cpu_time;              /* total CPU time consumed */
 };
 
 /* lightweight tasks, without priority, mainly used for I/Os */
diff --git a/include/haproxy/task.h b/include/haproxy/task.h
index 33be63f0c2..a711c2740d 100644
--- a/include/haproxy/task.h
+++ b/include/haproxy/task.h
@@ -498,8 +498,6 @@ static inline struct task *task_init(struct task *t, int tid)
 	t->nice = 0;
 	t->calls = 0;
 	t->wake_date = 0;
-	t->cpu_time = 0;
-	t->lat_time = 0;
 	t->expire = TICK_ETERNITY;
 #ifdef DEBUG_TASK
 	t->debug.caller_idx = 0;
diff --git a/src/sample.c b/src/sample.c
index 852cd0418f..cd9fccf85e 100644
--- a/src/sample.c
+++ b/src/sample.c
@@ -4136,7 +4136,7 @@ smp_fetch_cpu_ns_avg(const struct arg *args, struct sample *smp, const char *kw,
 		return 0;
 
 	smp->data.type = SMP_T_SINT;
-	smp->data.u.sint = smp->strm->task->calls ? smp->strm->task->cpu_time / smp->strm->task->calls : 0;
+	smp->data.u.sint = smp->strm->task->calls ? smp->strm->cpu_time / smp->strm->task->calls : 0;
 	return 1;
 }
 
@@ -4148,7 +4148,7 @@ smp_fetch_cpu_ns_tot(const struct arg *args, struct sample *smp, const char *kw,
 		return 0;
 
 	smp->data.type = SMP_T_SINT;
-	smp->data.u.sint = smp->strm->task->cpu_time;
+	smp->data.u.sint = smp->strm->cpu_time;
 	return 1;
 }
 
@@ -4160,7 +4160,7 @@ smp_fetch_lat_ns_avg(const struct arg *args, struct sample *smp, const char *kw,
 		return 0;
 
 	smp->data.type = SMP_T_SINT;
-	smp->data.u.sint = smp->strm->task->calls ? smp->strm->task->lat_time / smp->strm->task->calls : 0;
+	smp->data.u.sint = smp->strm->task->calls ? smp->strm->lat_time / smp->strm->task->calls : 0;
 	return 1;
 }
 
@@ -4172,7 +4172,7 @@ smp_fetch_lat_ns_tot(const struct arg *args, struct sample *smp, const char *kw,
 		return 0;
 
 	smp->data.type = SMP_T_SINT;
-	smp->data.u.sint = smp->strm->task->lat_time;
+	smp->data.u.sint = smp->strm->lat_time;
 	return 1;
 }
 
diff --git a/src/stream.c b/src/stream.c
index 8d62754054..25a50e7a1d 100644
--- a/src/stream.c
+++ b/src/stream.c
@@ -407,6 +407,7 @@ struct stream *stream_new(struct session *sess, struct stconn *sc, struct buffer
 	s->buffer_wait.target = s;
 	s->buffer_wait.wakeup_cb = stream_buf_available;
 
+	s->lat_time = s->cpu_time = 0;
 	s->call_rate.curr_tick = s->call_rate.curr_ctr = s->call_rate.prev_ctr = 0;
 	s->pcli_next_pid = 0;
 	s->pcli_flags = 0;
@@ -1556,6 +1557,17 @@ static void stream_update_both_sc(struct stream *s)
 	}
 }
 
+/* if the current task's wake_date was set, it's being profiled, thus we may
+ * report latencies and CPU usages in logs, so it's desirable to update the
+ * latency when entering process_stream().
+ */
+static void stream_cond_update_cpu_latency(struct stream *s)
+{
+	uint32_t lat = th_ctx->sched_call_date - th_ctx->sched_wake_date;
+
+	s->lat_time += lat;
+}
+
 /* if the current task's wake_date was set, it's being profiled, thus we may
  * report latencies and CPU usages in logs, so it's desirable to do that before
  * logging in order to report accurate CPU usage. In this case we count that
@@ -1573,11 +1585,23 @@ static void stream_cond_update_cpu_usage(struct stream *s)
 		return;
 
 	cpu = (uint32_t)now_mono_time() - th_ctx->sched_call_date;
-	s->task->cpu_time += cpu;
+	s->cpu_time += cpu;
 	HA_ATOMIC_ADD(&th_ctx->sched_profile_entry->cpu_time, cpu);
 	th_ctx->sched_wake_date = 0;
 }
 
+/* this functions is called directly by the scheduler for tasks whose
+ * ->process points to process_stream(), and is used to keep latencies
+ * and CPU usage measurements accurate.
+ */
+void stream_update_timings(struct task *t, uint64_t lat, uint64_t cpu)
+{
+	struct stream *s = t->context;
+	s->lat_time += lat;
+	s->cpu_time += cpu;
+}
+
+
 /* This macro is very specific to the function below. See the comments in
  * process_stream() below to understand the logic and the tests.
  */
@@ -1649,6 +1673,7 @@ struct task *process_stream(struct task *t, void *context, unsigned int state)
 	DBG_TRACE_ENTER(STRM_EV_STRM_PROC, s);
 
 	activity[tid].stream_calls++;
+	stream_cond_update_cpu_latency(s);
 
 	req = &s->req;
 	res = &s->res;
@@ -3376,14 +3401,13 @@ static int stats_dump_full_strm_to_buffer(struct stconn *sc, struct stream *strm
 		}
 		else if ((tmpctx = sc_appctx(scf)) != NULL) {
 			chunk_appendf(&trash,
-			              "      app0=%p st0=%d st1=%d applet=%s tid=%d nice=%d calls=%u rate=%u cpu=%llu lat=%llu\n",
+			              "      app0=%p st0=%d st1=%d applet=%s tid=%d nice=%d calls=%u rate=%u\n",
 				      tmpctx,
 				      tmpctx->st0,
 				      tmpctx->st1,
 			              tmpctx->applet->name,
 			              tmpctx->t->tid,
-			              tmpctx->t->nice, tmpctx->t->calls, read_freq_ctr(&tmpctx->call_rate),
-			              (unsigned long long)tmpctx->t->cpu_time, (unsigned long long)tmpctx->t->lat_time);
+			              tmpctx->t->nice, tmpctx->t->calls, read_freq_ctr(&tmpctx->call_rate));
 		}
 
 		scb = strm->scb;
@@ -3419,14 +3443,13 @@ static int stats_dump_full_strm_to_buffer(struct stconn *sc, struct stream *strm
 		}
 		else if ((tmpctx = sc_appctx(scb)) != NULL) {
 			chunk_appendf(&trash,
-			              "      app1=%p st0=%d st1=%d applet=%s tid=%d nice=%d calls=%u rate=%u cpu=%llu lat=%llu\n",
+			              "      app1=%p st0=%d st1=%d applet=%s tid=%d nice=%d calls=%u rate=%u\n",
 				      tmpctx,
 				      tmpctx->st0,
 				      tmpctx->st1,
 			              tmpctx->applet->name,
 			              tmpctx->t->tid,
-			              tmpctx->t->nice, tmpctx->t->calls, read_freq_ctr(&tmpctx->call_rate),
-			              (unsigned long long)tmpctx->t->cpu_time, (unsigned long long)tmpctx->t->lat_time);
+			              tmpctx->t->nice, tmpctx->t->calls, read_freq_ctr(&tmpctx->call_rate));
 		}
 
 		chunk_appendf(&trash,
@@ -3682,7 +3705,7 @@ static int cli_io_handler_dump_sess(struct appctx *appctx)
 		             curr_strm->task->state, curr_strm->stream_epoch,
 			     human_time(now.tv_sec - curr_strm->logs.tv_accept.tv_sec, 1),
 		             curr_strm->task->calls, read_freq_ctr(&curr_strm->call_rate),
-		             (unsigned long long)curr_strm->task->cpu_time, (unsigned long long)curr_strm->task->lat_time);
+		             (unsigned long long)curr_strm->cpu_time, (unsigned long long)curr_strm->lat_time);
 
 		chunk_appendf(&trash,
 			     " rq[f=%06xh,i=%u,an=%02xh,rx=%s",
diff --git a/src/task.c b/src/task.c
index 3d78680774..bb25d508af 100644
--- a/src/task.c
+++ b/src/task.c
@@ -25,6 +25,7 @@
 #include <haproxy/tools.h>
 
 extern struct task *process_stream(struct task *t, void *context, unsigned int state);
+extern void stream_update_timings(struct task *t, uint64_t lat, uint64_t cpu);
 
 DECLARE_POOL(pool_head_task,    "task",    sizeof(struct task));
 DECLARE_POOL(pool_head_tasklet, "tasklet", sizeof(struct tasklet));
@@ -611,7 +612,6 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
 			uint32_t now_ns = now_mono_time();
 			uint32_t lat = now_ns - t->wake_date;
 
-			t->lat_time += lat;
 			t->wake_date = 0;
 			th_ctx->sched_call_date = now_ns;
 			profile_entry = sched_activity_entry(sched_activity, t->process);
@@ -669,8 +669,6 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
 		if (unlikely(th_ctx->sched_wake_date)) {
 			uint32_t cpu = (uint32_t)now_mono_time() - th_ctx->sched_call_date;
 
-			if (t)
-				t->cpu_time += cpu;
 			HA_ATOMIC_ADD(&profile_entry->cpu_time, cpu);
 		}