MINOR: task: move profiling bit to per-thread

Instead of having a global mask of all the profiled threads, let's have
one flag per thread in each thread's flags. They are never accessed more
than one at a time and are better located inside the threads' contexts
for both performance and scalability.
Willy Tarreau 2022-06-13 15:59:39 +02:00
parent 040955fb39
commit 680ed5f28b
7 changed files with 14 additions and 14 deletions
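
In practice the change replaces tests and atomic updates on the shared
task_profiling_mask bitmap with accesses to the calling thread's own flags
word. A minimal before/after sketch of the pattern, using only identifiers
that appear in the hunks below (th_ctx designates the current thread's
context):

	/* before: one global mask shared by all threads, one bit per thread */
	if (task_profiling_mask & tid_bit)            /* is this thread profiled? */
		t->call_date = now_mono_time();
	_HA_ATOMIC_OR(&task_profiling_mask, tid_bit); /* turn profiling on here */

	/* after: one flag in the thread's own context instead of a word
	 * shared by all threads
	 */
	if (th_ctx->flags & TH_FL_TASK_PROFILING)
		t->call_date = now_mono_time();
	_HA_ATOMIC_OR(&th_ctx->flags, TH_FL_TASK_PROFILING);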


@@ -60,10 +60,11 @@ Task creation currently takes a thread mask of either tid_bit, a specific
 mask, or MAX_THREADS_MASK. How to create a task able to run anywhere
 (checks, Lua, ...) ?
 
-Profiling
+Profiling -> completed
 ---------
 There should be one task_profiling_mask per thread group. Enabling or
 disabling profiling should be made per group (possibly by iterating).
+-> not needed anymore, one flag per thread in each thread's context.
 
 Thread isolation
 ----------------
@@ -405,7 +406,7 @@ the FD, and all those with a bit in it ought to do it.
 - lock debugging must reproduce tgid
-- task profiling must be made per-group (annoying), unless we want to add a
+* task profiling must be made per-group (annoying), unless we want to add a
   per-thread TH_FL_* flag and have the rare places where the bit is changed
   iterate over all threads if needed. Sounds preferable overall.
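
The design note above settles on a per-thread TH_FL_* flag, with the rare
places that toggle profiling iterating over all threads. As a rough sketch
of that idea (illustrative only, not code from this commit; the helper name
is made up, and it assumes HAProxy's global.nbthread and ha_thread_ctx[]):

	/* Illustration only: enable or disable task profiling on every thread
	 * by toggling each thread's own flag. Not part of this commit.
	 */
	static void set_task_profiling_all(int enable)
	{
		int t;

		for (t = 0; t < global.nbthread; t++) {
			if (enable)
				_HA_ATOMIC_OR(&ha_thread_ctx[t].flags, TH_FL_TASK_PROFILING);
			else
				_HA_ATOMIC_AND(&ha_thread_ctx[t].flags, ~TH_FL_TASK_PROFILING);
		}
	}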


@@ -26,7 +26,6 @@
 #include <haproxy/api.h>
 
 extern unsigned int profiling;
-extern unsigned long task_profiling_mask;
 extern struct activity activity[MAX_THREADS];
 extern struct sched_activity sched_activity[256];


@@ -401,7 +401,7 @@ static inline void _tasklet_wakeup_on(struct tasklet *tl, int thr, const char *f
 	tl->debug.caller_idx = !tl->debug.caller_idx;
 	tl->debug.caller_file[tl->debug.caller_idx] = file;
 	tl->debug.caller_line[tl->debug.caller_idx] = line;
-	if (task_profiling_mask & tid_bit)
+	if (th_ctx->flags & TH_FL_TASK_PROFILING)
 		tl->call_date = now_mono_time();
 #endif
 	__tasklet_wakeup_on(tl, thr);
@@ -452,7 +452,7 @@ static inline void _task_instant_wakeup(struct task *t, unsigned int f, const ch
 	tl->debug.caller_idx = !tl->debug.caller_idx;
 	tl->debug.caller_file[tl->debug.caller_idx] = file;
 	tl->debug.caller_line[tl->debug.caller_idx] = line;
-	if (task_profiling_mask & tid_bit)
+	if (th_ctx->flags & TH_FL_TASK_PROFILING)
 		tl->call_date = now_mono_time();
 #endif
 	__tasklet_wakeup_on(tl, thr);


@@ -37,6 +37,7 @@ enum {
 /* thread_ctx flags, for ha_thread_ctx[].flags */
 #define TH_FL_STUCK             0x00000001
+#define TH_FL_TASK_PROFILING    0x00000002
 
 /* Thread group information. This defines a base and a count of global thread
  * IDs which belong to it, and which can be looked up into thread_info/ctx. It


@@ -42,7 +42,6 @@ struct show_prof_ctx {
 /* bit field of profiling options. Beware, may be modified at runtime! */
 unsigned int profiling __read_mostly = HA_PROF_TASKS_AOFF;
-unsigned long task_profiling_mask __read_mostly = 0;
 
 /* One struct per thread containing all collected measurements */
 struct activity activity[MAX_THREADS] __attribute__((aligned(64))) = { };
@@ -384,16 +383,16 @@ void activity_count_runtime(uint32_t run_time)
 	 * profiling to "on" when automatic, and going back below the "down"
 	 * threshold switches to off. The forced modes don't check the load.
 	 */
-	if (!(task_profiling_mask & tid_bit)) {
+	if (!(th_ctx->flags & TH_FL_TASK_PROFILING)) {
 		if (unlikely((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_ON ||
 		             ((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_AON &&
 		              swrate_avg(run_time, TIME_STATS_SAMPLES) >= up)))
-			_HA_ATOMIC_OR(&task_profiling_mask, tid_bit);
+			_HA_ATOMIC_OR(&th_ctx->flags, TH_FL_TASK_PROFILING);
 	} else {
 		if (unlikely((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_OFF ||
 		             ((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_AOFF &&
 		              swrate_avg(run_time, TIME_STATS_SAMPLES) <= down)))
-			_HA_ATOMIC_AND(&task_profiling_mask, ~tid_bit);
+			_HA_ATOMIC_AND(&th_ctx->flags, ~TH_FL_TASK_PROFILING);
 	}
 }


@@ -179,12 +179,12 @@ void ha_thread_dump(struct buffer *buf, int thr, int calling_tid)
 	              ha_thread_ctx[thr].rq_total,
 	              ha_thread_info[thr].tg->tgid, ha_thread_info[thr].ltid + 1,
 	              stuck,
-	              !!(task_profiling_mask & thr_bit));
+	              !!(th_ctx->flags & TH_FL_TASK_PROFILING));
 
 	chunk_appendf(buf,
 	              " harmless=%d wantrdv=%d",
 	              !!(threads_harmless_mask & thr_bit),
 	              !!(threads_want_rdv_mask & thr_bit));
 
 	chunk_appendf(buf, "\n");
 	chunk_appendf(buf, " cpu_ns: poll=%llu now=%llu diff=%llu\n", p, n, n-p);


@@ -218,7 +218,7 @@ void __task_wakeup(struct task *t)
 		t->rq.key += offset;
 	}
 
-	if (task_profiling_mask & tid_bit)
+	if (th_ctx->flags & TH_FL_TASK_PROFILING)
 		t->call_date = now_mono_time();
 
 	eb32sc_insert(root, &t->rq, t->thread_mask);
@@ -562,7 +562,7 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
 		LIST_DEL_INIT(&((struct tasklet *)t)->list);
 		__ha_barrier_store();
 
-		if (unlikely(task_profiling_mask & tid_bit)) {
+		if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING)) {
 			profile_entry = sched_activity_entry(sched_activity, t->process);
 			before = now_mono_time();
 #ifdef DEBUG_TASK
@@ -587,7 +587,7 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
 			continue;
 		}
 
-		if (unlikely(task_profiling_mask & tid_bit)) {
+		if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING)) {
 			HA_ATOMIC_INC(&profile_entry->calls);
 			HA_ATOMIC_ADD(&profile_entry->cpu_time, now_mono_time() - before);
 		}