mirror of
http://git.haproxy.org/git/haproxy.git/
synced 2025-04-21 06:25:43 +00:00
MINOR: threads: add a "stuck" flag to the thread_info struct
This flag is constantly cleared by the scheduler and will be set by the watchdog timer to detect stuck threads. It is also set by the "show threads" command so that it is easy to spot if the situation has evolved between two subsequent calls : if the first "show threads" shows no stuck thread and the second one shows such a stuck thread, it indicates that this thread didn't manage to make any forward progress since the previous call, which is extremely suspicious.
This commit is contained in:
parent
578ea8be55
commit
e6a02fa65a
@ -2525,11 +2525,18 @@ show threads
|
||||
an advanced dump mechanism involving thread signals is used so that each
|
||||
thread can dump its own state in turn. Without this option, the thread
|
||||
processing the command shows all its details but the other ones are less
|
||||
detailed. A stat ('*') is displayed in front of the thread handling the
|
||||
command. The output format is purposely not documented so that it can easily
|
||||
evolve as new needs are identified, without having to maintain any backwards
|
||||
compatibility, and just like with "show activity", the values are only
|
||||
meaningful with the code at hand.
|
||||
detailed. A star ('*') is displayed in front of the thread handling the
|
||||
command. A right angle bracket ('>') may also be displayed in front of
|
||||
threads which didn't make any progress since last invocation of this command,
|
||||
indicating a bug in the code which must absolutely be reported. When this
|
||||
happens between two threads it usually indicates a deadlock. If a thread is
|
||||
alone, it's a different bug like a corrupted list. In all cases the process
|
||||
needs is not fully functional anymore and needs to be restarted.
|
||||
|
||||
The output format is purposely not documented so that it can easily evolve as
|
||||
new needs are identified, without having to maintain any form of backwards
|
||||
compatibility, and just like with "show activity", the values are meaningless
|
||||
without the code at hand.
|
||||
|
||||
show tls-keys [id|*]
|
||||
Dump all loaded TLS ticket keys references. The TLS ticket key reference ID
|
||||
|
@ -38,6 +38,10 @@
|
||||
* only one thread is enabled, it equals 1.
|
||||
*/
|
||||
|
||||
/* thread info flags, for thread_info[].flags */
|
||||
#define TI_FL_STUCK 0x00000001
|
||||
|
||||
|
||||
#ifndef USE_THREAD
|
||||
|
||||
#define MAX_THREADS 1
|
||||
@ -57,6 +61,7 @@ extern struct thread_info {
|
||||
uint64_t prev_cpu_time; /* previous per thread CPU time */
|
||||
uint64_t prev_mono_time; /* previous system wide monotonic time */
|
||||
unsigned int idle_pct; /* idle to total ratio over last sample (percent) */
|
||||
unsigned int flags; /* thread info flags, TI_FL_* */
|
||||
/* pad to cache line (64B) */
|
||||
char __pad[0]; /* unused except to check remaining room */
|
||||
char __end[0] __attribute__((aligned(64)));
|
||||
@ -405,6 +410,7 @@ extern struct thread_info {
|
||||
uint64_t prev_cpu_time; /* previous per thread CPU time */
|
||||
uint64_t prev_mono_time; /* previous system wide monotonic time */
|
||||
unsigned int idle_pct; /* idle to total ratio over last sample (percent) */
|
||||
unsigned int flags; /* thread info flags, TI_FL_* */
|
||||
/* pad to cache line (64B) */
|
||||
char __pad[0]; /* unused except to check remaining room */
|
||||
char __end[0] __attribute__((aligned(64)));
|
||||
|
17
src/debug.c
17
src/debug.c
@ -33,18 +33,20 @@
|
||||
* optionally extra info for the current thread. The dump will be appended to
|
||||
* the buffer, so the caller is responsible for preliminary initializing it.
|
||||
* The calling thread ID needs to be passed in <calling_tid> to display a star
|
||||
* in front of the calling thread's line (usually it's tid).
|
||||
* in front of the calling thread's line (usually it's tid). Any stuck thread
|
||||
* is also prefixed with a '>'.
|
||||
*/
|
||||
void ha_thread_dump(struct buffer *buf, int thr, int calling_tid)
|
||||
{
|
||||
unsigned long thr_bit = 1UL << thr;
|
||||
unsigned long long p = thread_info[thr].prev_cpu_time;
|
||||
unsigned long long n = now_cpu_time_thread(&thread_info[thr]);
|
||||
int stuck = !!(thread_info[thr].flags & TI_FL_STUCK);
|
||||
|
||||
chunk_appendf(buf,
|
||||
"%c Thread %-2u: act=%d glob=%d wq=%d rq=%d tl=%d tlsz=%d rqsz=%d\n"
|
||||
" fdcache=%d prof=%d",
|
||||
(thr == calling_tid) ? '*' : ' ', thr + 1,
|
||||
"%c%cThread %-2u: act=%d glob=%d wq=%d rq=%d tl=%d tlsz=%d rqsz=%d\n"
|
||||
" stuck=%d fdcache=%d prof=%d",
|
||||
(thr == calling_tid) ? '*' : ' ', stuck ? '>' : ' ', thr + 1,
|
||||
!!(active_tasks_mask & thr_bit),
|
||||
!!(global_tasks_mask & thr_bit),
|
||||
!eb_is_empty(&task_per_thread[thr].timers),
|
||||
@ -52,6 +54,7 @@ void ha_thread_dump(struct buffer *buf, int thr, int calling_tid)
|
||||
!LIST_ISEMPTY(&task_per_thread[thr].task_list),
|
||||
task_per_thread[thr].task_list_size,
|
||||
task_per_thread[thr].rqueue_size,
|
||||
stuck,
|
||||
!!(fd_cache_mask & thr_bit),
|
||||
!!(task_profiling_mask & thr_bit));
|
||||
|
||||
@ -467,6 +470,12 @@ void debug_handler(int sig, siginfo_t *si, void *arg)
|
||||
else
|
||||
ha_thread_relax();
|
||||
}
|
||||
|
||||
/* mark the current thread as stuck to detect it upon next invocation
|
||||
* if it didn't move.
|
||||
*/
|
||||
if (!((threads_harmless_mask|sleeping_thread_mask) & tid_bit))
|
||||
ti->flags |= TI_FL_STUCK;
|
||||
}
|
||||
|
||||
static int init_debug_per_thread()
|
||||
|
@ -281,6 +281,8 @@ void process_runnable_tasks()
|
||||
struct task *t;
|
||||
int max_processed;
|
||||
|
||||
ti->flags &= ~TI_FL_STUCK; // this thread is still running
|
||||
|
||||
if (!(active_tasks_mask & tid_bit)) {
|
||||
activity[tid].empty_rq++;
|
||||
return;
|
||||
@ -372,6 +374,7 @@ void process_runnable_tasks()
|
||||
__ha_barrier_atomic_store();
|
||||
__task_remove_from_tasklet_list(t);
|
||||
|
||||
ti->flags &= ~TI_FL_STUCK; // this thread is still running
|
||||
activity[tid].ctxsw++;
|
||||
ctx = t->context;
|
||||
process = t->process;
|
||||
|
Loading…
Reference in New Issue
Block a user