mirror of
http://git.haproxy.org/git/haproxy.git/
synced 2025-04-21 14:35:45 +00:00
MINOR: threads: add a "stuck" flag to the thread_info struct
This flag is constantly cleared by the scheduler and will be set by the watchdog timer to detect stuck threads. It is also set by the "show threads" command so that it is easy to spot if the situation has evolved between two subsequent calls: if the first "show threads" shows no stuck thread and the second one shows such a stuck thread, it indicates that this thread didn't manage to make any forward progress since the previous call, which is extremely suspicious.
This commit is contained in:
parent
578ea8be55
commit
e6a02fa65a
@ -2525,11 +2525,18 @@ show threads
|
|||||||
an advanced dump mechanism involving thread signals is used so that each
|
an advanced dump mechanism involving thread signals is used so that each
|
||||||
thread can dump its own state in turn. Without this option, the thread
|
thread can dump its own state in turn. Without this option, the thread
|
||||||
processing the command shows all its details but the other ones are less
|
processing the command shows all its details but the other ones are less
|
||||||
detailed. A stat ('*') is displayed in front of the thread handling the
|
detailed. A star ('*') is displayed in front of the thread handling the
|
||||||
command. The output format is purposely not documented so that it can easily
|
command. A right angle bracket ('>') may also be displayed in front of
|
||||||
evolve as new needs are identified, without having to maintain any backwards
|
threads which didn't make any progress since last invocation of this command,
|
||||||
compatibility, and just like with "show activity", the values are only
|
indicating a bug in the code which must absolutely be reported. When this
|
||||||
meaningful with the code at hand.
|
happens between two threads it usually indicates a deadlock. If a thread is
|
||||||
|
alone, it's a different bug like a corrupted list. In all cases the process
|
||||||
|
is not fully functional anymore and needs to be restarted.
|
||||||
|
|
||||||
|
The output format is purposely not documented so that it can easily evolve as
|
||||||
|
new needs are identified, without having to maintain any form of backwards
|
||||||
|
compatibility, and just like with "show activity", the values are meaningless
|
||||||
|
without the code at hand.
|
||||||
|
|
||||||
show tls-keys [id|*]
|
show tls-keys [id|*]
|
||||||
Dump all loaded TLS ticket keys references. The TLS ticket key reference ID
|
Dump all loaded TLS ticket keys references. The TLS ticket key reference ID
|
||||||
|
@ -38,6 +38,10 @@
|
|||||||
* only one thread is enabled, it equals 1.
|
* only one thread is enabled, it equals 1.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/* thread info flags, for thread_info[].flags */
|
||||||
|
#define TI_FL_STUCK 0x00000001
|
||||||
|
|
||||||
|
|
||||||
#ifndef USE_THREAD
|
#ifndef USE_THREAD
|
||||||
|
|
||||||
#define MAX_THREADS 1
|
#define MAX_THREADS 1
|
||||||
@ -57,6 +61,7 @@ extern struct thread_info {
|
|||||||
uint64_t prev_cpu_time; /* previous per thread CPU time */
|
uint64_t prev_cpu_time; /* previous per thread CPU time */
|
||||||
uint64_t prev_mono_time; /* previous system wide monotonic time */
|
uint64_t prev_mono_time; /* previous system wide monotonic time */
|
||||||
unsigned int idle_pct; /* idle to total ratio over last sample (percent) */
|
unsigned int idle_pct; /* idle to total ratio over last sample (percent) */
|
||||||
|
unsigned int flags; /* thread info flags, TI_FL_* */
|
||||||
/* pad to cache line (64B) */
|
/* pad to cache line (64B) */
|
||||||
char __pad[0]; /* unused except to check remaining room */
|
char __pad[0]; /* unused except to check remaining room */
|
||||||
char __end[0] __attribute__((aligned(64)));
|
char __end[0] __attribute__((aligned(64)));
|
||||||
@ -405,6 +410,7 @@ extern struct thread_info {
|
|||||||
uint64_t prev_cpu_time; /* previous per thread CPU time */
|
uint64_t prev_cpu_time; /* previous per thread CPU time */
|
||||||
uint64_t prev_mono_time; /* previous system wide monotonic time */
|
uint64_t prev_mono_time; /* previous system wide monotonic time */
|
||||||
unsigned int idle_pct; /* idle to total ratio over last sample (percent) */
|
unsigned int idle_pct; /* idle to total ratio over last sample (percent) */
|
||||||
|
unsigned int flags; /* thread info flags, TI_FL_* */
|
||||||
/* pad to cache line (64B) */
|
/* pad to cache line (64B) */
|
||||||
char __pad[0]; /* unused except to check remaining room */
|
char __pad[0]; /* unused except to check remaining room */
|
||||||
char __end[0] __attribute__((aligned(64)));
|
char __end[0] __attribute__((aligned(64)));
|
||||||
|
17
src/debug.c
17
src/debug.c
@ -33,18 +33,20 @@
|
|||||||
* optionally extra info for the current thread. The dump will be appended to
|
* optionally extra info for the current thread. The dump will be appended to
|
||||||
* the buffer, so the caller is responsible for preliminary initializing it.
|
* the buffer, so the caller is responsible for preliminary initializing it.
|
||||||
* The calling thread ID needs to be passed in <calling_tid> to display a star
|
* The calling thread ID needs to be passed in <calling_tid> to display a star
|
||||||
* in front of the calling thread's line (usually it's tid).
|
* in front of the calling thread's line (usually it's tid). Any stuck thread
|
||||||
|
* is also prefixed with a '>'.
|
||||||
*/
|
*/
|
||||||
void ha_thread_dump(struct buffer *buf, int thr, int calling_tid)
|
void ha_thread_dump(struct buffer *buf, int thr, int calling_tid)
|
||||||
{
|
{
|
||||||
unsigned long thr_bit = 1UL << thr;
|
unsigned long thr_bit = 1UL << thr;
|
||||||
unsigned long long p = thread_info[thr].prev_cpu_time;
|
unsigned long long p = thread_info[thr].prev_cpu_time;
|
||||||
unsigned long long n = now_cpu_time_thread(&thread_info[thr]);
|
unsigned long long n = now_cpu_time_thread(&thread_info[thr]);
|
||||||
|
int stuck = !!(thread_info[thr].flags & TI_FL_STUCK);
|
||||||
|
|
||||||
chunk_appendf(buf,
|
chunk_appendf(buf,
|
||||||
"%c Thread %-2u: act=%d glob=%d wq=%d rq=%d tl=%d tlsz=%d rqsz=%d\n"
|
"%c%cThread %-2u: act=%d glob=%d wq=%d rq=%d tl=%d tlsz=%d rqsz=%d\n"
|
||||||
" fdcache=%d prof=%d",
|
" stuck=%d fdcache=%d prof=%d",
|
||||||
(thr == calling_tid) ? '*' : ' ', thr + 1,
|
(thr == calling_tid) ? '*' : ' ', stuck ? '>' : ' ', thr + 1,
|
||||||
!!(active_tasks_mask & thr_bit),
|
!!(active_tasks_mask & thr_bit),
|
||||||
!!(global_tasks_mask & thr_bit),
|
!!(global_tasks_mask & thr_bit),
|
||||||
!eb_is_empty(&task_per_thread[thr].timers),
|
!eb_is_empty(&task_per_thread[thr].timers),
|
||||||
@ -52,6 +54,7 @@ void ha_thread_dump(struct buffer *buf, int thr, int calling_tid)
|
|||||||
!LIST_ISEMPTY(&task_per_thread[thr].task_list),
|
!LIST_ISEMPTY(&task_per_thread[thr].task_list),
|
||||||
task_per_thread[thr].task_list_size,
|
task_per_thread[thr].task_list_size,
|
||||||
task_per_thread[thr].rqueue_size,
|
task_per_thread[thr].rqueue_size,
|
||||||
|
stuck,
|
||||||
!!(fd_cache_mask & thr_bit),
|
!!(fd_cache_mask & thr_bit),
|
||||||
!!(task_profiling_mask & thr_bit));
|
!!(task_profiling_mask & thr_bit));
|
||||||
|
|
||||||
@ -467,6 +470,12 @@ void debug_handler(int sig, siginfo_t *si, void *arg)
|
|||||||
else
|
else
|
||||||
ha_thread_relax();
|
ha_thread_relax();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* mark the current thread as stuck to detect it upon next invocation
|
||||||
|
* if it didn't move.
|
||||||
|
*/
|
||||||
|
if (!((threads_harmless_mask|sleeping_thread_mask) & tid_bit))
|
||||||
|
ti->flags |= TI_FL_STUCK;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int init_debug_per_thread()
|
static int init_debug_per_thread()
|
||||||
|
@ -281,6 +281,8 @@ void process_runnable_tasks()
|
|||||||
struct task *t;
|
struct task *t;
|
||||||
int max_processed;
|
int max_processed;
|
||||||
|
|
||||||
|
ti->flags &= ~TI_FL_STUCK; // this thread is still running
|
||||||
|
|
||||||
if (!(active_tasks_mask & tid_bit)) {
|
if (!(active_tasks_mask & tid_bit)) {
|
||||||
activity[tid].empty_rq++;
|
activity[tid].empty_rq++;
|
||||||
return;
|
return;
|
||||||
@ -372,6 +374,7 @@ void process_runnable_tasks()
|
|||||||
__ha_barrier_atomic_store();
|
__ha_barrier_atomic_store();
|
||||||
__task_remove_from_tasklet_list(t);
|
__task_remove_from_tasklet_list(t);
|
||||||
|
|
||||||
|
ti->flags &= ~TI_FL_STUCK; // this thread is still running
|
||||||
activity[tid].ctxsw++;
|
activity[tid].ctxsw++;
|
||||||
ctx = t->context;
|
ctx = t->context;
|
||||||
process = t->process;
|
process = t->process;
|
||||||
|
Loading…
Reference in New Issue
Block a user