From ff64d3b027d968a29b5bac55121bbce591fe54b2 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Fri, 1 May 2020 11:28:49 +0200 Subject: [PATCH] MINOR: threads: export the POSIX thread ID in panic dumps It is very difficult to map a panic dump against a gdb thread dump because the thread numbers do not match. However gdb provides the pthread ID but this one is supposed to be opaque and not to be cast to a scalar. This patch provides a function, ha_get_pthread_id() which retrieves the pthread ID of the indicated thread and casts it to an unsigned long long so as to lose the least possible amount of information from it. This is done cleanly using a union to maintain alignment so as long as these IDs are stored on 1..8 bytes they will be properly reported. This ID is now presented in the panic dumps so it now becomes possible to map these threads. When threads are disabled, zero is returned. For example, this is a panic dump: Thread 1 is about to kill the process. *>Thread 1 : id=0x7fe92b825180 act=0 glob=0 wq=1 rq=0 tl=0 tlsz=0 rqsz=0 stuck=1 prof=0 harmless=0 wantrdv=0 cpu_ns: poll=5119122 now=2009446995 diff=2004327873 curr_task=0xc99bf0 (task) calls=4 last=0 fct=0x592440(task_run_applet) ctx=0xca9c50() strm=0xc996a0 src=unix fe=GLOBAL be=GLOBAL dst= rqf=848202 rqa=0 rpf=80048202 rpa=0 sif=EST,200008 sib=EST,204018 af=(nil),0 csf=0xc9ba40,8200 ab=0xca9c50,4 csb=(nil),0 cof=0xbf0e50,1300:PASS(0xc9cee0)/RAW((nil))/unix_stream(20) cob=(nil),0:NONE((nil))/NONE((nil))/NONE(0) call trace(20): | 0x59e4cf [48 83 c4 10 5b 5d 41 5c]: wdt_handler+0xff/0x10c | 0x7fe92c170690 [48 c7 c0 0f 00 00 00 0f]: libpthread:+0x13690 | 0x7ffce29519d9 [48 c1 e2 20 48 09 d0 48]: linux-vdso:+0x9d9 | 0x7ffce2951d54 [eb d9 f3 90 e9 1c ff ff]: linux-vdso:__vdso_gettimeofday+0x104/0x133 | 0x57b484 [48 89 e6 48 8d 7c 24 10]: main+0x157114 | 0x50ee6a [85 c0 75 76 48 8b 55 38]: main+0xeaafa | 0x50f69c [48 63 54 24 20 85 c0 0f]: main+0xeb32c | 0x59252c [48 c7 c6 d8 ff ff ff 44]: 
task_run_applet+0xec/0x88c Thread 2 : id=0x7fe92b6e6700 act=0 glob=0 wq=0 rq=0 tl=0 tlsz=0 rqsz=0 stuck=0 prof=0 harmless=1 wantrdv=0 cpu_ns: poll=786738 now=1086955 diff=300217 curr_task=0 Thread 3 : id=0x7fe92aee5700 act=0 glob=0 wq=0 rq=0 tl=0 tlsz=0 rqsz=0 stuck=0 prof=0 harmless=1 wantrdv=0 cpu_ns: poll=828056 now=1129738 diff=301682 curr_task=0 Thread 4 : id=0x7fe92a6e4700 act=0 glob=0 wq=0 rq=0 tl=0 tlsz=0 rqsz=0 stuck=0 prof=0 harmless=1 wantrdv=0 cpu_ns: poll=818900 now=1153551 diff=334651 curr_task=0 And this is the gdb output: (gdb) info thr Id Target Id Frame * 1 Thread 0x7fe92b825180 (LWP 15234) 0x00007fe92ba81d6b in raise () from /lib64/libc.so.6 2 Thread 0x7fe92b6e6700 (LWP 15235) 0x00007fe92bb56a56 in epoll_wait () from /lib64/libc.so.6 3 Thread 0x7fe92a6e4700 (LWP 15237) 0x00007fe92bb56a56 in epoll_wait () from /lib64/libc.so.6 4 Thread 0x7fe92aee5700 (LWP 15236) 0x00007fe92bb56a56 in epoll_wait () from /lib64/libc.so.6 We can clearly see that while threads 1 and 2 are the same, gdb's threads 3 and 4 respectively are haproxy's threads 4 and 3. This may be backported to 2.0 as it removes some confusion in github issues. 
---
 include/common/hathreads.h | 36 ++++++++++++++++++++++++++++++++++++
 src/debug.c                |  3 ++-
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/include/common/hathreads.h b/include/common/hathreads.h
index b3fff40836..82357d5bd3 100644
--- a/include/common/hathreads.h
+++ b/include/common/hathreads.h
@@ -170,6 +170,11 @@ static inline void ha_set_tid(unsigned int tid)
 	ti = &ha_thread_info[tid];
 }
 
+static inline unsigned long long ha_get_pthread_id(unsigned int thr)
+{
+	return 0;
+}
+
 static inline void ha_thread_relax(void)
 {
 #if _POSIX_PRIORITY_SCHEDULING
@@ -483,6 +488,37 @@ static inline void ha_set_tid(unsigned int data)
 	ti = &ha_thread_info[tid];
 }
 
+/* Retrieves the opaque pthread_t of thread <thr> cast to an unsigned long long
+ * since POSIX took great care of not specifying its representation, making it
+ * hard to export for post-mortem analysis. For this reason we copy it into a
+ * union and will use the smallest scalar type at least as large as its size,
+ * which will keep endianness and alignment for all regular sizes. As a last
+ * resort we end up with a long long aligned to the first bytes in memory, which
+ * will be endian-dependent if pthread_t is larger than a long long (not seen
+ * yet).
+ */
+static inline unsigned long long ha_get_pthread_id(unsigned int thr)
+{
+	union {
+		pthread_t t;
+		unsigned long long ll;
+		unsigned int i;
+		unsigned short s;
+		unsigned char c;
+	} u;
+
+	memset(&u, 0, sizeof(u));
+	u.t = ha_thread_info[thr].pthread;
+
+	if (sizeof(u.t) <= sizeof(u.c))
+		return u.c;
+	else if (sizeof(u.t) <= sizeof(u.s))
+		return u.s;
+	else if (sizeof(u.t) <= sizeof(u.i))
+		return u.i;
+	return u.ll;
+}
+
 static inline void ha_thread_relax(void)
 {
 #if _POSIX_PRIORITY_SCHEDULING
diff --git a/src/debug.c b/src/debug.c
index dd13aa4f74..a94fd28301 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -58,9 +58,10 @@ void ha_thread_dump(struct buffer *buf, int thr, int calling_tid)
 	int stuck = !!(ha_thread_info[thr].flags & TI_FL_STUCK);
 
 	chunk_appendf(buf,
-	              "%c%cThread %-2u: act=%d glob=%d wq=%d rq=%d tl=%d tlsz=%d rqsz=%d\n"
+	              "%c%cThread %-2u: id=0x%llx act=%d glob=%d wq=%d rq=%d tl=%d tlsz=%d rqsz=%d\n"
 	              " stuck=%d prof=%d",
 	              (thr == calling_tid) ? '*' : ' ', stuck ? '>' : ' ', thr + 1,
+	              ha_get_pthread_id(thr),
 	              thread_has_tasks(),
 	              !!(global_tasks_mask & thr_bit),
 	              !eb_is_empty(&task_per_thread[thr].timers),