MEDIUM: debug: add support for dumping backtraces of stuck threads

When a panic() occurs due to a stuck thread, we'll try to dump a
backtrace of this thread if the config directive USE_BACKTRACE is
set (which is the case on linux+glibc). For this we use the
backtrace() call provided by glibc and iterate the pointers through
resolve_sym_name(). In order to minimize the output (which is limited
to one buffer), we only do this for stuck threads, and we start the
dump above ha_panic()/ha_thread_dump_all_to_trash(), and stop when
meeting known points such as main/run_tasks_from_list/run_poll_loop.

If enabled without USE_DL, the dump will be complete but will carry no
details, except that pointers will all be given relative to main, which
is still better than nothing.

The new USE_BACKTRACE config option is enabled by default on glibc since
backtrace() has been present there for ages. When the option is set, the
export-dynamic linker option is enabled so that all non-static symbols
are properly resolved.
This commit is contained in:
Willy Tarreau 2020-03-03 15:40:23 +01:00
parent cf12f2ee66
commit f5b4e064dc
2 changed files with 76 additions and 2 deletions

View File

@ -49,6 +49,7 @@
# USE_NS : enable network namespace support. Supported on Linux >= 2.6.24. # USE_NS : enable network namespace support. Supported on Linux >= 2.6.24.
# USE_DL : enable it if your system requires -ldl. Automatic on Linux. # USE_DL : enable it if your system requires -ldl. Automatic on Linux.
# USE_RT : enable it if your system requires -lrt. Automatic on Linux. # USE_RT : enable it if your system requires -lrt. Automatic on Linux.
# USE_BACKTRACE : enable backtrace(). Automatic on Linux with glibc.
# USE_DEVICEATLAS : enable DeviceAtlas api. # USE_DEVICEATLAS : enable DeviceAtlas api.
# USE_51DEGREES : enable third party device detection library from 51Degrees # USE_51DEGREES : enable third party device detection library from 51Degrees
# USE_WURFL : enable WURFL detection library from Scientiamobile # USE_WURFL : enable WURFL detection library from Scientiamobile
@ -286,7 +287,7 @@ LDFLAGS = $(ARCH_FLAGS) -g
# the reported build options. # the reported build options.
use_opts = USE_EPOLL USE_KQUEUE USE_MY_EPOLL USE_MY_SPLICE USE_NETFILTER \ use_opts = USE_EPOLL USE_KQUEUE USE_MY_EPOLL USE_MY_SPLICE USE_NETFILTER \
USE_PCRE USE_PCRE_JIT USE_PCRE2 USE_PCRE2_JIT USE_POLL \ USE_PCRE USE_PCRE_JIT USE_PCRE2 USE_PCRE2_JIT USE_POLL \
USE_PRIVATE_CACHE USE_THREAD USE_PTHREAD_PSHARED \ USE_PRIVATE_CACHE USE_THREAD USE_PTHREAD_PSHARED USE_BACKTRACE \
USE_STATIC_PCRE USE_STATIC_PCRE2 USE_TPROXY USE_LINUX_TPROXY \ USE_STATIC_PCRE USE_STATIC_PCRE2 USE_TPROXY USE_LINUX_TPROXY \
USE_LINUX_SPLICE USE_LIBCRYPT USE_CRYPT_H USE_VSYSCALL \ USE_LINUX_SPLICE USE_LIBCRYPT USE_CRYPT_H USE_VSYSCALL \
USE_GETADDRINFO USE_OPENSSL USE_LUA USE_FUTEX USE_ACCEPT4 \ USE_GETADDRINFO USE_OPENSSL USE_LUA USE_FUTEX USE_ACCEPT4 \
@ -326,7 +327,7 @@ ifeq ($(TARGET),linux-glibc)
USE_POLL USE_TPROXY USE_LIBCRYPT USE_DL USE_RT USE_CRYPT_H USE_NETFILTER \ USE_POLL USE_TPROXY USE_LIBCRYPT USE_DL USE_RT USE_CRYPT_H USE_NETFILTER \
USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_FUTEX USE_LINUX_TPROXY \ USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_FUTEX USE_LINUX_TPROXY \
USE_ACCEPT4 USE_LINUX_SPLICE USE_PRCTL USE_THREAD_DUMP USE_NS USE_TFO \ USE_ACCEPT4 USE_LINUX_SPLICE USE_PRCTL USE_THREAD_DUMP USE_NS USE_TFO \
USE_GETADDRINFO) USE_GETADDRINFO USE_BACKTRACE)
endif endif
# For linux >= 2.6.28, glibc without new features # For linux >= 2.6.28, glibc without new features
@ -515,6 +516,10 @@ ifneq ($(USE_RT),)
OPTIONS_LDFLAGS += -lrt OPTIONS_LDFLAGS += -lrt
endif endif
ifneq ($(USE_BACKTRACE),)
OPTIONS_LDFLAGS += -Wl,$(if $(EXPORT_SYMBOL),$(EXPORT_SYMBOL),--export-dynamic)
endif
ifneq ($(USE_OPENSSL),) ifneq ($(USE_OPENSSL),)
# OpenSSL is packaged in various forms and with various dependencies. # OpenSSL is packaged in various forms and with various dependencies.
# In general -lssl is enough, but on some platforms, -lcrypto may be needed, # In general -lssl is enough, but on some platforms, -lcrypto may be needed,

View File

@ -10,6 +10,12 @@
* *
*/ */
#ifdef USE_BACKTRACE
#define _GNU_SOURCE
#include <execinfo.h>
#endif
#include <fcntl.h> #include <fcntl.h>
#include <signal.h> #include <signal.h>
#include <time.h> #include <time.h>
@ -87,6 +93,69 @@ void ha_thread_dump(struct buffer *buf, int thr, int calling_tid)
chunk_appendf(buf, " curr_task="); chunk_appendf(buf, " curr_task=");
ha_task_dump(buf, sched->current, " "); ha_task_dump(buf, sched->current, " ");
#ifdef USE_BACKTRACE
if (stuck) {
/* We only emit the backtrace for stuck threads in order not to
* waste precious output buffer space with non-interesting data.
*/
struct buffer bak;
void *callers[100];
int j, nptrs;
void *addr;
int dump = 0;
nptrs = backtrace(callers, sizeof(callers)/sizeof(*callers));
/* The call backtrace_symbols_fd(callers, nptrs, STDOUT_FILENO)
would produce similar output to the following: */
if (nptrs)
chunk_appendf(buf, " call trace:\n");
#ifndef USE_DL
/* if we can't rely on dladdr1() we can't tell which levels belong
* to ha_panic() or ha_thread_dump_all_to_trash(), so we want
* to immediately start the dump.
*/
dump = 2;
#endif
for (j = 0; j < nptrs; j++) {
bak = *buf;
dump_addr_and_bytes(buf, " | ", callers[j], 8);
addr = resolve_sym_name(buf, ": ", callers[j]);
if (dump == 0) {
/* dump not started, will start *after*
* ha_thread_dump_all_to_trash and ha_panic
*/
if (addr == ha_thread_dump_all_to_trash || addr == ha_panic)
dump = 1;
*buf = bak;
continue;
}
if (dump == 1) {
/* starting */
if (addr == ha_thread_dump_all_to_trash || addr == ha_panic) {
*buf = bak;
continue;
}
dump = 2;
}
if (dump == 2) {
/* dumping */
if (addr == run_poll_loop || addr == main || addr == run_tasks_from_list) {
dump = 3;
*buf = bak;
break;
}
}
/* OK, line dumped */
chunk_appendf(buf, "\n");
}
}
#endif
} }