From f5b4e064dcb1f7c97c87b68dbbbf7a4371e05bc7 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 3 Mar 2020 15:40:23 +0100 Subject: [PATCH] MEDIUM: debug: add support for dumping backtraces of stuck threads When a panic() occurs due to a stuck thread, we'll try to dump a backtrace of this thread if the config directive USE_BACKTRACE is set (which is the case on linux+glibc). For this we use the backtrace() call provided by glibc and iterate the pointers through resolve_sym_name(). In order to minimize the output (which is limited to one buffer), we only do this for stuck threads, and we start the dump above ha_panic()/ha_thread_dump_all_to_trash(), and stop when meeting known points such as main/run_tasks_from_list/run_poll_loop. If enabled without USE_DL, the dump will be complete with no details except that pointers will all be given relative to main, which is still better than nothing. The new USE_BACKTRACE config option is enabled by default on glibc since it has been present for ages. When it is set, the export-dynamic linker option is enabled so that all non-static symbols are properly resolved. --- Makefile | 9 +++++-- src/debug.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 1e6b3841cc..2d7e0e9728 100644 --- a/Makefile +++ b/Makefile @@ -49,6 +49,7 @@ # USE_NS : enable network namespace support. Supported on Linux >= 2.6.24. # USE_DL : enable it if your system requires -ldl. Automatic on Linux. # USE_RT : enable it if your system requires -lrt. Automatic on Linux. +# USE_BACKTRACE : enable backtrace(). Automatic on Linux. # USE_DEVICEATLAS : enable DeviceAtlas api. # USE_51DEGREES : enable third party device detection library from 51Degrees # USE_WURFL : enable WURFL detection library from Scientiamobile @@ -286,7 +287,7 @@ LDFLAGS = $(ARCH_FLAGS) -g # the reported build options. 
use_opts = USE_EPOLL USE_KQUEUE USE_MY_EPOLL USE_MY_SPLICE USE_NETFILTER \ USE_PCRE USE_PCRE_JIT USE_PCRE2 USE_PCRE2_JIT USE_POLL \ - USE_PRIVATE_CACHE USE_THREAD USE_PTHREAD_PSHARED \ + USE_PRIVATE_CACHE USE_THREAD USE_PTHREAD_PSHARED USE_BACKTRACE \ USE_STATIC_PCRE USE_STATIC_PCRE2 USE_TPROXY USE_LINUX_TPROXY \ USE_LINUX_SPLICE USE_LIBCRYPT USE_CRYPT_H USE_VSYSCALL \ USE_GETADDRINFO USE_OPENSSL USE_LUA USE_FUTEX USE_ACCEPT4 \ @@ -326,7 +327,7 @@ ifeq ($(TARGET),linux-glibc) USE_POLL USE_TPROXY USE_LIBCRYPT USE_DL USE_RT USE_CRYPT_H USE_NETFILTER \ USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_FUTEX USE_LINUX_TPROXY \ USE_ACCEPT4 USE_LINUX_SPLICE USE_PRCTL USE_THREAD_DUMP USE_NS USE_TFO \ - USE_GETADDRINFO) + USE_GETADDRINFO USE_BACKTRACE) endif # For linux >= 2.6.28, glibc without new features @@ -515,6 +516,10 @@ ifneq ($(USE_RT),) OPTIONS_LDFLAGS += -lrt endif +ifneq ($(USE_BACKTRACE),) +OPTIONS_LDFLAGS += -Wl,$(if $(EXPORT_SYMBOL),$(EXPORT_SYMBOL),--export-dynamic) +endif + ifneq ($(USE_OPENSSL),) # OpenSSL is packaged in various forms and with various dependencies. # In general -lssl is enough, but on some platforms, -lcrypto may be needed, diff --git a/src/debug.c b/src/debug.c index 4b7f65e724..6b41fc3910 100644 --- a/src/debug.c +++ b/src/debug.c @@ -10,6 +10,12 @@ * */ + +#ifdef USE_BACKTRACE +#define _GNU_SOURCE +#include <execinfo.h> +#endif + #include #include #include @@ -87,6 +93,69 @@ void ha_thread_dump(struct buffer *buf, int thr, int calling_tid) chunk_appendf(buf, " curr_task="); ha_task_dump(buf, sched->current, " "); + +#ifdef USE_BACKTRACE + if (stuck) { + /* We only emit the backtrace for stuck threads in order not to + * waste precious output buffer space with non-interesting data. 
+ */ + struct buffer bak; + void *callers[100]; + int j, nptrs; + void *addr; + int dump = 0; + + nptrs = backtrace(callers, sizeof(callers)/sizeof(*callers)); + + /* The call backtrace_symbols_fd(callers, nptrs, STDOUT_FILENO) + would produce similar output to the following: */ + + if (nptrs) + chunk_appendf(buf, " call trace:\n"); + +#ifndef USE_DL + /* if we can't rely on dladdr1() we won't figure what level is + * in ha_panic() or ha_thread_dump_all_to_trash(), so we want + * to immediately start the dump. + */ + dump = 2; +#endif + for (j = 0; j < nptrs; j++) { + bak = *buf; + dump_addr_and_bytes(buf, " | ", callers[j], 8); + addr = resolve_sym_name(buf, ": ", callers[j]); + if (dump == 0) { + /* dump not started, will start *after* + * ha_thread_dump_all_to_trash and ha_panic + */ + if (addr == ha_thread_dump_all_to_trash || addr == ha_panic) + dump = 1; + *buf = bak; + continue; + } + + if (dump == 1) { + /* starting */ + if (addr == ha_thread_dump_all_to_trash || addr == ha_panic) { + *buf = bak; + continue; + } + dump = 2; + } + + if (dump == 2) { + /* dumping */ + if (addr == run_poll_loop || addr == main || addr == run_tasks_from_list) { + dump = 3; + *buf = bak; + break; + } + } + /* OK, line dumped */ + chunk_appendf(buf, "\n"); + } + } +#endif }