MEDIUM: debug: add support for dumping backtraces of stuck threads
When a panic() occurs due to a stuck thread, we now try to dump a backtrace of this thread if the config directive USE_BACKTRACE is set (which is the case on linux+glibc). For this we use the backtrace() call provided by glibc and pass each returned pointer through resolve_sym_name(). In order to minimize the output (which is limited to one buffer), we only do this for stuck threads, and we start the dump above ha_panic()/ha_thread_dump_all_to_trash(), and stop when meeting known points such as main/run_tasks_from_list/run_poll_loop. If enabled without USE_DL, the frames belonging to ha_panic()/ha_thread_dump_all_to_trash() cannot be identified, so the dump starts immediately: it will be complete and will carry no details except that all pointers will be given relative to main, which is still better than nothing. The new USE_BACKTRACE config option is enabled by default on glibc since backtrace() has been present there for ages. When it is set, the export-dynamic linker option is enabled so that all non-static symbols are properly resolved.
This commit is contained in:
parent
cf12f2ee66
commit
f5b4e064dc
9
Makefile
9
Makefile
|
@ -49,6 +49,7 @@
|
|||
# USE_NS : enable network namespace support. Supported on Linux >= 2.6.24.
|
||||
# USE_DL : enable it if your system requires -ldl. Automatic on Linux.
|
||||
# USE_RT : enable it if your system requires -lrt. Automatic on Linux.
|
||||
# USE_BACKTRACE : enable backtrace(). Automatic on Linux with glibc.
|
||||
# USE_DEVICEATLAS : enable DeviceAtlas api.
|
||||
# USE_51DEGREES : enable third party device detection library from 51Degrees
|
||||
# USE_WURFL : enable WURFL detection library from Scientiamobile
|
||||
|
@ -286,7 +287,7 @@ LDFLAGS = $(ARCH_FLAGS) -g
|
|||
# the reported build options.
|
||||
use_opts = USE_EPOLL USE_KQUEUE USE_MY_EPOLL USE_MY_SPLICE USE_NETFILTER \
|
||||
USE_PCRE USE_PCRE_JIT USE_PCRE2 USE_PCRE2_JIT USE_POLL \
|
||||
USE_PRIVATE_CACHE USE_THREAD USE_PTHREAD_PSHARED \
|
||||
USE_PRIVATE_CACHE USE_THREAD USE_PTHREAD_PSHARED USE_BACKTRACE \
|
||||
USE_STATIC_PCRE USE_STATIC_PCRE2 USE_TPROXY USE_LINUX_TPROXY \
|
||||
USE_LINUX_SPLICE USE_LIBCRYPT USE_CRYPT_H USE_VSYSCALL \
|
||||
USE_GETADDRINFO USE_OPENSSL USE_LUA USE_FUTEX USE_ACCEPT4 \
|
||||
|
@ -326,7 +327,7 @@ ifeq ($(TARGET),linux-glibc)
|
|||
USE_POLL USE_TPROXY USE_LIBCRYPT USE_DL USE_RT USE_CRYPT_H USE_NETFILTER \
|
||||
USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_FUTEX USE_LINUX_TPROXY \
|
||||
USE_ACCEPT4 USE_LINUX_SPLICE USE_PRCTL USE_THREAD_DUMP USE_NS USE_TFO \
|
||||
USE_GETADDRINFO)
|
||||
USE_GETADDRINFO USE_BACKTRACE)
|
||||
endif
|
||||
|
||||
# For linux >= 2.6.28, glibc without new features
|
||||
|
@ -515,6 +516,10 @@ ifneq ($(USE_RT),)
|
|||
OPTIONS_LDFLAGS += -lrt
|
||||
endif
|
||||
|
||||
ifneq ($(USE_BACKTRACE),)
|
||||
OPTIONS_LDFLAGS += -Wl,$(if $(EXPORT_SYMBOL),$(EXPORT_SYMBOL),--export-dynamic)
|
||||
endif
|
||||
|
||||
ifneq ($(USE_OPENSSL),)
|
||||
# OpenSSL is packaged in various forms and with various dependencies.
|
||||
# In general -lssl is enough, but on some platforms, -lcrypto may be needed,
|
||||
|
|
69
src/debug.c
69
src/debug.c
|
@ -10,6 +10,12 @@
|
|||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifdef USE_BACKTRACE
|
||||
#define _GNU_SOURCE
|
||||
#include <execinfo.h>
|
||||
#endif
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <signal.h>
|
||||
#include <time.h>
|
||||
|
@ -87,6 +93,69 @@ void ha_thread_dump(struct buffer *buf, int thr, int calling_tid)
|
|||
|
||||
chunk_appendf(buf, " curr_task=");
|
||||
ha_task_dump(buf, sched->current, " ");
|
||||
|
||||
#ifdef USE_BACKTRACE
|
||||
if (stuck) {
|
||||
/* We only emit the backtrace for stuck threads in order not to
|
||||
* waste precious output buffer space with non-interesting data.
|
||||
*/
|
||||
struct buffer bak;
|
||||
void *callers[100];
|
||||
int j, nptrs;
|
||||
void *addr;
|
||||
int dump = 0;
|
||||
|
||||
nptrs = backtrace(callers, sizeof(callers)/sizeof(*callers));
|
||||
|
||||
/* The call backtrace_symbols_fd(callers, nptrs, STDOUT_FILENO)
|
||||
would produce similar output to the following: */
|
||||
|
||||
if (nptrs)
|
||||
chunk_appendf(buf, " call trace:\n");
|
||||
|
||||
#ifndef USE_DL
|
||||
/* if we can't rely on dladdr1() we won't figure what level is
|
||||
* in ha_panic() or ha_thread_dump_all_to_trash(), so we want
|
||||
* to immediately start the dump.
|
||||
*/
|
||||
dump = 2;
|
||||
#endif
|
||||
for (j = 0; j < nptrs; j++) {
|
||||
bak = *buf;
|
||||
dump_addr_and_bytes(buf, " | ", callers[j], 8);
|
||||
addr = resolve_sym_name(buf, ": ", callers[j]);
|
||||
if (dump == 0) {
|
||||
/* dump not started, will start *after*
|
||||
* ha_thread_dump_all_to_trash and ha_panic
|
||||
*/
|
||||
if (addr == ha_thread_dump_all_to_trash || addr == ha_panic)
|
||||
dump = 1;
|
||||
*buf = bak;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (dump == 1) {
|
||||
/* starting */
|
||||
if (addr == ha_thread_dump_all_to_trash || addr == ha_panic) {
|
||||
*buf = bak;
|
||||
continue;
|
||||
}
|
||||
dump = 2;
|
||||
}
|
||||
|
||||
if (dump == 2) {
|
||||
/* dumping */
|
||||
if (addr == run_poll_loop || addr == main || addr == run_tasks_from_list) {
|
||||
dump = 3;
|
||||
*buf = bak;
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* OK, line dumped */
|
||||
chunk_appendf(buf, "\n");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue