mirror of
http://git.haproxy.org/git/haproxy.git/
synced 2025-03-02 17:41:47 +00:00
MAJOR: watchdog: implement a thread lockup detection mechanism
Since threads were introduced, we've naturally had a number of bugs related to locking issues. In addition we've also got some issues with corrupted lists in certain rare cases not necessarily involving threads. Not only do these events cause a lot of trouble in production, as it is very hard to detect that the process is stuck in a loop and doesn't deliver the service anymore, but it's often difficult (or too late) to collect more debugging information. The patch presented here implements a lockup detection mechanism, also known as "watchdog". The principle is that (on systems supporting it), each thread will have its own CPU timer which progresses as the thread consumes CPU cycles, and when a deadline is met, a signal is delivered (SIGALRM here since it doesn't interrupt gdb by default). The thread handling this signal (which is not necessarily the one which triggered the timer) figures out the thread ID from the signal arguments and checks if it's really stuck by looking at the time spent since last exit from poll() and by checking that the thread's scheduler is still alive (so that even when dealing with configuration issues resulting in an insane number of tasks being called in turn, it is not possible to accidentally trigger it). Checking the scheduler's activity will usually result in a second chance, thus doubling the detection time. In order not to incorrectly flag a thread as being the cause of the lockup, the threads_harmless_mask is checked: a thread could very well be spinning on itself waiting for all other threads to join (typically what happens when issuing "show sess"). In this case, once all threads but one (or two) have joined, all the innocent ones are marked harmless and will not trigger the timer. Only the ones not reacting will. The deadline is set to one second, which already appears impossible to reach, especially since it's 1 second of CPU usage, not elapsed time with the CPU being preempted by other threads/processes/hypervisor. 
In practice due to the scheduler's health verification it takes up to two seconds to decide to panic. Once all conditions are met, the goal is to crash from the offending thread. So if it's the current one, we call ha_panic() otherwise the signal is bounced to the offending thread which deals with it. This will result in all threads being woken up in turn to dump their context, the whole state is emitted on stderr in hope that it can be logged, and the process aborts, leaving a chance for a core to be dumped and for a service manager to restart it. An alternative mechanism could be implemented for systems unable to wake up a thread once its CPU clock reaches a deadline (e.g. FreeBSD). Instead of waking the timer each and every deadline, it is possible to use a standard timer which is reset each time we leave poll(). Since the signal handler rechecks the CPU consumption this will also work. However a totally idle process may trigger it from time to time which may or may not confuse some debugging sessions. The same is true for alarm() which could be another option for systems not having such a broad choice of timers (but it seems that in this case they will not have per-thread CPU measurements available either). The feature is currently implemented only when threads are enabled in order to keep the code clean, since the main purpose is to detect and address inter-thread deadlocks. But if it proves useful for other situations this condition might be relaxed.
This commit is contained in:
parent
430f590b5b
commit
2bfefdbaef
2
Makefile
2
Makefile
@ -819,7 +819,7 @@ OBJS = src/proto_http.o src/cfgparse-listen.o src/proto_htx.o src/stream.o \
|
||||
src/xxhash.o src/hpack-enc.o src/h2.o src/freq_ctr.o src/lru.o \
|
||||
src/protocol.o src/arg.o src/hpack-huff.o src/hdr_idx.o src/base64.o \
|
||||
src/hash.o src/mailers.o src/activity.o src/http_msg.o src/version.o \
|
||||
src/mworker.o src/mworker-prog.o src/debug.o
|
||||
src/mworker.o src/mworker-prog.o src/debug.o src/wdt.o
|
||||
|
||||
EBTREE_OBJS = $(EBTREE_DIR)/ebtree.o $(EBTREE_DIR)/eb32sctree.o \
|
||||
$(EBTREE_DIR)/eb32tree.o $(EBTREE_DIR)/eb64tree.o \
|
||||
|
186
src/wdt.c
Normal file
186
src/wdt.c
Normal file
@ -0,0 +1,186 @@
|
||||
/*
|
||||
* Thread lockup detection
|
||||
*
|
||||
* Copyright 2000-2019 Willy Tarreau <willy@haproxy.org>.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*/
|
||||
|
||||
#include <signal.h>
|
||||
#include <time.h>
|
||||
|
||||
#include <common/config.h>
|
||||
#include <common/debug.h>
|
||||
#include <common/hathreads.h>
|
||||
#include <common/initcall.h>
|
||||
#include <common/standard.h>
|
||||
#include <types/global.h>
|
||||
#include <proto/log.h>
|
||||
|
||||
|
||||
/*
|
||||
* It relies on timer_create() and timer_settime() which are only available in
|
||||
* this case.
|
||||
*/
|
||||
#if defined(USE_THREAD) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME) && (_POSIX_C_SOURCE >= 199309L)
|
||||
|
||||
/* Signal delivered when a thread's CPU-time watchdog timer expires. SIGALRM
 * is used because gdb does not intercept it by default, so the watchdog does
 * not get in the way of a debugging session.
 */
|
||||
#define WDTSIG SIGALRM
|
||||
|
||||
/* Setup (or ping) the watchdog timer for thread <thr>. Returns non-zero on
|
||||
* success, zero on failure. It interrupts once per second of CPU time. It
|
||||
* happens that timers based on the CPU time are not automatically re-armed
|
||||
* so we only use the value and leave the interval unset.
|
||||
*/
|
||||
int wdt_ping(int thr)
|
||||
{
|
||||
struct itimerspec its;
|
||||
|
||||
its.it_value.tv_sec = 1; its.it_value.tv_nsec = 0;
|
||||
its.it_interval.tv_sec = 0; its.it_interval.tv_nsec = 0;
|
||||
return timer_settime(thread_info[thr].wd_timer, 0, &its, NULL) == 0;
|
||||
}
|
||||
|
||||
/* This is the WDTSIG signal handler */
|
||||
void wdt_handler(int sig, siginfo_t *si, void *arg)
|
||||
{
|
||||
unsigned long long n, p;
|
||||
int thr;
|
||||
|
||||
switch (si->si_code) {
|
||||
case SI_TIMER:
|
||||
/* A thread's timer fired, the thread ID is in si_int. We have
|
||||
* no guarantee that the thread handling this signal is in any
|
||||
* way related to the one triggering it, so we need to retrieve
|
||||
* the thread number from there. Note: this thread might
|
||||
* continue to execute in parallel.
|
||||
*/
|
||||
thr = si->si_int;
|
||||
|
||||
/* cannot happen unless an unknown timer tries to play with our
|
||||
* nerves. Let's die for now if this happens.
|
||||
*/
|
||||
if (thr < 0 || thr >= global.nbthread)
|
||||
break;
|
||||
|
||||
p = thread_info[thr].prev_cpu_time;
|
||||
n = now_cpu_time_thread(&thread_info[thr]);
|
||||
|
||||
/* not yet reached the deadline of 1 sec */
|
||||
if (n - p < 1000000000UL)
|
||||
goto update_and_leave;
|
||||
|
||||
if ((threads_harmless_mask|sleeping_thread_mask) & (1UL << thr)) {
|
||||
/* This thread is currently doing exactly nothing
|
||||
* waiting in the poll loop (unlikely but possible),
|
||||
* waiting for all other threads to join the rendez-vous
|
||||
* point (common), or waiting for another thread to
|
||||
* finish an isolated operation (unlikely but possible).
|
||||
*/
|
||||
goto update_and_leave;
|
||||
}
|
||||
|
||||
/* So the thread indeed appears locked up. In order to be
|
||||
* certain that we're not witnessing an exceptional spike of
|
||||
* CPU usage due to a configuration issue (like running tens
|
||||
* of thousands of tasks in a single loop), we'll check if the
|
||||
* scheduler is still alive by setting the TI_FL_STUCK flag
|
||||
* that the scheduler clears when switching to the next task.
|
||||
* If it's already set, then it's our second call with no
|
||||
* progress and the thread is dead.
|
||||
*/
|
||||
if (!(thread_info[thr].flags & TI_FL_STUCK)) {
|
||||
_HA_ATOMIC_OR(&thread_info[thr].flags, TI_FL_STUCK);
|
||||
goto update_and_leave;
|
||||
}
|
||||
|
||||
/* No doubt now, there's no hop to recover, die loudly! */
|
||||
break;
|
||||
|
||||
case SI_TKILL:
|
||||
/* we got a pthread_kill, stop on it */
|
||||
thr = tid;
|
||||
break;
|
||||
|
||||
default:
|
||||
/* unhandled other conditions */
|
||||
return;
|
||||
}
|
||||
|
||||
/* By default we terminate. If we're not on the victim thread, better
|
||||
* bounce the signal there so that we produce a cleaner stack trace
|
||||
* with the other thread interrupted exactly where it was running and
|
||||
* the current one not involved in this.
|
||||
*/
|
||||
if (thr != tid)
|
||||
pthread_kill(thread_info[thr].pthread, sig);
|
||||
else
|
||||
ha_panic();
|
||||
return;
|
||||
|
||||
update_and_leave:
|
||||
wdt_ping(thr);
|
||||
}
|
||||
|
||||
int init_wdt_per_thread()
|
||||
{
|
||||
struct sigevent sev;
|
||||
sigset_t set;
|
||||
|
||||
/* unblock the WDTSIG signal we intend to use */
|
||||
sigemptyset(&set);
|
||||
sigaddset(&set, WDTSIG);
|
||||
ha_sigmask(SIG_UNBLOCK, &set, NULL);
|
||||
|
||||
/* this timer will signal WDTSIG when it fires, with tid in the si_int
|
||||
* field (important since any thread will receive the signal).
|
||||
*/
|
||||
sev.sigev_notify = SIGEV_SIGNAL;
|
||||
sev.sigev_signo = WDTSIG;
|
||||
sev.sigev_value.sival_int = tid;
|
||||
if (timer_create(ti->clock_id, &sev, &ti->wd_timer) == -1)
|
||||
goto fail1;
|
||||
|
||||
if (!wdt_ping(tid))
|
||||
goto fail2;
|
||||
|
||||
return 1;
|
||||
|
||||
fail2:
|
||||
timer_delete(ti->wd_timer);
|
||||
fail1:
|
||||
ti->wd_timer = TIMER_INVALID;
|
||||
ha_warning("Failed to setup watchdog timer for thread %u, disabling lockup detection.\n", tid);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void deinit_wdt_per_thread()
|
||||
{
|
||||
if (ti->wd_timer != TIMER_INVALID)
|
||||
timer_delete(ti->wd_timer);
|
||||
}
|
||||
|
||||
/* registers the watchdog signal handler and returns 0. This sets up the signal
|
||||
* handler for WDTSIG, so it must be called once per process.
|
||||
*/
|
||||
int init_wdt()
|
||||
{
|
||||
struct sigaction sa;
|
||||
|
||||
sa.sa_handler = NULL;
|
||||
sa.sa_sigaction = wdt_handler;
|
||||
sigemptyset(&sa.sa_mask);
|
||||
sa.sa_flags = SA_SIGINFO;
|
||||
sigaction(WDTSIG, &sa, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Hook the watchdog into the init framework: init_wdt() is registered as a
 * post-check callback, while init_wdt_per_thread() and deinit_wdt_per_thread()
 * are registered as the per-thread init and deinit callbacks.
 */
REGISTER_POST_CHECK(init_wdt);
|
||||
REGISTER_PER_THREAD_INIT(init_wdt_per_thread);
|
||||
REGISTER_PER_THREAD_DEINIT(deinit_wdt_per_thread);
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user