From 01abd025084b4fe50e84189d1a83499cbf4825ed Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Thu, 28 Feb 2019 10:27:18 +0100 Subject: [PATCH] BUG/MEDIUM: listener: use a self-locked list for the dequeue lists There is a very difficult to reproduce race in the listener's accept code, which is much easier to reproduce once connection limits are properly enforced. It's an ABBA lock issue : - the following functions take l->lock then lq_lock : disable_listener, pause_listener, listener_full, limit_listener, do_unbind_listener - the following ones take lq_lock then l->lock : resume_listener, dequeue_all_listeners This is because __resume_listener() only takes the listener's lock and expects to be called with lq_lock held. The problem can easily happen when listener_full() and limit_listener() are called a lot while in parallel another thread releases sessions for the same listener using listener_release() which in turn calls resume_listener(). This scenario is more prevalent in 2.0-dev since the removal of the accept lock in listener_accept(). However in 1.9 and before, a different but extremely unlikely scenario can happen : thread1 thread2 ............................ enter listener_accept() limit_listener() ............................ long pause before taking the lock session_free() dequeue_all_listeners() lock(lq_lock) [1] ............................ try_lock(l->lock) [2] __resume_listener() spin_lock(l->lock) =>WAIT[2] ............................ accept() l->accept() nbconn==maxconn => listener_full() state==LI_LIMITED => lock(lq_lock) =>DEADLOCK[1]! In practice it is almost impossible to trigger it because it requires to limit both on the listener's maxconn and the frontend's rate limit, at the same time, and to release the listener when the connection rate goes below the limit between poll() returns the FD and the lock is taken (a few nanoseconds). But maybe with threads competing on the same core it has more chances to appear. 
This patch removes the lq_lock and replaces it with a lockless queue for the listener's wait queue (well, technically speaking a self-locked queue) brought by commit a8434ec14 ("MINOR: lists: Implement locked variations.") and its few subsequent fixes. This relieves us from the need of the lq_lock and removes the deadlock. It also gets rid of the distinction between __resume_listener() and resume_listener() since the only difference was the lq_lock. All listener removals from the list are now unconditional to avoid races on the state. It's worth noting that the list used to never be initialized and that it used to work only thanks to the state tests, so the initialization has now been added. This patch must carefully be backported to 1.9 and very likely 1.8. It is mandatory to be careful about replacing all manipulations of l->wait_queue, global.listener_queue and p->listener_queue. --- include/common/hathreads.h | 2 -- src/listener.c | 58 ++++++++------------------------------ 2 files changed, 12 insertions(+), 48 deletions(-) diff --git a/include/common/hathreads.h b/include/common/hathreads.h index 0cd00fb00..74a47619c 100644 --- a/include/common/hathreads.h +++ b/include/common/hathreads.h @@ -388,7 +388,6 @@ enum lock_label { TASK_WQ_LOCK, POOL_LOCK, LISTENER_LOCK, - LISTENER_QUEUE_LOCK, PROXY_LOCK, SERVER_LOCK, LBPRM_LOCK, @@ -505,7 +504,6 @@ static inline const char *lock_label(enum lock_label label) case TASK_WQ_LOCK: return "TASK_WQ"; case POOL_LOCK: return "POOL"; case LISTENER_LOCK: return "LISTENER"; - case LISTENER_QUEUE_LOCK: return "LISTENER_QUEUE"; case PROXY_LOCK: return "PROXY"; case SERVER_LOCK: return "SERVER"; case LBPRM_LOCK: return "LBPRM"; diff --git a/src/listener.c b/src/listener.c index 504cbbf38..8512ed054 100644 --- a/src/listener.c +++ b/src/listener.c @@ -42,9 +42,6 @@ #include #include - /* listner_queue lock (same for global and per proxy queues) */ -__decl_spinlock(lq_lock); - /* List head of all known bind keywords */ static 
struct bind_kw_list bind_keywords = { .list = LIST_HEAD_INIT(bind_keywords.list) @@ -265,11 +262,7 @@ static void disable_listener(struct listener *listener) goto end; if (listener->state == LI_READY) fd_stop_recv(listener->fd); - if (listener->state == LI_LIMITED) { - HA_SPIN_LOCK(LISTENER_QUEUE_LOCK, &lq_lock); - LIST_DEL(&listener->wait_queue); - HA_SPIN_UNLOCK(LISTENER_QUEUE_LOCK, &lq_lock); - } + LIST_DEL_LOCKED(&listener->wait_queue); listener->state = LI_LISTEN; end: HA_SPIN_UNLOCK(LISTENER_LOCK, &listener->lock); @@ -305,11 +298,7 @@ int pause_listener(struct listener *l) goto end; } - if (l->state == LI_LIMITED) { - HA_SPIN_LOCK(LISTENER_QUEUE_LOCK, &lq_lock); - LIST_DEL(&l->wait_queue); - HA_SPIN_UNLOCK(LISTENER_QUEUE_LOCK, &lq_lock); - } + LIST_DEL_LOCKED(&l->wait_queue); fd_stop_recv(l->fd); l->state = LI_PAUSED; @@ -328,7 +317,7 @@ int pause_listener(struct listener *l) * stopped it. If the resume fails, 0 is returned and an error might be * displayed. */ -static int __resume_listener(struct listener *l) +int resume_listener(struct listener *l) { int ret = 1; @@ -369,8 +358,7 @@ static int __resume_listener(struct listener *l) if (l->state == LI_READY) goto end; - if (l->state == LI_LIMITED) - LIST_DEL(&l->wait_queue); + LIST_DEL_LOCKED(&l->wait_queue); if (l->nbconn >= l->maxconn) { l->state = LI_FULL; @@ -384,16 +372,6 @@ static int __resume_listener(struct listener *l) return ret; } -int resume_listener(struct listener *l) -{ - int ret; - - HA_SPIN_LOCK(LISTENER_QUEUE_LOCK, &lq_lock); - ret = __resume_listener(l); - HA_SPIN_UNLOCK(LISTENER_QUEUE_LOCK, &lq_lock); - return ret; -} - /* Marks a ready listener as full so that the stream code tries to re-enable * it upon next close() using resume_listener(). 
*/ @@ -401,11 +379,7 @@ static void listener_full(struct listener *l) { HA_SPIN_LOCK(LISTENER_LOCK, &l->lock); if (l->state >= LI_READY) { - if (l->state == LI_LIMITED) { - HA_SPIN_LOCK(LISTENER_QUEUE_LOCK, &lq_lock); - LIST_DEL(&l->wait_queue); - HA_SPIN_UNLOCK(LISTENER_QUEUE_LOCK, &lq_lock); - } + LIST_DEL_LOCKED(&l->wait_queue); if (l->state != LI_FULL) { fd_stop_recv(l->fd); l->state = LI_FULL; @@ -421,9 +395,7 @@ static void limit_listener(struct listener *l, struct list *list) { HA_SPIN_LOCK(LISTENER_LOCK, &l->lock); if (l->state == LI_READY) { - HA_SPIN_LOCK(LISTENER_QUEUE_LOCK, &lq_lock); - LIST_ADDQ(list, &l->wait_queue); - HA_SPIN_UNLOCK(LISTENER_QUEUE_LOCK, &lq_lock); + LIST_ADDQ_LOCKED(list, &l->wait_queue); fd_stop_recv(l->fd); l->state = LI_LIMITED; } @@ -462,17 +434,14 @@ int disable_all_listeners(struct protocol *proto) /* Dequeues all of the listeners waiting for a resource in wait queue . */ void dequeue_all_listeners(struct list *list) { - struct listener *listener, *l_back; + struct listener *listener; - HA_SPIN_LOCK(LISTENER_QUEUE_LOCK, &lq_lock); - list_for_each_entry_safe(listener, l_back, list, wait_queue) { + while ((listener = LIST_POP_LOCKED(list, struct listener *, wait_queue))) { /* This cannot fail because the listeners are by definition in - * the LI_LIMITED state. The function also removes the entry - * from the queue. + * the LI_LIMITED state. */ - __resume_listener(listener); + resume_listener(listener); } - HA_SPIN_UNLOCK(LISTENER_QUEUE_LOCK, &lq_lock); } /* Must be called with the lock held. 
Depending on value, it does @@ -483,11 +452,7 @@ void do_unbind_listener(struct listener *listener, int do_close) if (listener->state == LI_READY) fd_stop_recv(listener->fd); - if (listener->state == LI_LIMITED) { - HA_SPIN_LOCK(LISTENER_QUEUE_LOCK, &lq_lock); - LIST_DEL(&listener->wait_queue); - HA_SPIN_UNLOCK(LISTENER_QUEUE_LOCK, &lq_lock); - } + LIST_DEL_LOCKED(&listener->wait_queue); if (listener->state >= LI_PAUSED) { if (do_close) { @@ -569,6 +534,7 @@ int create_listeners(struct bind_conf *bc, const struct sockaddr_storage *ss, l->fd = fd; memcpy(&l->addr, ss, sizeof(*ss)); + LIST_INIT(&l->wait_queue); l->state = LI_INIT; proto->add(l, port);