mirror of
http://git.haproxy.org/git/haproxy.git/
synced 2025-03-25 04:17:42 +00:00
In issue #958 Ashley Penney reported intermittent crashes on AWS's ARM nodes which would not happen on x86 nodes. After investigation it turned out that the Neoverse N1 CPU cores used in the Graviton2 CPU are much more aggressive than the usual Cortex A53/A72/A55 or any x86 regarding memory ordering. The issue that was triggered there is that if a tasklet_wakeup() call is made on a tasklet scheduled to run on a foreign thread and that tasklet is just being dequeued to be processed, there can be a race at two places: - if MT_LIST_TRY_ADDQ() happens between MT_LIST_BEHEAD() and LIST_SPLICE_END_DETACHED() if the tasklet is alone in the list, because the emptiness test matches; - if MT_LIST_TRY_ADDQ() happens during LIST_DEL_INIT() in run_tasks_from_lists(), then depending on how LIST_DEL_INIT() ends up being implemented, it may even corrupt the adjacent nodes while they're being reused for the in-tree storage. This issue was introduced in 2.2 when support for waking up remote tasklets was added. Initially the attachment of a tasklet to a list was enough to know its status and this used to be stable information. Now it's not sufficient to rely on this anymore, thus we need to use a different piece of information. This patch solves this by adding a new task flag, TASK_IN_LIST, which is atomically set before attaching a tasklet to a list, and is only removed after the tasklet is detached from a list. It is checked by tasklet_wakeup_on() so that it may only be done while the tasklet is out of any list, and is cleared during the state switch when calling the tasklet. Note that the flag is not set for pure tasks as it's not needed. However this introduces a new special case: the function tasklet_remove_from_tasklet_list() needs to keep both states in sync and cannot check both the state and the attachment to a list at the same time. This function is already limited to being used by the thread owning the tasklet, so in this case the test remains reliable. 
However, just like its predecessors, this function is wrong by design and it should probably be replaced with a stricter one, a lazy one, or be totally removed (it's only used in checks to avoid calling a possibly scheduled event, and when freeing a tasklet). Regardless, for now the function exists so the flag is removed only if the deletion could be done, which covers all cases we're interested in regarding the insertion. This removal is safe against a concurrent tasklet_wakeup_on() since MT_LIST_DEL() guarantees the atomic test, and will ultimately clear the flag only if the task could be deleted, so the flag will always reflect the last state. This should be carefully backported as far as 2.2 after some observation period. This patch depends on previous patch "MINOR: task: remove __tasklet_remove_from_tasklet_list()".
150 lines
6.4 KiB
C
150 lines
6.4 KiB
C
/*
|
|
* include/haproxy/task-t.h
|
|
* Macros, variables and structures for task management.
|
|
*
|
|
* Copyright (C) 2000-2010 Willy Tarreau - w@1wt.eu
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation, version 2.1
|
|
* exclusively.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with this library; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
#ifndef _HAPROXY_TASK_T_H
|
|
#define _HAPROXY_TASK_T_H
|
|
|
|
#include <sys/time.h>
|
|
|
|
#include <import/eb32sctree.h>
|
|
#include <import/eb32tree.h>
|
|
|
|
#include <haproxy/api-t.h>
|
|
#include <haproxy/thread-t.h>
|
|
|
|
/* Values for task->state. Every non-zero flag below is a distinct bit so that
 * several of them can be combined in the same state word; TASK_SLEEPING is the
 * all-zero value.
 */
|
|
#define TASK_SLEEPING 0x0000 /* task sleeping */
|
|
#define TASK_RUNNING 0x0001 /* the task is currently running */
|
|
#define TASK_GLOBAL 0x0002 /* The task is currently in the global runqueue */
|
|
#define TASK_QUEUED 0x0004 /* The task has been (re-)added to the run queue */
|
|
#define TASK_SHARED_WQ 0x0008 /* The task's expiration may be updated by other
 * threads, must be set before first queue/wakeup */
|
|
#define TASK_SELF_WAKING 0x0010 /* task/tasklet found waking itself */
|
|
#define TASK_KILLED 0x0020 /* task/tasklet killed, may now be freed */
|
|
#define TASK_IN_LIST 0x0040 /* tasklet is in a tasklet list: set before the
 * tasklet is attached to a list and removed only
 * after it is detached. Not set for pure tasks. */
|
|
|
|
/* Wake-up reasons. Each one is a distinct bit in the 0x0100..0x4000 range, so
 * they do not overlap with the TASK_* state flags defined with lower values.
 */
#define TASK_WOKEN_INIT 0x0100 /* woken up for initialisation purposes */
|
|
#define TASK_WOKEN_TIMER 0x0200 /* woken up because of expired timer */
|
|
#define TASK_WOKEN_IO 0x0400 /* woken up because of completed I/O */
|
|
#define TASK_WOKEN_SIGNAL 0x0800 /* woken up by a system signal */
|
|
#define TASK_WOKEN_MSG 0x1000 /* woken up by another task's message */
|
|
#define TASK_WOKEN_RES 0x2000 /* woken up because of available resource */
|
|
#define TASK_WOKEN_OTHER 0x4000 /* woken up for an unspecified reason */
|
|
|
|
/* Mask of all the TASK_WOKEN_* bits above. Use this to check a task state or
 * to clean it up before queueing.
 */
|
|
#define TASK_WOKEN_ANY (TASK_WOKEN_OTHER|TASK_WOKEN_INIT|TASK_WOKEN_TIMER| \
|
|
TASK_WOKEN_IO|TASK_WOKEN_SIGNAL|TASK_WOKEN_MSG| \
|
|
TASK_WOKEN_RES)
|
|
|
|
/* Tasklet list classes. The values start at zero and are contiguous, and
 * TL_CLASSES (the number of classes) is used to size the per-thread
 * tasklets[] array in struct task_per_thread.
 */
enum {
|
|
TL_URGENT = 0, /* urgent tasklets (I/O callbacks) */
|
|
TL_NORMAL = 1, /* normal tasks */
|
|
TL_BULK = 2, /* bulk task/tasklets, streaming I/Os */
|
|
TL_CLASSES /* must be last: counts the classes above */
|
|
};
|
|
|
|
/* One registration of a task to be woken up when an event occurs. The element
 * is linked into two lists at once: one used to deliver the event (wake_me)
 * and one used to purge pending registrations (purge_me).
 */
struct notification {
|
|
struct list purge_me; /* Part of the list of signals to be purged in the
 * case of the LUA execution stack crash. */
|
|
struct list wake_me; /* Part of list of signals to be targeted if an
 * event occurs. */
|
|
struct task *task; /* The task to wake up if an event occurs. */
|
|
__decl_thread(HA_SPINLOCK_T lock); /* lock declared through __decl_thread();
                                    * NOTE(review): presumably only present in
                                    * threaded builds - confirm in thread-t.h */
|
|
};
|
|
|
|
/* Per-thread scheduler state: wait queue, run queue and tasklet lists. The
 * trailing 64-byte aligned member forces per-thread stuff to be split into
 * separate cache lines.
 */
|
|
struct task_per_thread {
|
|
struct eb_root timers; /* tree constituting the per-thread wait queue */
|
|
struct eb_root rqueue; /* tree constituting the per-thread run queue */
|
|
struct mt_list shared_tasklet_list; /* Tasklet to be run, woken up by other threads */
|
|
struct list tasklets[TL_CLASSES]; /* tasklets (and/or tasks) to run, by class */
|
|
int task_list_size; /* Number of tasks among the tasklets */
|
|
int rqueue_size; /* Number of elements in the per-thread run queue */
|
|
struct task *current; /* current task (not tasklet) */
|
|
int current_queue; /* points to current tasklet list being run, -1 if none */
|
|
uint8_t tl_class_mask; /* bit mask of non-empty tasklets classes */
|
|
/* zero-sized marker: it stores nothing, its only purpose is to carry the
 * 64-byte alignment requirement for the whole structure.
 */
__attribute__((aligned(64))) char end[0];
|
|
};
|
|
|
|
/* This part is common between struct task and struct tasklet so that tasks
 * can be used as-is as tasklets. It expands to an anonymous struct, hence its
 * members (state, nice, calls, process, context) are accessed directly from
 * the enclosing structure. It must be placed first in both structures so that
 * the two share the same initial layout.
 */
|
|
#define TASK_COMMON \
|
|
struct { \
|
|
unsigned short state; /* task state : bitfield of TASK_ */ \
|
|
short nice; /* task prio from -1024 to +1024, or -32768 for tasklets */ \
|
|
unsigned int calls; /* number of times process was called */ \
|
|
struct task *(*process)(struct task *t, void *ctx, unsigned short state); /* the function which processes the task */ \
|
|
void *context; /* the task's context */ \
|
|
}
|
|
|
|
/* The base for all tasks. A task starts with the TASK_COMMON part shared with
 * tasklets, followed by the nodes used to store it in the run queue and wait
 * queue trees, its expiration date, the threads allowed to run it, and
 * timing counters (last wakeup/call date, cumulated latency and CPU time).
 */
|
|
struct task {
|
|
TASK_COMMON; /* must be at the beginning! */
|
|
struct eb32sc_node rq; /* ebtree node used to hold the task in the run queue */
|
|
struct eb32_node wq; /* ebtree node used to hold the task in the wait queue */
|
|
int expire; /* next expiration date for this task, in ticks */
|
|
unsigned long thread_mask; /* mask of thread IDs authorized to process the task */
|
|
uint64_t call_date; /* date of the last task wakeup or call */
|
|
uint64_t lat_time; /* total latency time experienced */
|
|
uint64_t cpu_time; /* total CPU time consumed */
|
|
};
|
|
|
|
/* lightweight tasks, without priority, mainly used for I/Os. A tasklet starts
 * with the same TASK_COMMON part as a task, but has no run queue / wait queue
 * tree nodes: it only carries a list element and the owner's thread ID.
 */
|
|
struct tasklet {
|
|
TASK_COMMON; /* must be at the beginning! */
|
|
struct list list; /* list element; NOTE(review): presumably the attachment point
                   * into a tasklet list (see TASK_IN_LIST) - confirm in task.h */
|
|
int tid; /* TID of the tasklet owner, <0 if local */
|
|
};
|
|
|
|
/*
|
|
* The task callback (->process) is responsible for updating ->expire. It must
|
|
* return a pointer to the task itself, except if the task has been deleted, in
|
|
* which case it returns NULL so that the scheduler knows it must not check the
|
|
* expire timer. The scheduler will requeue the task at the proper location.
|
|
*/
|
|
|
|
|
|
/* A work_list is a thread-safe way to enqueue some work to be run on another
|
|
* thread. It consists of a list, a task and a general-purpose argument.
|
|
* A work is appended to the list by atomically adding a list element to the
|
|
* list and waking up the associated task, which is done using work_add(). The
|
|
* caller must be careful about how operations are run as it will definitely
|
|
* happen that the element being enqueued is processed by the other thread
|
|
* before the call returns. Some locking conventions between the caller and the
|
|
* callee might sometimes be necessary. The task is always woken up with reason
|
|
* TASK_WOKEN_OTHER and a context pointing to the work_list entry.
|
|
*/
|
|
struct work_list {
|
|
struct mt_list head; /* thread-safe list to which work elements are appended */
|
|
struct task *task; /* task woken up when work is added to the list */
|
|
void *arg; /* general-purpose argument */
|
|
};
|
|
|
|
#endif /* _HAPROXY_TASK_T_H */
|
|
|
|
/*
|
|
* Local variables:
|
|
* c-indent-level: 8
|
|
* c-basic-offset: 8
|
|
* End:
|
|
*/
|