diff --git a/doc/internals/api/mt_list.txt b/doc/internals/api/mt_list.txt new file mode 100644 index 0000000000..8a6bd88ae7 --- /dev/null +++ b/doc/internals/api/mt_list.txt @@ -0,0 +1,668 @@ +MT_LIST: multi-thread aware doubly-linked lists + +Abstract +-------- + +mt_lists are a form of doubly-linked lists that support thread-safe standard +list operations such as insert / append / delete / pop, as well as a safe +iterator that supports deletion and concurrent use. + +Principles +---------- + +The lists are designed to minimize contention in environments where elements +may be concurrently manipulated at different locations. The principle is to +act on the links between the elements instead of the elements themselves. This +is achieved by temporarily "cutting" these links, which effectively consists in +replacing the ends of the links with special pointers serving as a lock, called +MT_LIST_BUSY. An element is considered locked when both its next and prev +pointers are equal to this MT_LIST_BUSY pointer. A link is locked when both of +its ends are equal to this MT_LIST_BUSY pointer, i.e. the next pointer of the +element at the source of the link and the prev pointer of the element the link +points to. It's worth noting that a locked link by definition no longer exists +since neither end knows where it was pointing to, unless a backup of it was +made prior to locking it. + +The next and prev pointers are replaced by the list manipulation functions +using atomic exchange. This means that the caller knows if the element it tries +to replace was already locked or if it owns it. In order to replace a link, +both ends of the link must be owned by the thread willing to replace it. +Similarly when adding or removing an element, both ends of the elements must be +owned by the thread trying to manipulate the element. + +Appending or inserting elements comes in two flavors: the standard one which +considers that the element is already owned by the thread and ignores its +contents; this is the most common usage for a link that was just allocated or +extracted from a list. The second flavor doesn't trust the thread's ownership +of the element and tries to own it prior to adding the element; this may be +used when this element is a shared one that needs to be placed into a list. + +Removing an element always consists in owning the two links surrounding it, +hence owning the 4 pointers. + +Scanning the list consists in locking the element to (re)start from, locking +the link used to jump to the next element, then locking that element and +unlocking the previous one. All types of concurrency issues are supported +there, including elements disappearing while trying to lock them. It is +perfectly possible to have multiple threads scan the same list at the same +time, and it's usually efficient. However, if those threads face a single +contention point (e.g. pause on a locked element), they may then restart +working from the same point all at the same time and compete for the same links +and elements for each step, which will become less efficient. However, it does +work fine. + +There's currently no support for shared locking (e.g. rwlocks), elements and +links are always exclusively locked. Since locks are attempted in a sequence, +this creates a nested lock pattern which could theoretically cause deadlocks +if adjacent elements were locked in parallel. 
This situation is handled using +a rollback mechanism: if any thread fails to lock any element or pointer, it +detects the conflict with another thread and entirely rolls back its operations +in order to let the other thread complete. This rollback is what aims at +guaranteeing forward progress. There is, however, a non-null risk that both +threads spend their time rolling back and trying again. This is covered using +exponential back-off that may grow to large enough values to let a thread lock +all the pointer it needs to complete an operation. Other mechanisms could be +implemented in the future such as rotating priorities or random lock numbers +to let both threads know which one must roll back and which one may continue. + +Due to certain operations applying to the type of an element (iterator, element +retrieval), some parts do require macros. In order to avoid keeping too +confusing an API, all operations are made accessible via macros. However, in +order to ease maintenance and improve error reporting when facing unexpected +arguments, all the code parts that were compatible have been implemented as +inlinable functions instead. And in order to help with performance profiling, +it is possible to prevent the compiler from inlining all the functions that +may loop. As a rule of thumb, operations which only exist as macros do modify +one or more of their arguments. + +All exposed functions are called "mt_list_something()", all exposed macros are +called "MT_LIST_SOMETHING()", possibly mapping 1-to-1 to the equivalent +function, and the list element type is called "mt_list". + + +Operations +---------- + +mt_list_append(el1, el2) + Adds el2 before el1, which means that if el1 is the list's head, el2 will + effectively be appended to the end of the list. + + before: + +---+ + |el2| + +---+ + V + +---+ +---+ +---+ +---+ +---+ +---+ + #=>|el1|<===>| B |<===>| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + after: + +---+ +---+ +---+ +---+ +---+ +---+ +---+ + #=>|el1|<===>| B |<===>| C |<===>| D |<===>| E |<===>| F |<===>|el2|<=# + # +---+ +---+ +---+ +---+ +---+ +---+ +---+ # + #=====================================================================# + + +mt_list_try_append(el1, el2) + Tries to add el2 before el1, which means that if el1 is the list's head, + el2 will effectively be appended to the end of the list. el2 will only be + added if it's deleted (loops over itself). The operation will return zero if + this is not the case (el2 is not empty anymore) or non-zero on success. + + before: + #=========# + # +---+ # + #=>|el2|<=# + +---+ + V + +---+ +---+ +---+ +---+ +---+ +---+ + #=>|el1|<===>| B |<===>| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + after: + +---+ +---+ +---+ +---+ +---+ +---+ +---+ + #=>|el1|<===>| B |<===>| C |<===>| D |<===>| E |<===>| F |<===>|el2|<=# + # +---+ +---+ +---+ +---+ +---+ +---+ +---+ # + #=====================================================================# + + +mt_list_insert(el1, el2) + Adds el2 after el1, which means that if el1 is the list's head, el2 will + effectively be insert at the beginning of the list. 
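+
+  As an illustration, a producer thread may rely on mt_list_append() and
+  mt_list_insert() alone to feed a shared work queue. The sketch below is
+  purely illustrative and assumes a hypothetical "struct work" embedding an
+  mt_list member named "list", plus a shared head named "work_queue":
+
+      struct work {
+          struct mt_list list;
+          /* work-specific fields */
+      };
+
+      struct mt_list work_queue = MT_LIST_HEAD_INIT(work_queue);
+
+      void enqueue(struct work *w)
+      {
+          /* w is known not to be in any list; its contents are ignored */
+          mt_list_append(&work_queue, &w->list);  /* regular job: at the tail */
+      }
+
+      void enqueue_urgent(struct work *w)
+      {
+          mt_list_insert(&work_queue, &w->list);  /* urgent job: at the head */
+      }
+
+  The before/after diagrams below illustrate the pointer changes performed by
+  mt_list_insert():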
+ + before: + +---+ + |el2| + +---+ + V + +---+ +---+ +---+ +---+ +---+ +---+ + #=>|el1|<===>| B |<===>| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + after: + +---+ +---+ +---+ +---+ +---+ +---+ +---+ + #=>|el1|<===>|el2|<===>| B |<===>| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ +---+ # + #=====================================================================# + + +mt_list_try_insert(el1, el2) + Tries to add el2 after el1, which means that if el1 is the list's head, + el2 will effectively be inserted at the beginning of the list. el2 will only + be added if it's deleted (loops over itself). The operation will return zero + if this is not the case (el2 is not empty anymore) or non-zero on success. + + before: + #=========# + # +---+ # + #=>|el2|<=# + +---+ + V + +---+ +---+ +---+ +---+ +---+ +---+ + #=>|el1|<===>| B |<===>| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + after: + +---+ +---+ +---+ +---+ +---+ +---+ +---+ + #=>|el1|<===>|el2|<===>| B |<===>| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ +---+ # + #=====================================================================# + + +mt_list_delete(el1) + Removes el1 from the list, and marks it as deleted, wherever it is. If + the element was already not part of a list anymore, 0 is returned, + otherwise non-zero is returned if the operation could be performed. + + before: + +---+ +---+ +---+ +---+ +---+ +---+ +---+ + #=>| A |<===>|el1|<===>| B |<===>| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ +---+ # + #=====================================================================# + + after: + +---+ +---+ +---+ +---+ +---+ +---+ + #=>| A |<===>| B |<===>| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + +---+ + #=>|el1|<=# + # +---+ # + #=========# + + +mt_list_behead(l) + Detaches a list of elements from its head with the aim of reusing them to + do anything else. The head will be turned to an empty list, and the list + will be partially looped: the first element's prev will point to the last + one, and the last element's next will be NULL. The pointer to the first + element is returned, or NULL if the list was empty. This is essentially + used when recycling lists of unused elements, or to grab a lot of elements + at once for local processing. It is safe to be run concurrently with the + insert/append operations performed at the list's head, but not against + modifications performed at any other place, such as delete operation. + + before: + +---+ +---+ +---+ +---+ +---+ +---+ +---+ + #=>| L |<===>| A |<===>| B |<===>| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ +---+ # + #=====================================================================# + + after: + +---+ +---+ +---+ +---+ +---+ +---+ +---+ + #=>| L |<=# ,--| A |<===>| B |<===>| C |<===>| D |<===>| E |<===>| F |<-. + # +---+ # | +---+ +---+ +---+ +---+ +---+ +---+ | + #=========# `-----------------------------------------------------------' + + +mt_list_pop(l) + Removes the list's first element, returns it deleted. If the list was empty, + NULL is returned. When combined with mt_list_append() this can be used to + implement MPMC queues for example. 
A macro MT_LIST_POP() is provided for a + more convenient use; instead of returning the list element, it will return + the structure holding the element, taking care of preserving the NULL. + + before: + +---+ +---+ +---+ +---+ +---+ +---+ +---+ + #=>| L |<===>| A |<===>| B |<===>| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ +---+ # + #=====================================================================# + + after: + +---+ +---+ +---+ +---+ +---+ +---+ + #=>| L |<===>| B |<===>| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + +---+ + #=>| A |<=# + # +---+ # + #=========# + + +_mt_list_lock_next(elt) + Locks the link that starts at the next pointer of the designated element. + The link is replaced by two locked pointers, and a pointer to the next + element is returned. The link must then be unlocked using + _mt_list_unlock_next() passing it this pointer, or mt_list_unlock_link(). + This function is not intended to be used by applications, and makes certain + assumptions about the state of the list pertaining to its use in iterators. + + before: + +---+ +---+ +---+ +---+ +---+ +---+ + #=>|elt|<===>| B |<===>| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + after: + +---+ +---+ +---+ +---+ +---+ +---+ + #=>|elt|x x| B |<===>| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + Return + value: &B + + +_mt_list_unlock_next(elt, back) + Unlocks the link that starts at the next pointer of the designated element + and is supposed to end at . This function is not intended to be used + by applications, and makes certain assumptions about the state of the list + pertaining to its use in iterators. + + before: back + \ + +---+ +---+ +---+ +---+ +---+ +---+ + #=>|elt|x x| B |<===>| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + after: + +---+ +---+ +---+ +---+ +---+ +---+ + #=>|elt|<===>| B |<===>| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + +_mt_list_lock_prev(elt) + Locks the link that starts at the prev pointer of the designated element. + The link is replaced by two locked pointers, and a pointer to the prev + element is returned. The link must then be unlocked using + _mt_list_unlock_prev() passing it this pointer, or mt_list_unlock_link(). + This function is not intended to be used by applications, and makes certain + assumptions about the state of the list pertaining to its use in iterators. + + before: + +---+ +---+ +---+ +---+ +---+ +---+ + #=>| A |<===>|elt|<===>| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + after: + +---+ +---+ +---+ +---+ +---+ +---+ + #=>| A |x x|elt|<===>| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + Return + value: &A + + +_mt_list_unlock_prev(elt, back) + Unlocks the link that starts at the prev pointer of the designated element + and is supposed to end at . 
This function is not intended to be used + by applications, and makes certain assumptions about the state of the list + pertaining to its use in iterators. + + before: back + / + +---+ +---+ +---+ +---+ +---+ +---+ + #=>| A |x x|elt|<===>| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + after: + +---+ +---+ +---+ +---+ +---+ +---+ + #=>| A |<===>|elt|<===>| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + +mt_list_lock_next(elt) + Cuts the list after the specified element. The link is replaced by two + locked pointers, and is returned as a list element. The list must then + be unlocked using mt_list_unlock_link() or mt_list_unlock_full() applied + to the returned list element. + + before: + +---+ +---+ +---+ +---+ +---+ +---+ + #=>|elt|<===>| B |<===>| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + after: + +---+ +---+ +---+ +---+ +---+ +---+ + #=>|elt|x x| B |<===>| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + Return elt B + value: <===> + + +mt_list_lock_prev(elt) + Cuts the list before the specified element. The link is replaced by two + locked pointers, and is returned as a list element. The list must then + be unlocked using mt_list_unlock_link() or mt_list_unlock_full() applied + to the returned list element. + + before: + +---+ +---+ +---+ +---+ +---+ +---+ + #=>| A |<===>|elt|<===>| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + after: + +---+ +---+ +---+ +---+ +---+ +---+ + #=>| A |x x|elt|<===>| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + Return A elt + value: <===> + + +mt_list_lock_elem(elt) + Locks the element only. Both of its pointers are replaced by two locked + pointers, and the previous ones are returned as a list element. It's not + possible to remove such an element from a list since neighbors are not + locked. The sole purpose of this operation is to prevent another thread + from visiting this element during an operation. The element must then be + unlocked using mt_list_unlock_elem() applied to the returned element. + + before: + +---+ +---+ +---+ +---+ +---+ +---+ + #=>| A |<===>|elt|<===>| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + after: + +---+ +---+ +---+ +---+ +---+ +---+ + #=>| A |=> x|elt|x <=| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + Return A C + value: <===> + + +mt_list_unlock_elem(elt, ends) + Unlocks the element only by restoring its backed up contents from , + as returned by a previous call to mt_list_lock_elem(elt). The ends of the + links are not affected, only the element is touched. This is intended to + terminate a critical section started by a call to mt_list_lock_elem(). It + may also be used on a fully locked element processed by mt_list_lock_full() + in which case it will leave the list still locked. 
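+
+  As an illustration, a thread that needs to make sure an element will neither
+  be visited nor deleted by concurrent list walkers while it updates it could
+  proceed as below. This is only a sketch: "struct job", its "list" member and
+  its "state" field are hypothetical:
+
+      struct mt_list ends;
+
+      ends = mt_list_lock_elem(&job->list);   /* both pointers now locked    */
+      job->state = JOB_CLAIMED;               /* not visited by walkers here */
+      mt_list_unlock_elem(&job->list, ends);  /* restore the saved pointers  */
+
+  The diagrams below show the effect of the unlock after an element-only lock,
+  then when applied to a fully locked element: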
+ + before: + A C + ends: <===> + + +---+ +---+ +---+ +---+ +---+ +---+ + #=>| A |=> x|elt|x <=| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + after: + +---+ +---+ +---+ +---+ +---+ +---+ + #=>| A |<===>|elt|<===>| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + before: + A C + ends: <===> + + +---+ +---+ +---+ +---+ +---+ +---+ + #=>| A |x x|elt|x x| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + after: + +---+ +---+ +---+ +---+ +---+ +---+ + #=>| A |x <=|elt|=> x| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + +mt_list_unlock_self(elt) + Unlocks the element only by resetting it (i.e. making it loop over itself). + This is useful in the locked variant of iterators when the element is to be + removed from the list and first needs to be unlocked because it's shared + with other operations (such as a concurrent attempt to delete it from a + list), or simply in case it is to be recycled in a usable state. The ends + of the links are not affected, only the element is touched. This is + normally only used from within locked iterators, which perform a full lock + (both links are locked). + + before: + +---+ +---+ +---+ +---+ +---+ +---+ + #=>| A |x x|elt|x x| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + after: + +---+ +---+ +---+ +---+ +---+ +---+ + #=>|elt|<=# #=>| A |x x| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ # # +---+ +---+ +---+ +---+ +---+ # + #=========# #=================================================# + + +mt_list_lock_full(elt) + Locks both the element and its surrounding links. The extremities of the + previous links are returned as a single list element (which corresponds to + the element's before locking). The list must then be unlocked using + mt_list_unlock_full() to reconnect the element to the list and unlock + both, or mt_list_unlock_link() to effectively remove the element. + + before: + +---+ +---+ +---+ +---+ +---+ +---+ + #=>| A |<===>|elt|<===>| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + after: + +---+ +---+ +---+ +---+ +---+ +---+ + #=>| A |x x|elt|x x| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + Return A C + value: <=============> + + +mt_list_unlock_link(ends) + Connects two ends in a list together, effectively unlocking the list if it + was locked. It takes a list head which contains a pointer to the prev and + next elements to connect together. It normally is a copy of a previous link + returned by functions such as mt_list_lock_next(), mt_list_lock_prev(), or + mt_list_lock_full(). If applied after mt_list_lock_full(), it will result + in the list being reconnected without the element, which remains locked, + effectively deleting it. Note that this is not meant to be used from within + iterators, as the iterator will automatically and safely reconnect ends + after each iteration. 
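+
+  As an illustration, combining it with mt_list_lock_full() yields a manual
+  removal sequence roughly equivalent to mt_list_delete(). This is only a
+  sketch, "struct job" and its "list" member being hypothetical:
+
+      struct mt_list ends;
+
+      ends = mt_list_lock_full(&job->list);  /* lock the element and its links */
+      mt_list_unlock_link(ends);             /* reconnect neighbors without it */
+      mt_list_init(&job->list);              /* reset it so it may be reused   */
+
+  The diagrams below show the reconnection of a previously cut link: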
+ + before: + A C + Ends: <===> + + +---+ +---+ +---+ +---+ +---+ + #=>| A |x x| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ # + #=================================================# + + after: + +---+ +---+ +---+ +---+ +---+ + #=>| A |<===>| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ # + #=================================================# + + +mt_list_unlock_full(elt, ends) + Connects the specified element to the elements pointed to by the specified + , which is a backup copy of the previous list member of the element + prior to locking it using mt_list_lock_full() or mt_list_lock_elem(). This + is normally used to unlock an element and a list, but may also be used to + manually insert an element into an opened list (which should still be + locked). The element's list member is technically assigned a copy of + and both sides point to the element. This must not be used inside an + iterator as it would also unlock the list itself and make the loop visit + nodes in an unknown state. + + before: + +---+ + elt: x|elt|x + +---+ + A C + ends: <=============> + + +---+ +---+ +---+ +---+ +---+ + #=>| A |x x| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + after: + +---+ +---+ +---+ +---+ +---+ +---+ + #=>| A |<===>|elt|<===>| C |<===>| D |<===>| E |<===>| F |<=# + # +---+ +---+ +---+ +---+ +---+ +---+ # + #===========================================================# + + +MT_LIST_FOR_EACH_ENTRY_LOCKED(item, list_head, member, back) + Iterates through a list of items of type "typeof(*item)" which are + linked via a "struct mt_list" member named . A pointer to the head + of the list is passed in . is a temporary struct mt_list, + used internally. It contains a copy of the contents of the current item's + list member before locking it. This macro is implemented using two nested + loops, each defined as a separate macro for easier inspection. The inner + loop will run for each element in the list, and the outer loop will run + only once to do some cleanup and unlocking when the end of the list is + reached or user breaks from inner loop. It is safe to break from this macro + as the cleanup will be performed anyway, but it is strictly forbidden to + branch (goto or return) from the loop because skipping the cleanup will + lead to undefined behavior. During the scan of the list, the item has both + of its links locked, so concurrent operations on the list are safe. However + the thread holding the list locked must be careful not to perform other + locking operations. In order to remove the current element, setting + to NULL is sufficient to make the inner loop not try to re-attach it. It is + recommended to reinitialize it though if it is expected to be reused, so as + not to leave its pointers locked. Same if other threads are trying to + concurrently operate on the element. + + From within the loop, the list looks like this: + + MT_LIST_FOR_EACH_ENTRY_LOCKED(item, lh, list, back) { + // A C + // back: <=============> + // item->list + // +---+ +---+ +-V-+ +---+ +---+ +---+ + // #=>|lh |<===>| A |x x| |x x| C |<===>| D |<===>| E |<=# + // # +---+ +---+ +---+ +---+ +---+ +---+ # + // #===========================================================# + } + + This means that only the current item as well as its two neighbors are + locked. It is thus possible to act on any other part of the list in + parallel (other threads might have begun slightly earlier). 
However if + a thread is too slow to proceed, other threads may quickly reach its + position, and all of them will then wait on the same element, slowing + down the progress. + + +MT_LIST_FOR_EACH_ENTRY_UNLOCKED(item, list_head, member, back) + Iterates through a list of items of type "typeof(*item)" which are + linked via a "struct mt_list" member named . A pointer to the head + of the list is passed in . is a temporary struct mt_list, + used internally. It contains a copy of the contents of the current item's + list member before resetting it. This macro is implemented using two nested + loops, each defined as a separate macro for easier inspection. The inner + loop will run for each element in the list, and the outer loop will run + only once to do some cleanup and unlocking when the end of the list is + reached or user breaks from inner loop. It is safe to break from this macro + as the cleanup will be performed anyway, but it is strictly forbidden to + branch (goto or return) from the loop because skipping the cleanup will + lead to undefined behavior. During the scan of the list, the item has both + of its neighbours locked, with both of its ends pointing to itself. Thus, + concurrent walks on the list are safe, but not direct accesses to the + element. In order to remove the current element, setting to NULL is + sufficient to make the inner loop not try to re-attach it. There is no need + to reinitialize it since it is already done. If the element is left, it will + be re-attached to the list. This version is meant as a more user-friendly + method to walk over a list in which it is known by design that elements are + not directly accessed (e.g. a pure MPMC queue). The typical pattern which + corresponds to this case is when the first operation in the iterator's body + is a call to unlock the iterator, which is then no longer needed (though + harmless). + + From within the loop, the list looks like this: + + MT_LIST_FOR_EACH_ENTRY_UNLOCKED(item, lh, list, back) { + // back: A C + // item->list <===> + // +-V-+ +---+ +---+ +---+ +---+ +---+ + // #>| |<# #=>|lh |<===>| A |x x| C |<===>| D |<===>| E |<=# + // # +---+ # # +---+ +---+ +---+ +---+ +---+ # + // #=======# #=================================================# + } + + This means that only the current item's neighbors are locked. It is thus + possible to act on any other part of the list in parallel (other threads + might have begun slightly earlier) but not on the element. However if a + thread is too slow to proceed, other threads may quickly reach its + position, and all of them will then wait on the same element, slowing down + the progress. + + +Examples +-------- + +The example below collects up to 50 jobs from a shared list that are compatible +with the current thread, and moves them to a local list for later processing. +The same pointers are used for both lists and placed in an anonymous union. + + struct job { + union { + struct list list; + struct mt_list mt_list; + }; + unsigned long thread_mask; /* 1 bit per eligible thread */ + /* struct-specific stuff below */ + ... 
+ }; + + extern struct mt_list global_job_queue; + extern struct list local_job_queue; + + struct mt_list back; + struct job *item; + int budget = 50; + + /* collect up to 50 shared items */ + MT_LIST_FOR_EACH_ENTRY_LOCKED(item, &global_job_queue, mt_list, back) { + if (!(item->thread_mask & current_thread_bit)) + continue; /* job not eligible for this thread */ + LIST_APPEND(&local_job_queue, &item->list); + item = NULL; + if (!--budget) + break; + } + + /* process extracted items */ + LIST_FOR_EACH(item, &local_job_queue, list) { + ... + } diff --git a/include/haproxy/list.h b/include/haproxy/list.h index b922bc19b2..5841488597 100644 --- a/include/haproxy/list.h +++ b/include/haproxy/list.h @@ -24,6 +24,7 @@ #include #include +#include /* First undefine some macros which happen to also be defined on OpenBSD, * in sys/queue.h, used by sys/event.h @@ -236,658 +237,6 @@ &item->member != (list_head); \ item = back, back = LIST_ELEM(back->member.p, typeof(back), member)) - -/* - * Locked version of list manipulation macros. - * It is OK to use those concurrently from multiple threads, as long as the - * list is only used with the locked variants. - */ -#define MT_LIST_BUSY ((struct mt_list *)1) - -/* - * Add an item at the beginning of a list. - * Returns 1 if we added the item, 0 otherwise (because it was already in a - * list). - */ -#define MT_LIST_TRY_INSERT(_lh, _el) \ - ({ \ - int _ret = 0; \ - struct mt_list *lh = (_lh), *el = (_el); \ - for (;;__ha_cpu_relax()) { \ - struct mt_list *n, *n2; \ - struct mt_list *p, *p2; \ - n = _HA_ATOMIC_XCHG(&(lh)->next, MT_LIST_BUSY); \ - if (n == MT_LIST_BUSY) \ - continue; \ - p = _HA_ATOMIC_XCHG(&n->prev, MT_LIST_BUSY); \ - if (p == MT_LIST_BUSY) { \ - (lh)->next = n; \ - __ha_barrier_store(); \ - continue; \ - } \ - n2 = _HA_ATOMIC_XCHG(&el->next, MT_LIST_BUSY); \ - if (n2 != el) { /* element already linked */ \ - if (n2 != MT_LIST_BUSY) \ - el->next = n2; \ - n->prev = p; \ - __ha_barrier_store(); \ - lh->next = n; \ - __ha_barrier_store(); \ - if (n2 == MT_LIST_BUSY) \ - continue; \ - break; \ - } \ - p2 = _HA_ATOMIC_XCHG(&el->prev, MT_LIST_BUSY); \ - if (p2 != el) { \ - if (p2 != MT_LIST_BUSY) \ - el->prev = p2; \ - n->prev = p; \ - el->next = el; \ - __ha_barrier_store(); \ - lh->next = n; \ - __ha_barrier_store(); \ - if (p2 == MT_LIST_BUSY) \ - continue; \ - break; \ - } \ - (el)->next = n; \ - (el)->prev = p; \ - __ha_barrier_store(); \ - n->prev = (el); \ - __ha_barrier_store(); \ - p->next = (el); \ - __ha_barrier_store(); \ - _ret = 1; \ - break; \ - } \ - (_ret); \ - }) - -/* - * Add an item at the end of a list. - * Returns 1 if we added the item, 0 otherwise (because it was already in a - * list). 
- */ -#define MT_LIST_TRY_APPEND(_lh, _el) \ - ({ \ - int _ret = 0; \ - struct mt_list *lh = (_lh), *el = (_el); \ - for (;;__ha_cpu_relax()) { \ - struct mt_list *n, *n2; \ - struct mt_list *p, *p2; \ - p = _HA_ATOMIC_XCHG(&(lh)->prev, MT_LIST_BUSY); \ - if (p == MT_LIST_BUSY) \ - continue; \ - n = _HA_ATOMIC_XCHG(&p->next, MT_LIST_BUSY); \ - if (n == MT_LIST_BUSY) { \ - (lh)->prev = p; \ - __ha_barrier_store(); \ - continue; \ - } \ - p2 = _HA_ATOMIC_XCHG(&el->prev, MT_LIST_BUSY); \ - if (p2 != el) { \ - if (p2 != MT_LIST_BUSY) \ - el->prev = p2; \ - p->next = n; \ - __ha_barrier_store(); \ - lh->prev = p; \ - __ha_barrier_store(); \ - if (p2 == MT_LIST_BUSY) \ - continue; \ - break; \ - } \ - n2 = _HA_ATOMIC_XCHG(&el->next, MT_LIST_BUSY); \ - if (n2 != el) { /* element already linked */ \ - if (n2 != MT_LIST_BUSY) \ - el->next = n2; \ - p->next = n; \ - el->prev = el; \ - __ha_barrier_store(); \ - lh->prev = p; \ - __ha_barrier_store(); \ - if (n2 == MT_LIST_BUSY) \ - continue; \ - break; \ - } \ - (el)->next = n; \ - (el)->prev = p; \ - __ha_barrier_store(); \ - p->next = (el); \ - __ha_barrier_store(); \ - n->prev = (el); \ - __ha_barrier_store(); \ - _ret = 1; \ - break; \ - } \ - (_ret); \ - }) - -/* - * Add an item at the beginning of a list. - * It is assumed the element can't already be in a list, so it isn't checked. - */ -#define MT_LIST_INSERT(_lh, _el) \ - ({ \ - int _ret = 0; \ - struct mt_list *lh = (_lh), *el = (_el); \ - for (;;__ha_cpu_relax()) { \ - struct mt_list *n; \ - struct mt_list *p; \ - n = _HA_ATOMIC_XCHG(&(lh)->next, MT_LIST_BUSY); \ - if (n == MT_LIST_BUSY) \ - continue; \ - p = _HA_ATOMIC_XCHG(&n->prev, MT_LIST_BUSY); \ - if (p == MT_LIST_BUSY) { \ - (lh)->next = n; \ - __ha_barrier_store(); \ - continue; \ - } \ - (el)->next = n; \ - (el)->prev = p; \ - __ha_barrier_store(); \ - n->prev = (el); \ - __ha_barrier_store(); \ - p->next = (el); \ - __ha_barrier_store(); \ - _ret = 1; \ - break; \ - } \ - (_ret); \ - }) - -/* - * Add an item at the end of a list. - * It is assumed the element can't already be in a list, so it isn't checked - */ -#define MT_LIST_APPEND(_lh, _el) \ - ({ \ - int _ret = 0; \ - struct mt_list *lh = (_lh), *el = (_el); \ - for (;;__ha_cpu_relax()) { \ - struct mt_list *n; \ - struct mt_list *p; \ - p = _HA_ATOMIC_XCHG(&(lh)->prev, MT_LIST_BUSY); \ - if (p == MT_LIST_BUSY) \ - continue; \ - n = _HA_ATOMIC_XCHG(&p->next, MT_LIST_BUSY); \ - if (n == MT_LIST_BUSY) { \ - (lh)->prev = p; \ - __ha_barrier_store(); \ - continue; \ - } \ - (el)->next = n; \ - (el)->prev = p; \ - __ha_barrier_store(); \ - p->next = (el); \ - __ha_barrier_store(); \ - n->prev = (el); \ - __ha_barrier_store(); \ - _ret = 1; \ - break; \ - } \ - (_ret); \ - }) - -/* - * Add an item at the end of a list. - * It is assumed the element can't already be in a list, so it isn't checked - * Item will be added in busy/locked state, so that it is already - * referenced in the list but no other thread can use it until we're ready. - * - * This returns a struct mt_list, that will be needed at unlock time. 
- * (using MT_LIST_UNLOCK_ELT) - */ -#define MT_LIST_APPEND_LOCKED(_lh, _el) \ - ({ \ - struct mt_list np; \ - struct mt_list *lh = (_lh), *el = (_el); \ - (el)->next = MT_LIST_BUSY; \ - (el)->prev = MT_LIST_BUSY; \ - for (;;__ha_cpu_relax()) { \ - struct mt_list *n; \ - struct mt_list *p; \ - p = _HA_ATOMIC_XCHG(&(lh)->prev, MT_LIST_BUSY); \ - if (p == MT_LIST_BUSY) \ - continue; \ - n = _HA_ATOMIC_XCHG(&p->next, MT_LIST_BUSY); \ - if (n == MT_LIST_BUSY) { \ - (lh)->prev = p; \ - __ha_barrier_store(); \ - continue; \ - } \ - np.prev = p; \ - np.next = n; \ - break; \ - } \ - (np); \ - }) - -/* - * Detach a list from its head. A pointer to the first element is returned - * and the list is closed. If the list was empty, NULL is returned. This may - * exclusively be used with lists modified by MT_LIST_TRY_INSERT/MT_LIST_TRY_APPEND. This - * is incompatible with MT_LIST_DELETE run concurrently. - * If there's at least one element, the next of the last element will always - * be NULL. - */ -#define MT_LIST_BEHEAD(_lh) ({ \ - struct mt_list *lh = (_lh); \ - struct mt_list *_n; \ - struct mt_list *_p; \ - for (;;__ha_cpu_relax()) { \ - _p = _HA_ATOMIC_XCHG(&(lh)->prev, MT_LIST_BUSY); \ - if (_p == MT_LIST_BUSY) \ - continue; \ - if (_p == (lh)) { \ - (lh)->prev = _p; \ - __ha_barrier_store(); \ - _n = NULL; \ - break; \ - } \ - _n = _HA_ATOMIC_XCHG(&(lh)->next, MT_LIST_BUSY); \ - if (_n == MT_LIST_BUSY) { \ - (lh)->prev = _p; \ - __ha_barrier_store(); \ - continue; \ - } \ - if (_n == (lh)) { \ - (lh)->next = _n; \ - (lh)->prev = _p; \ - __ha_barrier_store(); \ - _n = NULL; \ - break; \ - } \ - (lh)->next = (lh); \ - (lh)->prev = (lh); \ - __ha_barrier_store(); \ - _n->prev = _p; \ - __ha_barrier_store(); \ - _p->next = NULL; \ - __ha_barrier_store(); \ - break; \ - } \ - (_n); \ -}) - - -/* Remove an item from a list. - * Returns 1 if we removed the item, 0 otherwise (because it was in no list). 
- */ -#define MT_LIST_DELETE(_el) \ - ({ \ - int _ret = 0; \ - struct mt_list *el = (_el); \ - for (;;__ha_cpu_relax()) { \ - struct mt_list *n, *n2; \ - struct mt_list *p, *p2 = NULL; \ - n = _HA_ATOMIC_XCHG(&(el)->next, MT_LIST_BUSY); \ - if (n == MT_LIST_BUSY) \ - continue; \ - p = _HA_ATOMIC_XCHG(&(el)->prev, MT_LIST_BUSY); \ - if (p == MT_LIST_BUSY) { \ - (el)->next = n; \ - __ha_barrier_store(); \ - continue; \ - } \ - if (p != (el)) { \ - p2 = _HA_ATOMIC_XCHG(&p->next, MT_LIST_BUSY); \ - if (p2 == MT_LIST_BUSY) { \ - (el)->prev = p; \ - (el)->next = n; \ - __ha_barrier_store(); \ - continue; \ - } \ - } \ - if (n != (el)) { \ - n2 = _HA_ATOMIC_XCHG(&n->prev, MT_LIST_BUSY); \ - if (n2 == MT_LIST_BUSY) { \ - if (p2 != NULL) \ - p->next = p2; \ - (el)->prev = p; \ - (el)->next = n; \ - __ha_barrier_store(); \ - continue; \ - } \ - } \ - n->prev = p; \ - p->next = n; \ - if (p != (el) && n != (el)) \ - _ret = 1; \ - __ha_barrier_store(); \ - (el)->prev = (el); \ - (el)->next = (el); \ - __ha_barrier_store(); \ - break; \ - } \ - (_ret); \ - }) - - -/* Remove the first element from the list, and return it */ -#define MT_LIST_POP(_lh, pt, el) \ - ({ \ - void *_ret; \ - struct mt_list *lh = (_lh); \ - for (;;__ha_cpu_relax()) { \ - struct mt_list *n, *n2; \ - struct mt_list *p, *p2; \ - n = _HA_ATOMIC_XCHG(&(lh)->next, MT_LIST_BUSY); \ - if (n == MT_LIST_BUSY) \ - continue; \ - if (n == (lh)) { \ - (lh)->next = lh; \ - __ha_barrier_store(); \ - _ret = NULL; \ - break; \ - } \ - p = _HA_ATOMIC_XCHG(&n->prev, MT_LIST_BUSY); \ - if (p == MT_LIST_BUSY) { \ - (lh)->next = n; \ - __ha_barrier_store(); \ - continue; \ - } \ - n2 = _HA_ATOMIC_XCHG(&n->next, MT_LIST_BUSY); \ - if (n2 == MT_LIST_BUSY) { \ - n->prev = p; \ - __ha_barrier_store(); \ - (lh)->next = n; \ - __ha_barrier_store(); \ - continue; \ - } \ - p2 = _HA_ATOMIC_XCHG(&n2->prev, MT_LIST_BUSY); \ - if (p2 == MT_LIST_BUSY) { \ - n->next = n2; \ - n->prev = p; \ - __ha_barrier_store(); \ - (lh)->next = n; \ - __ha_barrier_store(); \ - continue; \ - } \ - (lh)->next = n2; \ - (n2)->prev = (lh); \ - __ha_barrier_store(); \ - (n)->prev = (n); \ - (n)->next = (n); \ - __ha_barrier_store(); \ - _ret = MT_LIST_ELEM(n, pt, el); \ - break; \ - } \ - (_ret); \ - }) - -#define MT_LIST_HEAD(a) ((void *)(&(a))) - -#define MT_LIST_INIT(l) ((l)->next = (l)->prev = (l)) - -#define MT_LIST_HEAD_INIT(l) { &l, &l } -/* returns a pointer of type to a structure containing a list head called - * at address . Note that can be the result of a function or macro - * since it's used only once. - * Example: MT_LIST_ELEM(cur_node->args.next, struct node *, args) - */ -#define MT_LIST_ELEM(lh, pt, el) ((pt)(((const char *)(lh)) - ((size_t)&((pt)NULL)->el))) - -/* checks if the list head is empty or not */ -#define MT_LIST_ISEMPTY(lh) ((lh)->next == (lh)) - -/* returns a pointer of type to a structure following the element - * which contains list head , which is known as element in - * struct pt. - * Example: MT_LIST_NEXT(args, struct node *, list) - */ -#define MT_LIST_NEXT(lh, pt, el) (MT_LIST_ELEM((lh)->next, pt, el)) - - -/* returns a pointer of type to a structure preceding the element - * which contains list head , which is known as element in - * struct pt. - */ -#undef MT_LIST_PREV -#define MT_LIST_PREV(lh, pt, el) (MT_LIST_ELEM((lh)->prev, pt, el)) - -/* checks if the list element was added to a list or not. 
This only - * works when detached elements are reinitialized (using LIST_DEL_INIT) - */ -#define MT_LIST_INLIST(el) ((el)->next != (el)) - -/* Lock an element in the list, to be sure it won't be removed nor - * accessed by another thread while the lock is held. - * Locking behavior is inspired from MT_LIST_DELETE macro, - * thus this macro can safely be used concurrently with MT_LIST_DELETE. - * This returns a struct mt_list, that will be needed at unlock time. - * (using MT_LIST_UNLOCK_ELT) - */ -#define MT_LIST_LOCK_ELT(_el) \ - ({ \ - struct mt_list ret; \ - struct mt_list *el = (_el); \ - for (;;__ha_cpu_relax()) { \ - struct mt_list *n, *n2; \ - struct mt_list *p, *p2 = NULL; \ - n = _HA_ATOMIC_XCHG(&(el)->next, MT_LIST_BUSY); \ - if (n == MT_LIST_BUSY) \ - continue; \ - p = _HA_ATOMIC_XCHG(&(el)->prev, MT_LIST_BUSY); \ - if (p == MT_LIST_BUSY) { \ - (el)->next = n; \ - __ha_barrier_store(); \ - continue; \ - } \ - if (p != (el)) { \ - p2 = _HA_ATOMIC_XCHG(&p->next, MT_LIST_BUSY);\ - if (p2 == MT_LIST_BUSY) { \ - (el)->prev = p; \ - (el)->next = n; \ - __ha_barrier_store(); \ - continue; \ - } \ - } \ - if (n != (el)) { \ - n2 = _HA_ATOMIC_XCHG(&n->prev, MT_LIST_BUSY);\ - if (n2 == MT_LIST_BUSY) { \ - if (p2 != NULL) \ - p->next = p2; \ - (el)->prev = p; \ - (el)->next = n; \ - __ha_barrier_store(); \ - continue; \ - } \ - } \ - ret.next = n; \ - ret.prev = p; \ - break; \ - } \ - ret; \ - }) - -/* Unlock an element previously locked by MT_LIST_LOCK_ELT. "np" is the - * struct mt_list returned by MT_LIST_LOCK_ELT(). - */ -#define MT_LIST_UNLOCK_ELT(_el, np) \ - do { \ - struct mt_list *n = (np).next, *p = (np).prev; \ - struct mt_list *el = (_el); \ - (el)->next = n; \ - (el)->prev = p; \ - if (n != (el)) \ - n->prev = (el); \ - if (p != (el)) \ - p->next = (el); \ - } while (0) - -/* Internal macroes for the foreach macroes */ -#define _MT_LIST_UNLOCK_NEXT(el, np) \ - do { \ - struct mt_list *n = (np); \ - (el)->next = n; \ - if (n != (el)) \ - n->prev = (el); \ - } while (0) - -/* Internal macroes for the foreach macroes */ -#define _MT_LIST_UNLOCK_PREV(el, np) \ - do { \ - struct mt_list *p = (np); \ - (el)->prev = p; \ - if (p != (el)) \ - p->next = (el); \ - } while (0) - -#define _MT_LIST_LOCK_NEXT(el) \ - ({ \ - struct mt_list *n = NULL; \ - for (;;__ha_cpu_relax()) { \ - struct mt_list *n2; \ - n = _HA_ATOMIC_XCHG(&((el)->next), MT_LIST_BUSY); \ - if (n == MT_LIST_BUSY) \ - continue; \ - if (n != (el)) { \ - n2 = _HA_ATOMIC_XCHG(&n->prev, MT_LIST_BUSY);\ - if (n2 == MT_LIST_BUSY) { \ - (el)->next = n; \ - __ha_barrier_store(); \ - continue; \ - } \ - } \ - break; \ - } \ - n; \ - }) - -#define _MT_LIST_LOCK_PREV(el) \ - ({ \ - struct mt_list *p = NULL; \ - for (;;__ha_cpu_relax()) { \ - struct mt_list *p2; \ - p = _HA_ATOMIC_XCHG(&((el)->prev), MT_LIST_BUSY); \ - if (p == MT_LIST_BUSY) \ - continue; \ - if (p != (el)) { \ - p2 = _HA_ATOMIC_XCHG(&p->next, MT_LIST_BUSY);\ - if (p2 == MT_LIST_BUSY) { \ - (el)->prev = p; \ - __ha_barrier_store(); \ - continue; \ - } \ - } \ - break; \ - } \ - p; \ - }) - -#define _MT_LIST_RELINK_DELETED(elt2) \ - do { \ - struct mt_list *n = elt2.next, *p = elt2.prev; \ - ALREADY_CHECKED(p); \ - n->prev = p; \ - p->next = n; \ - } while (0); - -/* Equivalent of MT_LIST_DELETE(), to be used when parsing the list with mt_list_entry_for_each_safe(). 
- * It should be the element currently parsed (tmpelt1) - */ -#define MT_LIST_DELETE_SAFE(_el) \ - do { \ - struct mt_list *el = (_el); \ - (el)->prev = (el); \ - (el)->next = (el); \ - (_el) = NULL; \ - } while (0) - -/* Safe as MT_LIST_DELETE_SAFE, but it won't reinit the element */ -#define MT_LIST_DELETE_SAFE_NOINIT(_el) \ - do { \ - (_el) = NULL; \ - } while (0) - -/* Iterates through a list of items of type "typeof(*item)" which are - * linked via a "struct mt_list" member named . A pointer to the head - * of the list is passed in . - * - * is a temporary struct mt_list *, and is a temporary - * struct mt_list, used internally, both are needed for MT_LIST_DELETE_SAFE. - * - * This macro is implemented using a nested loop. The inner loop will run for - * each element in the list, and the upper loop will run only once to do some - * cleanup when the end of the list is reached or user breaks from inner loop. - * It's safe to break from this macro as the cleanup will be performed anyway, - * but it is strictly forbidden to goto from the loop because skipping the - * cleanup will lead to undefined behavior. - * - * In order to remove the current element, please use MT_LIST_DELETE_SAFE. - * - * Example: - * mt_list_for_each_entry_safe(item, list_head, list_member, elt1, elt2) { - * ... - * } - */ -#define mt_list_for_each_entry_safe(item, list_head, member, tmpelt, tmpelt2) \ - for ((tmpelt) = NULL; (tmpelt) != MT_LIST_BUSY; ({ \ - /* post loop cleanup: \ - * gets executed only once to perform cleanup \ - * after child loop has finished \ - */ \ - if (tmpelt) { \ - /* last elem still exists, unlocking it */ \ - if (tmpelt2.prev) \ - MT_LIST_UNLOCK_ELT(tmpelt, tmpelt2); \ - else { \ - /* special case: child loop did not run \ - * so tmpelt2.prev == NULL \ - * (empty list) \ - */ \ - _MT_LIST_UNLOCK_NEXT(tmpelt, tmpelt2.next); \ - } \ - } else { \ - /* last elem was deleted by user, relink required: \ - * prev->next = next \ - * next->prev = prev \ - */ \ - _MT_LIST_RELINK_DELETED(tmpelt2); \ - } \ - /* break parent loop \ - * (this loop runs exactly one time) \ - */ \ - (tmpelt) = MT_LIST_BUSY; \ - })) \ - for ((tmpelt) = (list_head), (tmpelt2).prev = NULL, (tmpelt2).next = _MT_LIST_LOCK_NEXT(tmpelt); ({ \ - /* this gets executed before each user body loop */ \ - (item) = MT_LIST_ELEM((tmpelt2.next), typeof(item), member); \ - if (&item->member != (list_head)) { \ - /* did not reach end of list \ - * (back to list_head == end of list reached) \ - */ \ - if (tmpelt2.prev != &item->member) \ - tmpelt2.next = _MT_LIST_LOCK_NEXT(&item->member); \ - else { \ - /* FIXME: is this even supposed to happen?? \ - * I'm not understanding how \ - * tmpelt2.prev could be equal to &item->member. 
\ - * running 'test_list' multiple times with 8 \ - * concurrent threads: this never gets reached \ - */ \ - tmpelt2.next = tmpelt; \ - } \ - if (tmpelt != NULL) { \ - /* if tmpelt was not deleted by user */ \ - if (tmpelt2.prev) { \ - /* not executed on first run \ - * (tmpelt2.prev == NULL on first run) \ - */ \ - _MT_LIST_UNLOCK_PREV(tmpelt, tmpelt2.prev); \ - /* unlock_prev will implicitly relink: \ - * elt->prev = prev \ - * prev->next = elt \ - */ \ - } \ - tmpelt2.prev = tmpelt; \ - } \ - (tmpelt) = &item->member; \ - } \ - /* else: end of list reached (loop stop cond) */ \ - }), \ - &item->member != (list_head);) - static __inline struct list *mt_list_to_list(struct mt_list *list) { union { diff --git a/include/import/mt_list.h b/include/import/mt_list.h new file mode 100644 index 0000000000..2a07ea0318 --- /dev/null +++ b/include/import/mt_list.h @@ -0,0 +1,1197 @@ +/* + * include/mt_list.h + * + * Multi-thread aware circular lists. + * + * Copyright (C) 2018-2023 Willy Tarreau + * Copyright (C) 2018-2023 Olivier Houchard + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _MT_LIST_H +#define _MT_LIST_H + +#include +#include + +/* set NOINLINE to forcefully disable user functions inlining */ +#if defined(NOINLINE) +#define MT_INLINE __attribute__((noinline)) +#else +#define MT_INLINE inline +#endif + +// Note: already defined in list-t.h +#ifndef _HAPROXY_LIST_T_H +/* A list element, it's both a head or any element. Both pointers always point + * to a valid list element (possibly itself for a detached element or an empty + * list head), or are equal to MT_LIST_BUSY for a locked pointer indicating + * that the target element is about to be modified. + */ +struct mt_list { + struct mt_list *next; + struct mt_list *prev; +}; +#endif + +/* This is the value of the locked list pointer. It is assigned to an mt_list's + * ->next or ->prev pointer to lock the link to the other element while this + * element is being inspected or modified. + */ +#define MT_LIST_BUSY ((struct mt_list *)1) + +/* This is used to pre-initialize an mt_list element during its declaration. + * The argument is the name of the variable being declared and being assigned + * this value. Example: + * + * struct mt_list pool_head = MT_LIST_HEAD_INIT(pool_head); + */ +#define MT_LIST_HEAD_INIT(l) { .next = &l, .prev = &l } + + +/* Returns a pointer of type to the structure containing a member of type + * mt_list called that is accessible at address . 
Note that may be + * the result of a function or macro since it's used only once. Example: + * + * return MT_LIST_ELEM(cur_node->args.next, struct node *, args) + */ +#define MT_LIST_ELEM(a, t, m) ((t)(size_t)(((size_t)(a)) - ((size_t)&((t)NULL)->m))) + + +/* Returns a pointer of type to a structure following the element which + * contains the list element at address , which is known as member in + * struct t*. Example: + * + * return MT_LIST_NEXT(args, struct node *, list); + */ +#define MT_LIST_NEXT(a, t, m) (MT_LIST_ELEM((a)->next, t, m)) + + +/* Returns a pointer of type to a structure preceeding the element which + * contains the list element at address , which is known as member in + * struct t*. Example: + * + * return MT_LIST_PREV(args, struct node *, list); + */ +#define MT_LIST_PREV(a, t, m) (MT_LIST_ELEM((a)->prev, t, m)) + + +/* This is used to prevent the compiler from knowing the origin of the + * variable, and sometimes avoid being confused about possible null-derefs + * that it sometimes believes are possible after pointer casts. + */ +#define MT_ALREADY_CHECKED(p) do { asm("" : "=rm"(p) : "0"(p)); } while (0) + + +/* Returns a pointer of type to the structure containing a member of type + * mt_list called that comes from the first element in list , that is + * atomically detached. If the list is empty, NULL is returned instead. + * Example: + * + * while ((conn = MT_LIST_POP(queue, struct conn *, list))) ... + */ +#define MT_LIST_POP(lh, t, m) \ + ({ \ + struct mt_list *_n = mt_list_pop(lh); \ + (_n ? MT_LIST_ELEM(_n, t, m) : NULL); \ + }) + +/* Iterates through a list of items of type "typeof(*item)" which are + * linked via a "struct mt_list" member named . A pointer to the head + * of the list is passed in . + * + * is a temporary struct mt_list, used internally to store the current + * element's ends while it is locked. + * + * This macro is implemented using two nested loops, each defined as a separate + * macro for easier inspection. The inner loop will run for each element in the + * list, and the outer loop will run only once to do some cleanup when the end + * of the list is reached or user breaks from inner loop. It's safe to break + * from this macro as the cleanup will be performed anyway, but it is strictly + * forbidden to branch (goto or return) from the loop because skipping the + * cleanup will lead to undefined behavior. + * + * The current element is detached from the list while being visited, with both + * links locked, and re-attached when switching to the next item. As such in + * order to delete the current item, it's sufficient to set it to NULL to + * prevent the inner loop from attaching it back. In this case it's recommended + * to re-init the item before reusing it in order to clear the locks, in case + * this element is being waited upon from a concurrent thread, or is intended + * to be reused later (e.g. stored into a pool). + * + * Example: + * MT_LIST_FOR_EACH_ENTRY_LOCKED(item, list_head, list_member, back) { + * ... + * } + */ +#define MT_LIST_FOR_EACH_ENTRY_LOCKED(item, list_head, member, back) \ + _MT_LIST_FOR_EACH_ENTRY_LOCKED_OUTER(item, list_head, member, back) \ + _MT_LIST_FOR_EACH_ENTRY_LOCKED_INNER(item, list_head, member, back) + +/* The same as above, except that the item is returned unlocked. The caller + * thus never has to worry about unlocking it, however it must be certain that + * no other thread is trying to use the element in parallel. 
This is useful for + * constructs such as FIFOs or MPMC queues, where there is no possibility for + * an element to be removed via a direct access, as it saves the caller from + * having to care about the unlock operation when deleting it. The simpler + * usage has a small cost of two extra memory writes per iteration. + */ +#define MT_LIST_FOR_EACH_ENTRY_UNLOCKED(item, list_head, member, back) \ + _MT_LIST_FOR_EACH_ENTRY_UNLOCKED_OUTER(item, list_head, member, back) \ + _MT_LIST_FOR_EACH_ENTRY_UNLOCKED_INNER(item, list_head, member, back) + + +/* The macros below directly map to their function equivalent. They are + * provided for ease of use. Please refer to the equivalent functions + * for their decription. + */ +#define MT_LIST_INIT(e) (mt_list_init(e)) +#define MT_LIST_ISEMPTY(e) (mt_list_isempty(e)) +#define MT_LIST_INLIST(e) (mt_list_inlist(e)) +#define MT_LIST_TRY_INSERT(l, e) (mt_list_try_insert(l, e)) +#define MT_LIST_TRY_APPEND(l, e) (mt_list_try_append(l, e)) +#define MT_LIST_BEHEAD(l) (mt_list_behead(l)) +#define MT_LIST_INSERT(l, e) (mt_list_insert(l, e)) +#define MT_LIST_APPEND(l, e) (mt_list_append(l, e)) +#define MT_LIST_DELETE(e) (mt_list_delete(e)) +#define MT_LIST_LOCK_NEXT(el) (mt_list_lock_next(el)) +#define MT_LIST_LOCK_PREV(el) (mt_list_lock_prev(el)) +#define MT_LIST_LOCK_FULL(el) (mt_list_lock_full(el)) +#define MT_LIST_UNLOCK_LINK(ends) (mt_list_unlock_link(ends)) +#define MT_LIST_UNLOCK_FULL(el, ends) (mt_list_unlock_full(el, ends)) + + +/* This is a Xorshift-based thread-local PRNG aimed at reducing the risk of + * resonance between competing threads during exponential back-off. Threads + * quickly become out of sync and use completely different values. + */ +static __thread unsigned int _prng_state = 0xEDCBA987; +static inline unsigned int mt_list_prng() +{ + unsigned int x = _prng_state; + + x ^= x << 13; + x ^= x >> 17; + x ^= x << 5; + return _prng_state = x; +} + +static inline unsigned int mt_list_wait(unsigned factor) +{ + //return ((uint64_t)factor * mt_list_prng() + factor) >> 32; + return mt_list_prng() & factor; +} + +/* This function relaxes the CPU during contention. It is meant to be + * architecture-specific and may even be OS-specific, and always exists in a + * generic version. It should return a non-null integer value that can be used + * as a boolean in while() loops. The argument indicates the maximum number of + * loops to be performed before returning. + */ +static inline __attribute__((always_inline)) unsigned long mt_list_cpu_relax(unsigned long loop) +{ + /* limit maximum wait time for unlucky threads */ + loop = mt_list_wait(loop); + + for (loop &= 0x7fffff; loop >= 32; loop--) { +#if defined(__x86_64__) + /* This is a PAUSE instruction on x86_64 */ + asm volatile("rep;nop\n"); +#elif defined(__aarch64__) + /* This was shown to improve fairness on modern ARMv8 + * such as Cortex A72 or Neoverse N1. + */ + asm volatile("isb"); +#else + /* Generic implementation */ + asm volatile(""); +#endif + } + /* faster ending */ + while (loop--) + asm volatile(""); + return 1; +} + + +/* Initialize list element . It will point to itself, matching a list head + * or a detached list element. The list element is returned. + */ +static inline struct mt_list *mt_list_init(struct mt_list *el) +{ + el->next = el->prev = el; + return el; +} + + +/* Returns true if the list element corresponds to an empty list head or a + * detached element, false otherwise. Only the member is checked. 
+ */ +static inline long mt_list_isempty(const struct mt_list *el) +{ + return el->next == el; +} + + +/* Returns true if the list element corresponds to a non-empty list head or + * to an element that is part of a list, false otherwise. Only the member + * is checked. + */ +static inline long mt_list_inlist(const struct mt_list *el) +{ + return el->next != el; +} + + +/* Adds element at the beginning of list , which means that element + * is added immediately after element (nothing strictly requires that + * is effectively the list's head, any valid element will work). Returns + * non-zero if the element was added, otherwise zero (because the element was + * already part of a list). + */ +static MT_INLINE long mt_list_try_insert(struct mt_list *lh, struct mt_list *el) +{ + struct mt_list *n, *n2; + struct mt_list *p, *p2; + unsigned long loops = 0; + long ret = 0; + + /* Note that the first element checked is the most likely to face + * contention, particularly on the list's head/tail. That's why we + * perform a prior load there: if the element is being modified by + * another thread, requesting a read-only access only leaves the + * other thread's cache line in shared mode, which will impact it + * less than if we attempted a change that would invalidate it. + */ + for (;; mt_list_cpu_relax(loops = loops * 8 + 7)) { + n = __atomic_exchange_n(&lh->next, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (n == MT_LIST_BUSY) + continue; + + p = __atomic_exchange_n(&n->prev, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (p == MT_LIST_BUSY) { + lh->next = n; + __atomic_thread_fence(__ATOMIC_RELEASE); + continue; + } + + n2 = __atomic_exchange_n(&el->next, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (n2 != el) { + /* This element was already attached elsewhere */ + if (n2 != MT_LIST_BUSY) + el->next = n2; + n->prev = p; + __atomic_thread_fence(__ATOMIC_RELEASE); + + lh->next = n; + __atomic_thread_fence(__ATOMIC_RELEASE); + + if (n2 == MT_LIST_BUSY) + continue; + break; + } + + p2 = __atomic_exchange_n(&el->prev, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (p2 != el) { + /* This element was already attached elsewhere */ + if (p2 != MT_LIST_BUSY) + el->prev = p2; + n->prev = p; + el->next = el; + __atomic_thread_fence(__ATOMIC_RELEASE); + + lh->next = n; + __atomic_thread_fence(__ATOMIC_RELEASE); + + if (p2 == MT_LIST_BUSY) + continue; + break; + } + + el->next = n; + el->prev = p; + __atomic_thread_fence(__ATOMIC_RELEASE); + + n->prev = el; + __atomic_thread_fence(__ATOMIC_RELEASE); + + p->next = el; + __atomic_thread_fence(__ATOMIC_RELEASE); + + ret = 1; + break; + } + return ret; +} + + +/* Adds element at the end of list , which means that element is + * added immediately before element (nothing strictly requires that + * is effectively the list's head, any valid element will work). Returns non- + * zero if the element was added, otherwise zero (because the element was + * already part of a list). + */ +static MT_INLINE long mt_list_try_append(struct mt_list *lh, struct mt_list *el) +{ + struct mt_list *n, *n2; + struct mt_list *p, *p2; + unsigned long loops = 0; + long ret = 0; + + /* Note that the first element checked is the most likely to face + * contention, particularly on the list's head/tail. That's why we + * perform a prior load there: if the element is being modified by + * another thread, requesting a read-only access only leaves the + * other thread's cache line in shared mode, which will impact it + * less than if we attempted a change that would invalidate it. 
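+ *
+ * Also note the exponential back-off on retries below: the budget passed to
+ * mt_list_cpu_relax() grows roughly geometrically (loops = loops * 8 + 7),
+ * and the function waits a pseudo-random duration bounded by it, which
+ * quickly desynchronizes competing threads.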
+ */ + for (;; mt_list_cpu_relax(loops = loops * 8 + 7)) { + p = __atomic_exchange_n(&lh->prev, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (p == MT_LIST_BUSY) + continue; + + n = __atomic_exchange_n(&p->next, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (n == MT_LIST_BUSY) { + lh->prev = p; + __atomic_thread_fence(__ATOMIC_RELEASE); + continue; + } + + p2 = __atomic_exchange_n(&el->prev, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (p2 != el) { + /* This element was already attached elsewhere */ + if (p2 != MT_LIST_BUSY) + el->prev = p2; + p->next = n; + __atomic_thread_fence(__ATOMIC_RELEASE); + + lh->prev = p; + __atomic_thread_fence(__ATOMIC_RELEASE); + + if (p2 == MT_LIST_BUSY) + continue; + break; + } + + n2 = __atomic_exchange_n(&el->next, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (n2 != el) { + /* This element was already attached elsewhere */ + if (n2 != MT_LIST_BUSY) + el->next = n2; + p->next = n; + el->prev = el; + __atomic_thread_fence(__ATOMIC_RELEASE); + + lh->prev = p; + __atomic_thread_fence(__ATOMIC_RELEASE); + + if (n2 == MT_LIST_BUSY) + continue; + break; + } + + el->next = n; + el->prev = p; + __atomic_thread_fence(__ATOMIC_RELEASE); + + p->next = el; + __atomic_thread_fence(__ATOMIC_RELEASE); + + n->prev = el; + __atomic_thread_fence(__ATOMIC_RELEASE); + + ret = 1; + break; + } + return ret; +} + + +/* Detaches a list from its head. A pointer to the first element is returned + * and the list is closed. If the list was empty, NULL is returned. This may + * exclusively be used with lists manipulated using mt_list_try_insert() and + * mt_list_try_append(). This is incompatible with mt_list_delete() run + * concurrently. If there's at least one element, the next of the last element + * will always be NULL. + */ +static MT_INLINE struct mt_list *mt_list_behead(struct mt_list *lh) +{ + struct mt_list *n; + struct mt_list *p; + unsigned long loops = 0; + + for (;; mt_list_cpu_relax(loops = loops * 8 + 7)) { + p = __atomic_exchange_n(&lh->prev, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (p == MT_LIST_BUSY) + continue; + if (p == lh) { + lh->prev = p; + __atomic_thread_fence(__ATOMIC_RELEASE); + n = NULL; + break; + } + + n = __atomic_exchange_n(&lh->next, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (n == MT_LIST_BUSY) { + lh->prev = p; + __atomic_thread_fence(__ATOMIC_RELEASE); + continue; + } + if (n == lh) { + lh->next = n; + lh->prev = p; + __atomic_thread_fence(__ATOMIC_RELEASE); + n = NULL; + break; + } + + lh->next = lh->prev = lh; + __atomic_thread_fence(__ATOMIC_RELEASE); + + n->prev = p; + __atomic_thread_fence(__ATOMIC_RELEASE); + + p->next = NULL; + __atomic_thread_fence(__ATOMIC_RELEASE); + break; + } + return n; +} + + +/* Adds element at the beginning of list , which means that element + * is added immediately after element (nothing strictly requires that + * is effectively the list's head, any valid element will work). It is + * assumed that the element cannot already be part of a list so it isn't + * checked for this. 
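+ *
+ * Minimal usage sketch for a LIFO-style push, assuming a hypothetical
+ * "struct job" embedding a "struct mt_list link" member and a "jobs" list
+ * head (neither is part of this API); the element is freshly allocated,
+ * hence exclusively owned by this thread, and no prior mt_list_init() is
+ * needed since both of its pointers are overwritten:
+ *
+ *     struct job *j = malloc(sizeof(*j));
+ *
+ *     if (j) {
+ *         j->id = 42;
+ *         mt_list_insert(&jobs, &j->link);  /* j is now first in jobs */
+ *     }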
+ */ +static MT_INLINE void mt_list_insert(struct mt_list *lh, struct mt_list *el) +{ + struct mt_list *n; + struct mt_list *p; + unsigned long loops = 0; + + for (;; mt_list_cpu_relax(loops = loops * 8 + 7)) { + n = __atomic_exchange_n(&lh->next, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (n == MT_LIST_BUSY) + continue; + + p = __atomic_exchange_n(&n->prev, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (p == MT_LIST_BUSY) { + lh->next = n; + __atomic_thread_fence(__ATOMIC_RELEASE); + continue; + } + + el->next = n; + el->prev = p; + __atomic_thread_fence(__ATOMIC_RELEASE); + + n->prev = el; + __atomic_thread_fence(__ATOMIC_RELEASE); + + p->next = el; + __atomic_thread_fence(__ATOMIC_RELEASE); + break; + } +} + + +/* Adds element at the end of list , which means that element is + * added immediately after element (nothing strictly requires that is + * effectively the list's head, any valid element will work). It is assumed + * that the element cannot already be part of a list so it isn't checked for + * this. + */ +static MT_INLINE void mt_list_append(struct mt_list *lh, struct mt_list *el) +{ + struct mt_list *n; + struct mt_list *p; + unsigned long loops = 0; + + for (;; mt_list_cpu_relax(loops = loops * 8 + 7)) { + p = __atomic_exchange_n(&lh->prev, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (p == MT_LIST_BUSY) + continue; + + n = __atomic_exchange_n(&p->next, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (n == MT_LIST_BUSY) { + lh->prev = p; + __atomic_thread_fence(__ATOMIC_RELEASE); + continue; + } + + el->next = n; + el->prev = p; + __atomic_thread_fence(__ATOMIC_RELEASE); + + p->next = el; + __atomic_thread_fence(__ATOMIC_RELEASE); + + n->prev = el; + __atomic_thread_fence(__ATOMIC_RELEASE); + break; + } +} + + +/* Removes element from the list it belongs to. The function returns + * non-zero if the element could be removed, otherwise zero if the element + * could not be removed, because it was already not in a list anymore. This is + * functionally equivalent to the following except that it also returns a + * success status: + * link = mt_list_lock_full(el); + * mt_list_unlock_link(link); + * mt_list_unlock_self(link); + */ +static MT_INLINE long mt_list_delete(struct mt_list *el) +{ + struct mt_list *n, *n2; + struct mt_list *p, *p2; + unsigned long loops = 0; + long ret = 0; + + for (;; mt_list_cpu_relax(loops = loops * 8 + 7)) { + p2 = NULL; + n = __atomic_exchange_n(&el->next, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (n == MT_LIST_BUSY) + continue; + + p = __atomic_exchange_n(&el->prev, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (p == MT_LIST_BUSY) { + el->next = n; + __atomic_thread_fence(__ATOMIC_RELEASE); + continue; + } + + if (p != el) { + p2 = __atomic_exchange_n(&p->next, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (p2 == MT_LIST_BUSY) { + el->prev = p; + el->next = n; + __atomic_thread_fence(__ATOMIC_RELEASE); + continue; + } + } + + if (n != el) { + n2 = __atomic_exchange_n(&n->prev, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (n2 == MT_LIST_BUSY) { + if (p2 != NULL) + p->next = p2; + el->prev = p; + el->next = n; + __atomic_thread_fence(__ATOMIC_RELEASE); + continue; + } + } + + n->prev = p; + p->next = n; + __atomic_thread_fence(__ATOMIC_RELEASE); + + el->prev = el->next = el; + __atomic_thread_fence(__ATOMIC_RELEASE); + + if (p != el && n != el) + ret = 1; + break; + } + return ret; +} + + +/* Removes the first element from the list , and returns it in detached + * form. If the list is already empty, NULL is returned instead. 
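+ *
+ * Consumer-side sketch, assuming the same hypothetical "struct job" with a
+ * "struct mt_list link" member and a "jobs" head as in the sketches above:
+ *
+ *     struct mt_list *node = mt_list_pop(&jobs);
+ *
+ *     if (node) {
+ *         struct job *j = MT_LIST_ELEM(node, struct job *, link);
+ *
+ *         handle_job(j);   /* hypothetical consumer */
+ *     }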
+ */ +static MT_INLINE struct mt_list *mt_list_pop(struct mt_list *lh) +{ + struct mt_list *n, *n2; + struct mt_list *p, *p2; + unsigned long loops = 0; + + for (;; mt_list_cpu_relax(loops = loops * 8 + 7)) { + n = __atomic_exchange_n(&lh->next, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (n == MT_LIST_BUSY) + continue; + + if (n == lh) { + /* list is empty */ + lh->next = lh; + __atomic_thread_fence(__ATOMIC_RELEASE); + n = NULL; + break; + } + + p = __atomic_exchange_n(&n->prev, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (p == MT_LIST_BUSY) { + lh->next = n; + __atomic_thread_fence(__ATOMIC_RELEASE); + continue; + } + + n2 = __atomic_exchange_n(&n->next, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (n2 == MT_LIST_BUSY) { + n->prev = p; + __atomic_thread_fence(__ATOMIC_RELEASE); + + lh->next = n; + __atomic_thread_fence(__ATOMIC_RELEASE); + continue; + } + + p2 = __atomic_exchange_n(&n2->prev, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (p2 == MT_LIST_BUSY) { + n->next = n2; + n->prev = p; + __atomic_thread_fence(__ATOMIC_RELEASE); + + lh->next = n; + __atomic_thread_fence(__ATOMIC_RELEASE); + continue; + } + + lh->next = n2; + n2->prev = lh; + __atomic_thread_fence(__ATOMIC_RELEASE); + + n->prev = n->next = n; + __atomic_thread_fence(__ATOMIC_RELEASE); + + /* return n */ + break; + } + return n; +} + + +/* Opens the list just after which usually is the list's head, but not + * necessarily. The link between and its next element is cut and replaced + * with an MT_LIST_BUSY lock. The ends of the removed link are returned as an + * mt_list entry. The operation can be cancelled using mt_list_unlock_link() + * on the returned value, which will restore the link and unlock the list, or + * using mt_list_unlock_full() which will replace the link with another + * element and also unlock the list, effectively resulting in inserting that + * element after . Example: + * + * struct mt_list *list_insert(struct mt_list *list) + * { + * struct mt_list tmp = mt_list_lock_next(list); + * struct mt_list *el = alloc_element_to_insert(); + * if (el) + * mt_list_unlock_full(el, tmp); + * else + * mt_list_unlock_link(tmp); + * return el; + * } + */ +static MT_INLINE struct mt_list mt_list_lock_next(struct mt_list *lh) +{ + struct mt_list el; + unsigned long loops = 0; + + for (;; mt_list_cpu_relax(loops = loops * 8 + 7)) { + el.next = __atomic_exchange_n(&lh->next, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (el.next == MT_LIST_BUSY) + continue; + + el.prev = __atomic_exchange_n(&el.next->prev, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (el.prev == MT_LIST_BUSY) { + lh->next = el.next; + __atomic_thread_fence(__ATOMIC_RELEASE); + continue; + } + break; + } + return el; +} + + +/* Opens the list just before which usually is the list's head, but not + * necessarily. The link between and its prev element is cut and replaced + * with an MT_LIST_BUSY lock. The ends of the removed link are returned as an + * mt_list entry. The operation can be cancelled using mt_list_unlock_link() + * on the returned value, which will restore the link and unlock the list, or + * using mt_list_unlock_full() which will replace the link with another + * element and also unlock the list, effectively resulting in inserting that + * element before . 
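+ * The returned value is a plain copy of the two ends of the cut link, not a
+ * pointer into the list, so it may simply be kept on the stack until the
+ * operation is completed by one of the two calls mentioned above.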
Example: + * + * struct mt_list *list_append(struct mt_list *list) + * { + * struct mt_list tmp = mt_list_lock_prev(list); + * struct mt_list *el = alloc_element_to_insert(); + * if (el) + * mt_list_unlock_full(el, tmp); + * else + * mt_list_unlock_link(tmp); + * return el; + * } + */ +static MT_INLINE struct mt_list mt_list_lock_prev(struct mt_list *lh) +{ + struct mt_list el; + unsigned long loops = 0; + + for (;; mt_list_cpu_relax(loops = loops * 8 + 7)) { + el.prev = __atomic_exchange_n(&lh->prev, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (el.prev == MT_LIST_BUSY) + continue; + + el.next = __atomic_exchange_n(&el.prev->next, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (el.next == MT_LIST_BUSY) { + lh->prev = el.prev; + __atomic_thread_fence(__ATOMIC_RELEASE); + continue; + } + break; + } + return el; +} + + +/* Element is locked on both sides, but the list around it isn't touched. + * A copy of the previous element is returned, and may be used to pass to + * mt_list_unlock_elem() to unlock and reconnect the element. + */ +static MT_INLINE struct mt_list mt_list_lock_elem(struct mt_list *el) +{ + unsigned long loops = 0; + struct mt_list ret; + + for (;; mt_list_cpu_relax(loops = loops * 8 + 7)) { + ret.next = __atomic_exchange_n(&el->next, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (ret.next == MT_LIST_BUSY) + continue; + + ret.prev = __atomic_exchange_n(&el->prev, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (ret.prev == MT_LIST_BUSY) { + el->next = ret.next; + __atomic_thread_fence(__ATOMIC_RELEASE); + continue; + } + break; + } + return ret; +} + + +/* Restores element to its previous copy , effectively unlocking it. + * This is to be used with the returned element from mt_list_lock_elem(). + */ +static inline void mt_list_unlock_elem(struct mt_list *el, struct mt_list back) +{ + *el = back; + __atomic_thread_fence(__ATOMIC_RELEASE); +} + + +/* Atomically resets element by connecting it onto itself ignoring + * previous contents. This is used to unlock a locked element inside iterators + * so that the inner block sees an unlocked iterator. + */ +static inline void mt_list_unlock_self(struct mt_list *el) +{ + el->next = el; + el->prev = el; + __atomic_thread_fence(__ATOMIC_RELEASE); +} + + +/* Opens the list around element . Both the links between and its prev + * element and between and its next element are cut and replaced with an + * MT_LIST_BUSY lock. The element itself also has its ends replaced with a + * lock, and the ends of the element are returned as an mt_list entry. This + * results in the element being detached from the list and both the element and + * the list being locked. The operation can be terminated by calling + * mt_list_unlock_link() on the returned value, which will unlock the list and + * effectively result in the removal of the element from the list, or by + * calling mt_list_unlock_full() to reinstall the element at its place in the + * list, effectively consisting in a temporary lock of this element. Example: + * + * struct mt_list *grow_shrink_remove(struct mt_list *el, size_t new_size) + * { + * struct mt_list tmp = mt_list_lock_full(&node->list); + * struct mt_list *new = new_size ? realloc(el, new_size) : NULL; + * if (new_size) { + * mt_list_unlock_full(new ? 
new : el, tmp); + * } else { + * free(el); + * mt_list_unlock_link(tmp); + * } + * return new; + * } + */ +static MT_INLINE struct mt_list mt_list_lock_full(struct mt_list *el) +{ + struct mt_list *n2; + struct mt_list *p2; + struct mt_list ret; + unsigned long loops = 0; + + for (;; mt_list_cpu_relax(loops = loops * 8 + 7)) { + p2 = NULL; + ret.next = __atomic_exchange_n(&el->next, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (ret.next == MT_LIST_BUSY) + continue; + + ret.prev = __atomic_exchange_n(&el->prev, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (ret.prev == MT_LIST_BUSY) { + el->next = ret.next; + __atomic_thread_fence(__ATOMIC_RELEASE); + continue; + } + + if (ret.prev != el) { + p2 = __atomic_exchange_n(&ret.prev->next, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (p2 == MT_LIST_BUSY) { + *el = ret; + __atomic_thread_fence(__ATOMIC_RELEASE); + continue; + } + } + + if (ret.next != el) { + n2 = __atomic_exchange_n(&ret.next->prev, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (n2 == MT_LIST_BUSY) { + if (p2 != NULL) + ret.prev->next = p2; + *el = ret; + __atomic_thread_fence(__ATOMIC_RELEASE); + continue; + } + } + break; + } + return ret; +} + +/* Connects two ends in a list together, effectively unlocking the list if it + * was locked. It takes a list head which contains a pointer to the prev and + * next elements to connect together. It normally is a copy of a previous link + * returned by functions such as mt_list_lock_next(), mt_list_lock_prev(), or + * mt_list_lock_full(). If applied after mt_list_lock_full(), it will result + * in the list being reconnected without the element, which remains locked, + * effectively deleting it. Note that this is not meant to be used from within + * iterators, as the iterator will automatically and safely reconnect ends + * after each iteration. See examples above. + */ +static inline void mt_list_unlock_link(struct mt_list ends) +{ + /* make sure any previous writes to are seen */ + __atomic_thread_fence(__ATOMIC_RELEASE); + ends.next->prev = ends.prev; + ends.prev->next = ends.next; +} + + +/* Connects element at both ends of a list which is still locked + * hence has the link between these endpoints cut. This automatically unlocks + * both the element and the list, and effectively results in inserting or + * appending the element to that list if the ends were just after or just + * before the list's head. It is mainly used to unlock an element previously + * locked with mt_list_lock_full() by passing this function's return value as + * . After the operation, no locked pointer remains. This must not be + * used inside iterators as it would result in also unlocking the list itself. + * The element doesn't need to be previously initialized as it gets blindly + * overwritten with . See examples above. + */ +static inline void mt_list_unlock_full(struct mt_list *el, struct mt_list ends) +{ + *el = ends; + __atomic_thread_fence(__ATOMIC_RELEASE); + + if (__builtin_expect(ends.next != el, 1)) + ends.next->prev = el; + if (__builtin_expect(ends.prev != el, 1)) + ends.prev->next = el; +} + + +/***************************************************************************** + * The macros and functions below are only used by the iterators. These must * + * not be used for other purposes unless the caller 100% complies with their * + * specific validity domain! * + *****************************************************************************/ + + +/* Unlocks element from the backup copy of previous next pointer . 
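+ * In the iterators below, the backup pointer passed here is the value that
+ * was previously returned by _mt_list_lock_next() on this same element.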
+ * It supports the special case where the list was empty and the element locked + * while looping over itself (we don't need/want to overwrite ->prev in this + * case). + */ +static inline void _mt_list_unlock_next(struct mt_list *el, struct mt_list *back) +{ + el->next = back; + __atomic_thread_fence(__ATOMIC_RELEASE); + + if (back != el) + back->prev = el; +} + + +/* Unlocks element from the backup copy of previous prev pointer . + * It's the caller's responsibility to make sure that is not equal to + * here (this is OK in iterators because if the list is empty, the list's + * head is not locked for prev and the caller has NULL in back.prev, thus does + * not call this function). + */ +static inline void _mt_list_unlock_prev(struct mt_list *el, struct mt_list *back) +{ + el->prev = back; + __atomic_thread_fence(__ATOMIC_RELEASE); + + back->next = el; +} + + +/* Locks the link designated by element 's next pointer and returns its + * previous value. If the element does not loop over itself (empty list head), + * its reciprocal prev pointer is locked as well. This check is necessary + * because we don't want to lock the head twice. + */ +static MT_INLINE struct mt_list *_mt_list_lock_next(struct mt_list *el) +{ + struct mt_list *n, *n2; + unsigned long loops = 0; + + for (;; mt_list_cpu_relax(loops = loops * 8 + 7)) { + n = __atomic_exchange_n(&el->next, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (n == MT_LIST_BUSY) + continue; + + if (n != el) { + n2 = __atomic_exchange_n(&n->prev, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (n2 == MT_LIST_BUSY) { + el->next = n; + __atomic_thread_fence(__ATOMIC_RELEASE); + continue; + } + } + break; + } + return n; +} + + +/* Locks the link designated by element 's prev pointer and returns its + * previous value. The caller must ensure that the element does not loop over + * itself (which is OK in iterators because the caller will only lock the prev + * pointer on an non-empty list). + */ +static MT_INLINE struct mt_list *_mt_list_lock_prev(struct mt_list *el) +{ + struct mt_list *p, *p2; + unsigned long loops = 0; + + for (;; mt_list_cpu_relax(loops = loops * 8 + 7)) { + p = __atomic_exchange_n(&el->prev, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (p == MT_LIST_BUSY) + continue; + + p2 = __atomic_exchange_n(&p->next, MT_LIST_BUSY, __ATOMIC_RELAXED); + if (p2 == MT_LIST_BUSY) { + el->prev = p; + __atomic_thread_fence(__ATOMIC_RELEASE); + continue; + } + break; + } + return p; +} + + +/* Outer loop of MT_LIST_FOR_EACH_ENTRY_LOCKED(). Do not use directly! + * This loop is only used to unlock the last item after the end of the inner + * loop is reached or if we break out of it. + * + * Trick: item starts with the impossible and unused value MT_LIST_BUSY that is + * detected as the looping condition to force to enter the loop. The inner loop + * will first replace it, making the compiler notice that this condition cannot + * happen after the first iteration, and making it implement exactly one round + * and no more. 
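+ *
+ * For reference, a sketch of the public MT_LIST_FOR_EACH_ENTRY_LOCKED()
+ * macro built from these two loops; the "struct job" type, its "link"
+ * member, the "jobs" head and the helpers are illustrative only:
+ *
+ *     struct job *j;
+ *     struct mt_list back;
+ *
+ *     MT_LIST_FOR_EACH_ENTRY_LOCKED(j, &jobs, link, back) {
+ *         if (job_is_stale(j)) {
+ *             /* Freeing is safe: the links around the element are still
+ *              * held by the iterator. Resetting j to NULL tells it to
+ *              * reconnect the neighbors instead of unlocking the element
+ *              * back into the list.
+ *              */
+ *             free(j);
+ *             j = NULL;
+ *         }
+ *     }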
+ */ +#define _MT_LIST_FOR_EACH_ENTRY_LOCKED_OUTER(item, lh, lm, back) \ + for (/* init-expr: preset for one iteration */ \ + (back).prev = NULL, \ + (back).next = _mt_list_lock_next(lh), \ + (item) = (void*)MT_LIST_BUSY; \ + /* condition-expr: only one iteration */ \ + (void*)(item) == (void*)MT_LIST_BUSY; \ + /* loop-expr */ \ + ({ \ + /* post loop cleanup: \ + * gets executed only once to perform cleanup \ + * after child loop has finished, or a break happened \ + */ \ + if (item != NULL) { \ + /* last visited item still exists or is the list's head \ + * so we have to unlock it. back.prev may be null if \ + * the list is empty and the inner loop did not run. \ + */ \ + if (back.prev) \ + _mt_list_unlock_prev(&item->lm, back.prev); \ + _mt_list_unlock_next(&item->lm, back.next); \ + } else { \ + /* last item was deleted by user, relink is required: \ + * prev->next = next \ + * next->prev = prev \ + * Note that gcc may believe that back.prev may be null \ + * which is not possible by construction. \ + */ \ + MT_ALREADY_CHECKED(back.prev); \ + mt_list_unlock_link(back); \ + } \ + }) \ + ) + + +/* Inner loop of MT_LIST_FOR_EACH_ENTRY_LOCKED(). Do not use directly! + * This loop iterates over all list elements and unlocks the previously visited + * element. It stops when reaching the list's head, without unlocking the last + * element, which is left to the outer loop to deal with, just like when hitting + * a break. In order to preserve the locking, the loop takes care of always + * locking the next element before unlocking the previous one. During the first + * iteration, the prev element might be NULL since the head is singly-locked. + * Inside the execution block, the element is fully locked. The caller does not + * need to unlock it, unless other parts of the code expect it to be unlocked + * (concurrent watcher or element placed back into a pool for example). + */ +#define _MT_LIST_FOR_EACH_ENTRY_LOCKED_INNER(item, lh, lm, back) \ + for (/* init-expr */ \ + item = MT_LIST_ELEM(lh, typeof(item), lm); \ + /* cond-expr (thus executed before the body of the loop) */ \ + (back.next != lh) && ({ \ + struct mt_list *__tmp_next = back.next; \ + /* did not reach end of list yet */ \ + back.next = _mt_list_lock_next(back.next); \ + if (item != NULL) { \ + /* previous item was not deleted, we must unlock it */ \ + if (back.prev) { \ + /* not executed on first run \ + * (back.prev == NULL on first run) \ + */ \ + _mt_list_unlock_prev(&item->lm, back.prev); \ + /* unlock_prev will implicitly relink: \ + * item->lm.prev = prev \ + * prev->next = &item->lm \ + */ \ + } \ + back.prev = &item->lm; \ + } \ + (item) = MT_LIST_ELEM(__tmp_next, typeof(item), lm); \ + 1; /* end of list not reached, we must execute */ \ + }); \ + /* empty loop-expr */ \ + ) + +/* Outer loop of MT_LIST_FOR_EACH_ENTRY_UNLOCKED(). Do not use directly! + * This loop is only used to unlock the last item after the end of the inner + * loop is reached or if we break out of it. + * + * Trick: item starts with the impossible and unused value MT_LIST_BUSY that is + * detected as the looping condition to force to enter the loop. The inner loop + * will first replace it, making the compiler notice that this condition cannot + * happen after the first iteration, and making it implement exactly one round + * and no more. 
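+ *
+ * Note: unlike the LOCKED variant, the cleanup below first re-marks the
+ * visited element's next pointer as MT_LIST_BUSY (it was left unlocked for
+ * the block) before reconnecting its predecessor, so that a concurrent
+ * thread reaching it through that predecessor does not mistake it for a
+ * detached, self-looping element.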
+ */ +#define _MT_LIST_FOR_EACH_ENTRY_UNLOCKED_OUTER(item, lh, lm, back) \ + for (/* init-expr: preset for one iteration */ \ + (back).prev = NULL, \ + (back).next = _mt_list_lock_next(lh), \ + (item) = (void*)MT_LIST_BUSY; \ + /* condition-expr: only one iteration */ \ + (void*)(item) == (void*)MT_LIST_BUSY; \ + /* loop-expr */ \ + ({ \ + /* post loop cleanup: \ + * gets executed only once to perform cleanup \ + * after child loop has finished, or a break happened \ + */ \ + if (item != NULL) { \ + /* last visited item still exists or is the list's head \ + * so we have to unlock it. back.prev may be null if \ + * the list is empty and the inner loop did not run. \ + */ \ + if (back.prev) { \ + item->lm.next = (void*)MT_LIST_BUSY; \ + __atomic_thread_fence(__ATOMIC_RELEASE); \ + _mt_list_unlock_prev(&item->lm, back.prev); \ + } \ + _mt_list_unlock_next(&item->lm, back.next); \ + } else { \ + /* last item was deleted by user, relink is required: \ + * prev->next = next \ + * next->prev = prev \ + * Note that gcc may believe that back.prev may be null \ + * which is not possible by construction. \ + */ \ + MT_ALREADY_CHECKED(back.prev); \ + mt_list_unlock_link(back); \ + } \ + }) \ + ) + + +/* Inner loop of MT_LIST_FOR_EACH_ENTRY_UNLOCKED(). Do not use directly! + * This loop iterates over all list elements and unlocks the previously visited + * element. It stops when reaching the list's head, without unlocking the last + * element, which is left to the outer loop to deal with, just like when hitting + * a break. In order to preserve the locking, the loop takes care of always + * locking the next element before unlocking the previous one. During the first + * iteration, the prev element might be NULL since the head is singly-locked. + * Inside the execution block, the element is unlocked (but its neighbors are + * still locked). The caller never needs to unlock it. However this must not be + * used in situations where direct access to the element is possible (without + * passing via the iterator). 
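+ *
+ * For reference, a sketch of the public MT_LIST_FOR_EACH_ENTRY_UNLOCKED()
+ * macro built from these two loops, draining a queue; the "struct job"
+ * type, its "link" member, the "jobs" head and the helper are illustrative
+ * only:
+ *
+ *     struct job *j;
+ *     struct mt_list back;
+ *
+ *     MT_LIST_FOR_EACH_ENTRY_UNLOCKED(j, &jobs, link, back) {
+ *         /* j is unlocked here; consume it, then report it as removed so
+ *          * that the iterator reconnects its neighbors.
+ *          */
+ *         handle_job(j);
+ *         free(j);
+ *         j = NULL;
+ *     }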
+ */ +#define _MT_LIST_FOR_EACH_ENTRY_UNLOCKED_INNER(item, lh, lm, back) \ + for (/* init-expr */ \ + item = MT_LIST_ELEM(lh, typeof(item), lm); \ + /* cond-expr (thus executed before the body of the loop) */ \ + (back.next != lh) && ({ \ + struct mt_list *__tmp_next = back.next; \ + /* did not reach end of list yet */ \ + back.next = _mt_list_lock_next(back.next); \ + if (item != NULL) { \ + /* previous item was not deleted, we must unlock it */ \ + if (back.prev) { \ + /* not executed on first run \ + * (back.prev == NULL on first run) \ + */ \ + item->lm.next = (void*)MT_LIST_BUSY; \ + __atomic_thread_fence(__ATOMIC_RELEASE); \ + _mt_list_unlock_prev(&item->lm, back.prev); \ + /* unlock_prev will implicitly relink: \ + * item->lm.prev = prev \ + * prev->next = &item->lm \ + */ \ + } \ + back.prev = &item->lm; \ + } \ + mt_list_unlock_self(__tmp_next); \ + (item) = MT_LIST_ELEM(__tmp_next, typeof(item), lm); \ + 1; /* end of list not reached, we must execute */ \ + }); \ + /* empty loop-expr */ \ + ) + +#endif /* _MT_LIST_H */ diff --git a/src/dns_ring.c b/src/dns_ring.c index 01ce59331c..c6d7203a16 100644 --- a/src/dns_ring.c +++ b/src/dns_ring.c @@ -94,7 +94,7 @@ ssize_t dns_ring_write(struct dns_ring *ring, size_t maxlen, const struct ist pf size_t lenlen; uint64_t dellen; int dellenlen; - struct mt_list *elt1, elt2; + struct mt_list back; ssize_t sent = 0; int i; @@ -166,7 +166,7 @@ ssize_t dns_ring_write(struct dns_ring *ring, size_t maxlen, const struct ist pf sent = lenlen + totlen + 1; /* notify potential readers */ - mt_list_for_each_entry_safe(appctx, &ring->waiters, wait_entry, elt1, elt2) + MT_LIST_FOR_EACH_ENTRY_LOCKED(appctx, &ring->waiters, wait_entry, back) appctx_wakeup(appctx); done_buf: diff --git a/src/event_hdl.c b/src/event_hdl.c index f4f7b19e4d..caef654840 100644 --- a/src/event_hdl.c +++ b/src/event_hdl.c @@ -71,14 +71,13 @@ static void _event_hdl_sub_list_destroy(event_hdl_sub_list *sub_list); static void event_hdl_deinit(struct sig_handler *sh) { event_hdl_sub_list *cur_list; - struct mt_list *elt1, elt2; + struct mt_list back; /* destroy all known subscription lists */ - mt_list_for_each_entry_safe(cur_list, &known_event_hdl_sub_list, known, elt1, elt2) { - /* remove cur elem from list */ - MT_LIST_DELETE_SAFE(elt1); - /* then destroy it */ + MT_LIST_FOR_EACH_ENTRY_UNLOCKED(cur_list, &known_event_hdl_sub_list, known, back) { + /* remove cur elem from list and free it */ _event_hdl_sub_list_destroy(cur_list); + cur_list = NULL; } } @@ -299,11 +298,11 @@ static inline struct event_hdl_sub_type _event_hdl_getsub_async(struct event_hdl struct mt_list lock; struct event_hdl_sub_type type = EVENT_HDL_SUB_NONE; - lock = MT_LIST_LOCK_ELT(&cur_sub->mt_list); + lock = mt_list_lock_full(&cur_sub->mt_list); if (lock.next != &cur_sub->mt_list) type = _event_hdl_getsub(cur_sub); // else already removed - MT_LIST_UNLOCK_ELT(&cur_sub->mt_list, lock); + mt_list_unlock_full(&cur_sub->mt_list, lock); return type; } @@ -320,11 +319,11 @@ static inline int _event_hdl_resub_async(struct event_hdl_sub *cur_sub, struct e int status = 0; struct mt_list lock; - lock = MT_LIST_LOCK_ELT(&cur_sub->mt_list); + lock = mt_list_lock_full(&cur_sub->mt_list); if (lock.next != &cur_sub->mt_list) status = _event_hdl_resub(cur_sub, type); // else already removed - MT_LIST_UNLOCK_ELT(&cur_sub->mt_list, lock); + mt_list_unlock_full(&cur_sub->mt_list, lock); return status; } @@ -343,13 +342,14 @@ static inline void _event_hdl_unsubscribe(struct event_hdl_sub *del_sub) * kill themselves (ie: normal 
async mode) when they receive such event */ HA_ATOMIC_INC(&del_sub->hdl.async_equeue->size); - lock = MT_LIST_APPEND_LOCKED(&del_sub->hdl.async_equeue->head, &del_sub->async_end->mt_list); + mt_list_lock_elem(&del_sub->async_end->mt_list); + lock = mt_list_lock_prev(&del_sub->hdl.async_equeue->head); /* wake up the task */ event_hdl_task_wakeup(del_sub->hdl.async_task); /* unlock END EVENT (we're done, the task is now free to consume it) */ - MT_LIST_UNLOCK_ELT(&del_sub->async_end->mt_list, lock); + mt_list_unlock_full(&del_sub->async_end->mt_list, lock); /* we don't free sub here * freeing will be performed by async task so it can safely rely @@ -419,8 +419,7 @@ static void event_hdl_unsubscribe_sync(const struct event_hdl_sub_mgmt *mgmt) return; /* already removed from sync ctx */ /* assuming that publish sync code will notice that mgmt->this is NULL - * and will perform the list removal using MT_LIST_DELETE_SAFE and - * _event_hdl_unsubscribe() + * and will perform the list removal and _event_hdl_unsubscribe() * while still owning the lock */ ((struct event_hdl_sub_mgmt *)mgmt)->this = NULL; @@ -447,7 +446,7 @@ struct event_hdl_sub *event_hdl_subscribe_ptr(event_hdl_sub_list *sub_list, struct event_hdl_sub_type e_type, struct event_hdl hdl) { struct event_hdl_sub *new_sub = NULL; - struct mt_list *elt1, elt2; + struct mt_list back; struct event_hdl_async_task_default_ctx *task_ctx = NULL; struct mt_list lock; @@ -523,14 +522,14 @@ struct event_hdl_sub *event_hdl_subscribe_ptr(event_hdl_sub_list *sub_list, /* ready for registration */ MT_LIST_INIT(&new_sub->mt_list); - lock = MT_LIST_LOCK_ELT(&sub_list->known); + lock = mt_list_lock_full(&sub_list->known); /* check if such identified hdl is not already registered */ if (hdl.id) { struct event_hdl_sub *cur_sub; uint8_t found = 0; - mt_list_for_each_entry_safe(cur_sub, &sub_list->head, mt_list, elt1, elt2) { + MT_LIST_FOR_EACH_ENTRY_LOCKED(cur_sub, &sub_list->head, mt_list, back) { if (hdl.id == cur_sub->hdl.id) { /* we found matching registered hdl */ found = 1; @@ -539,7 +538,7 @@ struct event_hdl_sub *event_hdl_subscribe_ptr(event_hdl_sub_list *sub_list, } if (found) { /* error already registered */ - MT_LIST_UNLOCK_ELT(&sub_list->known, lock); + mt_list_unlock_full(&sub_list->known, lock); event_hdl_report_hdl_state(ha_alert, &hdl, "SUB", "could not subscribe: subscription with this id already exists"); goto cleanup; } @@ -551,7 +550,7 @@ struct event_hdl_sub *event_hdl_subscribe_ptr(event_hdl_sub_list *sub_list, * it is a memory/IO error since it should not be long before haproxy * enters the deinit() function anyway */ - MT_LIST_UNLOCK_ELT(&sub_list->known, lock); + mt_list_unlock_full(&sub_list->known, lock); goto cleanup; } @@ -575,7 +574,7 @@ struct event_hdl_sub *event_hdl_subscribe_ptr(event_hdl_sub_list *sub_list, MT_LIST_APPEND(&sub_list->head, &new_sub->mt_list); } - MT_LIST_UNLOCK_ELT(&sub_list->known, lock); + mt_list_unlock_full(&sub_list->known, lock); return new_sub; @@ -629,11 +628,11 @@ void event_hdl_pause(struct event_hdl_sub *cur_sub) { struct mt_list lock; - lock = MT_LIST_LOCK_ELT(&cur_sub->mt_list); + lock = mt_list_lock_full(&cur_sub->mt_list); if (lock.next != &cur_sub->mt_list) _event_hdl_pause(cur_sub); // else already removed - MT_LIST_UNLOCK_ELT(&cur_sub->mt_list, lock); + mt_list_unlock_full(&cur_sub->mt_list, lock); } void _event_hdl_resume(struct event_hdl_sub *cur_sub) @@ -645,11 +644,11 @@ void event_hdl_resume(struct event_hdl_sub *cur_sub) { struct mt_list lock; - lock = 
MT_LIST_LOCK_ELT(&cur_sub->mt_list); + lock = mt_list_lock_full(&cur_sub->mt_list); if (lock.next != &cur_sub->mt_list) _event_hdl_resume(cur_sub); // else already removed - MT_LIST_UNLOCK_ELT(&cur_sub->mt_list, lock); + mt_list_unlock_full(&cur_sub->mt_list, lock); } void event_hdl_unsubscribe(struct event_hdl_sub *del_sub) @@ -678,17 +677,18 @@ int event_hdl_lookup_unsubscribe(event_hdl_sub_list *sub_list, uint64_t lookup_id) { struct event_hdl_sub *del_sub = NULL; - struct mt_list *elt1, elt2; + struct mt_list back; int found = 0; if (!sub_list) sub_list = &global_event_hdl_sub_list; /* fall back to global list */ - mt_list_for_each_entry_safe(del_sub, &sub_list->head, mt_list, elt1, elt2) { + MT_LIST_FOR_EACH_ENTRY_LOCKED(del_sub, &sub_list->head, mt_list, back) { if (lookup_id == del_sub->hdl.id) { /* we found matching registered hdl */ - MT_LIST_DELETE_SAFE(elt1); + mt_list_unlock_self(&del_sub->mt_list); _event_hdl_unsubscribe(del_sub); + del_sub = NULL; found = 1; break; /* id is unique, stop searching */ } @@ -700,13 +700,13 @@ int event_hdl_lookup_resubscribe(event_hdl_sub_list *sub_list, uint64_t lookup_id, struct event_hdl_sub_type type) { struct event_hdl_sub *cur_sub = NULL; - struct mt_list *elt1, elt2; + struct mt_list back; int status = 0; if (!sub_list) sub_list = &global_event_hdl_sub_list; /* fall back to global list */ - mt_list_for_each_entry_safe(cur_sub, &sub_list->head, mt_list, elt1, elt2) { + MT_LIST_FOR_EACH_ENTRY_LOCKED(cur_sub, &sub_list->head, mt_list, back) { if (lookup_id == cur_sub->hdl.id) { /* we found matching registered hdl */ status = _event_hdl_resub(cur_sub, type); @@ -720,13 +720,13 @@ int event_hdl_lookup_pause(event_hdl_sub_list *sub_list, uint64_t lookup_id) { struct event_hdl_sub *cur_sub = NULL; - struct mt_list *elt1, elt2; + struct mt_list back; int found = 0; if (!sub_list) sub_list = &global_event_hdl_sub_list; /* fall back to global list */ - mt_list_for_each_entry_safe(cur_sub, &sub_list->head, mt_list, elt1, elt2) { + MT_LIST_FOR_EACH_ENTRY_LOCKED(cur_sub, &sub_list->head, mt_list, back) { if (lookup_id == cur_sub->hdl.id) { /* we found matching registered hdl */ _event_hdl_pause(cur_sub); @@ -741,13 +741,13 @@ int event_hdl_lookup_resume(event_hdl_sub_list *sub_list, uint64_t lookup_id) { struct event_hdl_sub *cur_sub = NULL; - struct mt_list *elt1, elt2; + struct mt_list back; int found = 0; if (!sub_list) sub_list = &global_event_hdl_sub_list; /* fall back to global list */ - mt_list_for_each_entry_safe(cur_sub, &sub_list->head, mt_list, elt1, elt2) { + MT_LIST_FOR_EACH_ENTRY_LOCKED(cur_sub, &sub_list->head, mt_list, back) { if (lookup_id == cur_sub->hdl.id) { /* we found matching registered hdl */ _event_hdl_resume(cur_sub); @@ -762,13 +762,13 @@ struct event_hdl_sub *event_hdl_lookup_take(event_hdl_sub_list *sub_list, uint64_t lookup_id) { struct event_hdl_sub *cur_sub = NULL; - struct mt_list *elt1, elt2; + struct mt_list back; uint8_t found = 0; if (!sub_list) sub_list = &global_event_hdl_sub_list; /* fall back to global list */ - mt_list_for_each_entry_safe(cur_sub, &sub_list->head, mt_list, elt1, elt2) { + MT_LIST_FOR_EACH_ENTRY_LOCKED(cur_sub, &sub_list->head, mt_list, back) { if (lookup_id == cur_sub->hdl.id) { /* we found matching registered hdl */ event_hdl_take(cur_sub); @@ -787,11 +787,11 @@ static int _event_hdl_publish(event_hdl_sub_list *sub_list, struct event_hdl_sub const struct event_hdl_cb_data *data) { struct event_hdl_sub *cur_sub; - struct mt_list *elt1, elt2; + struct mt_list back; struct 
event_hdl_async_event_data *async_data = NULL; /* reuse async data for multiple async hdls */ int error = 0; - mt_list_for_each_entry_safe(cur_sub, &sub_list->head, mt_list, elt1, elt2) { + MT_LIST_FOR_EACH_ENTRY_LOCKED(cur_sub, &sub_list->head, mt_list, back) { /* notify each function that has subscribed to sub_family.type, unless paused */ if ((cur_sub->sub.family == e_type.family) && ((cur_sub->sub.subtype & e_type.subtype) == e_type.subtype) && @@ -821,10 +821,11 @@ static int _event_hdl_publish(event_hdl_sub_list *sub_list, struct event_hdl_sub if (!sub_mgmt.this) { /* user has performed hdl unsub * we must remove it from the list + * then free it. */ - MT_LIST_DELETE_SAFE(elt1); - /* then free it */ + mt_list_unlock_self(&cur_sub->mt_list); _event_hdl_unsubscribe(cur_sub); + cur_sub = NULL; } } else { /* async mode: here we need to prepare event data @@ -952,13 +953,12 @@ void event_hdl_sub_list_init(event_hdl_sub_list *sub_list) static void _event_hdl_sub_list_destroy(event_hdl_sub_list *sub_list) { struct event_hdl_sub *cur_sub; - struct mt_list *elt1, elt2; + struct mt_list back; - mt_list_for_each_entry_safe(cur_sub, &sub_list->head, mt_list, elt1, elt2) { - /* remove cur elem from list */ - MT_LIST_DELETE_SAFE(elt1); - /* then free it */ + MT_LIST_FOR_EACH_ENTRY_UNLOCKED(cur_sub, &sub_list->head, mt_list, back) { + /* remove cur elem from list and free it */ _event_hdl_unsubscribe(cur_sub); + cur_sub = NULL; } } diff --git a/src/hlua_fcn.c b/src/hlua_fcn.c index 0340ce1f3c..b13856e3d4 100644 --- a/src/hlua_fcn.c +++ b/src/hlua_fcn.c @@ -555,7 +555,7 @@ static int hlua_queue_push(lua_State *L) { struct hlua_queue *queue = hlua_check_queue(L, 1); struct hlua_queue_item *item; - struct mt_list *elt1, elt2; + struct mt_list back; struct hlua_queue_wait *waiter; if (lua_gettop(L) != 2 || lua_isnoneornil(L, 2)) { @@ -581,7 +581,7 @@ static int hlua_queue_push(lua_State *L) MT_LIST_APPEND(&queue->list, &item->list); /* notify tasks waiting on queue:pop_wait() (if any) */ - mt_list_for_each_entry_safe(waiter, &queue->wait_tasks, entry, elt1, elt2) { + MT_LIST_FOR_EACH_ENTRY_LOCKED(waiter, &queue->wait_tasks, entry, back) { task_wakeup(waiter->task, TASK_WOKEN_MSG); } diff --git a/src/quic_sock.c b/src/quic_sock.c index a3f37684ef..06b65e083d 100644 --- a/src/quic_sock.c +++ b/src/quic_sock.c @@ -1041,15 +1041,19 @@ void quic_accept_push_qc(struct quic_conn *qc) struct task *quic_accept_run(struct task *t, void *ctx, unsigned int i) { struct li_per_thread *lthr; - struct mt_list *elt1, elt2; + struct mt_list back; struct quic_accept_queue *queue = &quic_accept_queues[tid]; - mt_list_for_each_entry_safe(lthr, &queue->listeners, quic_accept.list, elt1, elt2) { + MT_LIST_FOR_EACH_ENTRY_LOCKED(lthr, &queue->listeners, quic_accept.list, back) { listener_accept(lthr->li); - if (!MT_LIST_ISEMPTY(<hr->quic_accept.conns)) + if (!MT_LIST_ISEMPTY(<hr->quic_accept.conns)) { + /* entry is left in queue */ tasklet_wakeup((struct tasklet*)t); - else - MT_LIST_DELETE_SAFE(elt1); + } + else { + mt_list_unlock_self(<hr->quic_accept.list); + lthr = NULL; /* delete it */ + } } return NULL; diff --git a/src/ring.c b/src/ring.c index a5800509d1..1c32bd8eb7 100644 --- a/src/ring.c +++ b/src/ring.c @@ -435,11 +435,11 @@ ssize_t ring_write(struct ring *ring, size_t maxlen, const struct ist pfx[], siz if (sent && HA_ATOMIC_LOAD(&ring->readers_count)) { HA_ATOMIC_INC(&ring->pending); while (HA_ATOMIC_LOAD(&ring->pending) && HA_ATOMIC_XCHG(&ring->waking, 1) == 0) { - struct mt_list *elt1, elt2; + struct mt_list 
back; struct appctx *appctx; HA_ATOMIC_STORE(&ring->pending, 0); - mt_list_for_each_entry_safe(appctx, &ring->waiters, wait_entry, elt1, elt2) + MT_LIST_FOR_EACH_ENTRY_LOCKED(appctx, &ring->waiters, wait_entry, back) appctx_wakeup(appctx); HA_ATOMIC_STORE(&ring->waking, 0); } diff --git a/src/server.c b/src/server.c index 1cfcb6bf22..3a667095b4 100644 --- a/src/server.c +++ b/src/server.c @@ -1994,11 +1994,11 @@ static int srv_has_streams(struct server *srv) void srv_shutdown_streams(struct server *srv, int why) { struct stream *stream; - struct mt_list *elt1, elt2; + struct mt_list back; int thr; for (thr = 0; thr < global.nbthread; thr++) - mt_list_for_each_entry_safe(stream, &srv->per_thr[thr].streams, by_srv, elt1, elt2) + MT_LIST_FOR_EACH_ENTRY_LOCKED(stream, &srv->per_thr[thr].streams, by_srv, back) if (stream->srv_conn == srv) stream_shutdown(stream, why); } @@ -5998,7 +5998,7 @@ static int cli_parse_delete_server(char **args, char *payload, struct appctx *ap struct server *srv; struct server *prev_del; struct ist be_name, sv_name; - struct mt_list *elt1, elt2; + struct mt_list back; struct sess_priv_conns *sess_conns = NULL; const char *msg; int ret, i; @@ -6062,7 +6062,7 @@ static int cli_parse_delete_server(char **args, char *payload, struct appctx *ap BUG_ON(srv->curr_idle_conns); /* Close idle private connections attached to this server. */ - mt_list_for_each_entry_safe(sess_conns, &srv->sess_conns, srv_el, elt1, elt2) { + MT_LIST_FOR_EACH_ENTRY_LOCKED(sess_conns, &srv->sess_conns, srv_el, back) { struct connection *conn, *conn_back; list_for_each_entry_safe(conn, conn_back, &sess_conns->conn_list, sess_el) { @@ -6082,8 +6082,8 @@ static int cli_parse_delete_server(char **args, char *payload, struct appctx *ap } LIST_DELETE(&sess_conns->sess_el); - MT_LIST_DELETE_SAFE(elt1); pool_free(pool_head_sess_priv_conns, sess_conns); + sess_conns = NULL; } /* removing cannot fail anymore when we reach this: diff --git a/tests/unit/test-list.c b/tests/unit/test-list.c index 9e6ac38386..c643c4f65c 100644 --- a/tests/unit/test-list.c +++ b/tests/unit/test-list.c @@ -2,11 +2,10 @@ #include #include #define USE_THREAD -#include +#include -/* Stress test the mt_lists. - * Compile from the haproxy directory with : - * cc -I../../include test-list.c -pthread -O2 -o test-list +/* Stress test for mt_lists. Compile this way: + * cc -O2 -o test-list test-list.c -I../include -pthread * The only argument it takes is the number of threads to be used. * ./test-list 4 */ @@ -19,17 +18,31 @@ struct pouet_lol { struct mt_list list_elt; }; +/* Fixed RNG sequence to ease reproduction of measurements (will be offset by + * the thread number). 
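+ * Each thread adds its tid to this seed before drawing numbers, so threads
+ * quickly diverge while a given thread count still replays the same run.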
+ */ +__thread uint32_t rnd32_state = 2463534242U; + +/* Xorshift RNG from http://www.jstatsoft.org/v08/i14/paper */ +static inline uint32_t rnd32() +{ + rnd32_state ^= rnd32_state << 13; + rnd32_state ^= rnd32_state >> 17; + rnd32_state ^= rnd32_state << 5; + return rnd32_state; +} + void *thread(void *pouet) { struct pouet_lol *lol; - struct mt_list *elt1, elt2; + struct mt_list elt2; tid = (uintptr_t)pouet; - int i = 0; + int i; - for (int i = 0; i < MAX_ACTION; i++) { - struct pouet_lol *lol; - struct mt_list *elt1, elt2; - switch (random() % 4) { + rnd32_state += tid; + + for (i = 0; i < MAX_ACTION; i++) { + switch (rnd32() % 4) { case 0: lol = malloc(sizeof(*lol)); MT_LIST_INIT(&lol->list_elt); @@ -47,15 +60,12 @@ void *thread(void *pouet) free(lol); break; case 3: - - mt_list_for_each_entry_safe(lol, &pouet_list, list_elt, elt1, elt2) - -{ - if (random() % 2) { - MT_LIST_DELETE_SAFE(elt1); + MT_LIST_FOR_EACH_ENTRY_LOCKED(lol, &pouet_list, list_elt, elt2) { + if (rnd32() % 2) { free(lol); + lol = NULL; } - if (random() % 2) { + if (rnd32() % 2) { break; } } @@ -63,7 +73,10 @@ void *thread(void *pouet) default: break; } + if ((i) / (MAX_ACTION/10) != (i+1) / (MAX_ACTION/10)) + printf("%u: %d\n", tid, i+1); } + return NULL; } int main(int argc, char *argv[])