/*
 * include/proto/fd.h
 * File descriptors states.
 *
 * Copyright (C) 2000-2014 Willy Tarreau - w@1wt.eu
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, version 2.1
 * exclusively.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef _PROTO_FD_H
#define _PROTO_FD_H

#include <stdio.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>

#include <haproxy/api.h>
#include <haproxy/ticks.h>
#include <haproxy/time.h>
#include <types/fd.h>
#include <haproxy/activity.h>

/* public variables */

extern volatile struct fdlist update_list;

extern struct polled_mask {
	unsigned long poll_recv;
	unsigned long poll_send;
} *polled_mask;

extern THREAD_LOCAL int *fd_updt;  // FD updates list
extern THREAD_LOCAL int fd_nbupdt; // number of updates in the list

extern int poller_wr_pipe[MAX_THREADS];

extern volatile int ha_used_fds; // Number of FDs we're currently using

/* Deletes an FD from the fdsets.
 * The file descriptor is also closed.
 */
void fd_delete(int fd);

/* Deletes an FD from the fdsets.
 * The file descriptor is kept open.
 */
void fd_remove(int fd);
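
/* Illustrative note (not part of the original header): the pair above differs
 * only in what happens to the underlying descriptor, e.g.:
 *
 *	fd_delete(fd);	// unregister <fd> and close() it (normal teardown)
 *	fd_remove(fd);	// unregister <fd> but leave it open, e.g. when the
 *			// descriptor was handed over to another process
 */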

/*
 * Take over an FD belonging to another thread.
 * Returns 0 on success, and -1 on failure.
 */
int fd_takeover(int fd, void *expected_owner);

#ifndef HA_HAVE_CAS_DW
__decl_thread(extern HA_RWLOCK_T fd_mig_lock);
#endif
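
/* Hedged usage sketch (assumption, not from the original header): a thread
 * reusing an idle connection owned by another thread would pass the owner it
 * expects, so that a concurrent reassignment makes the takeover fail:
 *
 *	if (fd_takeover(fd, expected_owner) < 0) {
 *		// <fd> was kept or reassigned by its owner in the meantime;
 *		// fall back to establishing a fresh connection
 *	}
 */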

/* Writes up to <maxlen> bytes assembled from the <npfx> prefix fragments and
 * the <nmsg> message fragments to fd <fd>, appending a trailing LF when <nl>
 * is set.
 */
ssize_t fd_write_frag_line(int fd, size_t maxlen, const struct ist pfx[], size_t npfx, const struct ist msg[], size_t nmsg, int nl);

/* close all FDs starting from <start> */
void my_closefrom(int start);

/* disable the specified poller */
void disable_poller(const char *poller_name);

/* I/O handler of the wake-up pipe used to interrupt a sleeping poller thread */
void poller_pipe_io_handler(int fd);

/*
 * Initialize the pollers till the best one is found.
 * If none works, returns 0, otherwise 1.
 * The pollers register themselves just before main() is called.
 */
int init_pollers();

/*
 * Deinitialize the pollers.
 */
void deinit_pollers();

/*
 * Some pollers may lose their connection after a fork(). It may be necessary
 * to re-create and initialize part of them again. Returns 0 in case of
 * failure, otherwise 1. The fork() function may be NULL if unused. In case
 * of error, the current poller is destroyed and the caller is responsible
 * for trying another one by calling init_pollers() again.
 */
int fork_poller();

/*
 * Lists the known pollers on <out>.
 * Should be performed only before initialization.
 */
int list_pollers(FILE *out);

/*
 * Runs the polling loop
 */
void run_poller();
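
/* Illustrative lifecycle sketch (assumption, not from the original header):
 * the poller entry points above are typically used in this order:
 *
 *	if (!init_pollers())	// probe registered pollers, keep the best
 *		exit(1);
 *	...
 *	run_poller();		// per-thread polling loop
 *	...
 *	deinit_pollers();	// release poller resources at exit
 *
 * After a fork(), fork_poller() must be called in the child; on failure the
 * current poller is destroyed and init_pollers() may be attempted again.
 */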

void fd_add_to_fd_list(volatile struct fdlist *list, int fd, int off);
void fd_rm_from_fd_list(volatile struct fdlist *list, int fd, int off);
void updt_fd_polling(const int fd);

/* Called from the poller to acknowledge we read an entry from the global
 * update list, to remove our bit from the update_mask, and remove it from
 * the list if we were the last one.
 */
static inline void done_update_polling(int fd)
{
	unsigned long update_mask;

	update_mask = _HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tid_bit);
	while ((update_mask & all_threads_mask) == 0) {
		/* If we were the last one that had to update that entry, remove it from the list */
		fd_rm_from_fd_list(&update_list, fd, offsetof(struct fdtab, update));
		update_mask = (volatile unsigned long)fdtab[fd].update_mask;
		if ((update_mask & all_threads_mask) != 0) {
			/* Maybe it's been re-updated in the meanwhile, and we
			 * wrongly removed it from the list, if so, re-add it
			 */
			fd_add_to_fd_list(&update_list, fd, offsetof(struct fdtab, update));
			update_mask = (volatile unsigned long)(fdtab[fd].update_mask);
			/* And then check again, just in case after all it
			 * should be removed, even if it's very unlikely, given
			 * the current thread wouldn't have been able to take
			 * care of it yet */
		} else
			break;
	}
}
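
/* Illustrative sketch (assumption, not from the original header): a poller
 * walking the shared update list acknowledges every entry it consumed, so
 * that the last thread to do so unlinks the entry:
 *
 *	int fd, next;
 *
 *	for (fd = update_list.first; fd != -1; fd = next) {
 *		next = fdtab[fd].update.next;	// fetch before a possible unlink
 *		...apply this thread's new polling state for <fd>...
 *		done_update_polling(fd);	// drop our tid_bit, unlink if last
 *	}
 */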

/*
 * returns true if the FD is active for recv
 */
static inline int fd_recv_active(const int fd)
{
	return (unsigned)fdtab[fd].state & FD_EV_ACTIVE_R;
}

/*
 * returns true if the FD is ready for recv
 */
static inline int fd_recv_ready(const int fd)
{
	return (unsigned)fdtab[fd].state & FD_EV_READY_R;
}

/*
 * returns true if the FD is active for send
 */
static inline int fd_send_active(const int fd)
{
	return (unsigned)fdtab[fd].state & FD_EV_ACTIVE_W;
}

/*
 * returns true if the FD is ready for send
 */
static inline int fd_send_ready(const int fd)
{
	return (unsigned)fdtab[fd].state & FD_EV_READY_W;
}

/*
 * returns true if the FD is active for recv or send
 */
static inline int fd_active(const int fd)
{
	return (unsigned)fdtab[fd].state & FD_EV_ACTIVE_RW;
}
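
/* Illustrative note (not from the original header): each direction carries an
 * ACTIVE bit (we want I/O on the FD) and a READY bit (the FD can do I/O), so
 * a poller only needs to subscribe FDs which are active but not ready:
 *
 *	if (fd_recv_active(fd) && !fd_recv_ready(fd))
 *		...register <fd> for read events with the poller...
 */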

/* Disable processing recv events on fd <fd> */
static inline void fd_stop_recv(int fd)
{
	if (!(fdtab[fd].state & FD_EV_ACTIVE_R) ||
	    !HA_ATOMIC_BTR(&fdtab[fd].state, FD_EV_ACTIVE_R_BIT))
		return;
}

/* Disable processing send events on fd <fd> */
static inline void fd_stop_send(int fd)
{
	if (!(fdtab[fd].state & FD_EV_ACTIVE_W) ||
	    !HA_ATOMIC_BTR(&fdtab[fd].state, FD_EV_ACTIVE_W_BIT))
		return;
}

/* Disable processing of events on fd <fd> for both directions. */
static inline void fd_stop_both(int fd)
{
	unsigned char old, new;

	old = fdtab[fd].state;
	do {
		if (!(old & FD_EV_ACTIVE_RW))
			return;
		new = old & ~FD_EV_ACTIVE_RW;
	} while (unlikely(!_HA_ATOMIC_CAS(&fdtab[fd].state, &old, new)));
}
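
/* Usage sketch (illustrative; fd_want_recv() belongs to the same API family
 * but is declared elsewhere, and buffer_full()/buffer_has_room() are
 * hypothetical): forwarding code pauses reading when its buffer fills up and
 * resumes later, without losing the READY state accumulated in between:
 *
 *	if (buffer_full(buf))
 *		fd_stop_recv(fd);	// clear ACTIVE_R, READY_R is preserved
 *	...
 *	if (buffer_has_room(buf))
 *		fd_want_recv(fd);	// set ACTIVE_R; no poll needed if READY_R
 */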

/* Report that FD <fd> cannot receive anymore without polling (EAGAIN detected). */
static inline void fd_cant_recv(const int fd)
{
	/* removing ready never changes polled status */
	if (!(fdtab[fd].state & FD_EV_READY_R) ||
	    !HA_ATOMIC_BTR(&fdtab[fd].state, FD_EV_READY_R_BIT))
		return;
}

/* Report that FD <fd> may receive again without polling. */
static inline void fd_may_recv(const int fd)
{
	/* marking ready never changes polled status */
	if ((fdtab[fd].state & FD_EV_READY_R) ||
	    HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_READY_R_BIT))
		return;
}
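
/* Illustrative sketch (not from the original header): I/O handlers report
 * EAGAIN through fd_cant_recv() so the FD gets polled, and the poller reports
 * readiness through fd_may_recv() so I/O may be attempted again:
 *
 *	ret = recv(fd, buf, count, 0);
 *	if (ret < 0 && errno == EAGAIN)
 *		fd_cant_recv(fd);	// clear READY_R, rely on polling
 *	...
 *	// poller side, when POLLIN (or POLLHUP/POLLERR) is reported:
 *	fd_may_recv(fd);		// set READY_R
 */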

/* Report that FD <fd> may receive again without polling but only if it's not
 * active yet. This is in order to speculatively try to enable I/Os when it's
 * highly likely that these will succeed, but without interfering with polling.
 */
static inline void fd_cond_recv(const int fd)
{
	if ((fdtab[fd].state & (FD_EV_ACTIVE_R|FD_EV_READY_R)) == 0)
		HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_READY_R_BIT);
}

/* Report that FD <fd> may send again without polling but only if it's not
 * active yet. This is in order to speculatively try to enable I/Os when it's
 * highly likely that these will succeed, but without interfering with polling.
 */
static inline void fd_cond_send(const int fd)
{
	if ((fdtab[fd].state & (FD_EV_ACTIVE_W|FD_EV_READY_W)) == 0)
		HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_READY_W_BIT);
}
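
/* Illustrative sketch (assumption, not from the original header): a typical
 * use is right after a successful connect() or accept(), where the socket is
 * very likely writable, to save a round trip through the poller:
 *
 *	fd_cond_send(fd);	// set READY_W only if the send direction is
 *				// neither active nor already ready
 */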

/* Report that FD <fd> may receive and send without polling. Used at FD
 * initialization.
 */
static inline void fd_may_both(const int fd)
{
	HA_ATOMIC_OR(&fdtab[fd].state, FD_EV_READY_RW);
}

/* Disable readiness when active. This is useful to interrupt reading when it
 * is suspected that the end of data might have been reached (eg: short read).
 * This can only be done using level-triggered pollers, so if any edge-triggered
 * is ever implemented, a test will have to be added here.
 */
static inline void fd_done_recv(const int fd)
{
	/* removing ready never changes polled status */
	if ((fdtab[fd].state & (FD_EV_ACTIVE_R|FD_EV_READY_R)) != (FD_EV_ACTIVE_R|FD_EV_READY_R) ||
	    !HA_ATOMIC_BTR(&fdtab[fd].state, FD_EV_READY_R_BIT))
		return;
}

/* Report that FD <fd> cannot send anymore without polling (EAGAIN detected). */
static inline void fd_cant_send(const int fd)
{
	/* removing ready never changes polled status */
	if (!(fdtab[fd].state & FD_EV_READY_W) ||
	    !HA_ATOMIC_BTR(&fdtab[fd].state, FD_EV_READY_W_BIT))
		return;
}
|
BUG/MEDIUM: fd/threads: fix excessive CPU usage on multi-thread accept
While experimenting with potentially improved fairness and latency using
ticket locks on a Ryzen 16-thread/8-core, a very strange situation happened
a lot for some levels of traffic. Around 300k connections per second, no
more connections would be accepted on the multi-threaded listener but all
others would continue to work fine. All attempts to trace showed that the
threads were all in the trylock in the fd cache, or in the spinlock of
fd_update_events(), or in the one of fd_may_recv(). But as indicated this
was not a deadlock since the process continues to work fine.
After quite some investigation it appeared that the issue is caused by a
lack of fairness between the fdcache's trylock and these functions' spin
locks above. In fact, regardless of the success or failure of the fdcache's
attempt at grabbing the lock, the poller was calling fd_update_events()
which locks the FD once for something that can be done with a CAS, and
then calls fd_may_recv() with another lock for something that most often
didn't change. The high contention on these spinlocks leaves no chance to
any other thread to grab the lock using trylock(), and once this happens,
there is no thread left to process incoming connection events nor to stop
polling on the FD, leaving all threads at 100% CPU but partially operational.
This patch addresses the issue by using bit-test-and-set instead of the OR
in fd_may_recv() / fd_may_send() so that nothing is done if the FD was
already configured as expected. It does the same in fd_update_events()
using a CAS to check if the FD's events need to be changed at all or not.
With this patch applied, it became impossible to reproduce the issue, and
now there's no way to saturate all 16 CPUs with the load used for testing,
as no more than 1350-1400 were noticed at 300+kcps vs 1600.
Ideally this patch should go further and try to remove the remaining
incarnations of the fdlock as this seems possible, but it's difficult
enough to be done in a distinct patch that will not have to be backported.
It is possible that workloads involving a high connection rate may slightly
benefit from this patch and observe a slightly lower CPU usage even when
the service doesn't misbehave.
This patch must be backported to 2.0 and 1.9.
2019-07-08 21:09:03 +00:00
|
|
|

/* Report that FD <fd> may send again without polling (EAGAIN not detected). */
static inline void fd_may_send(const int fd)
{
	/* marking ready never changes polled status */
	if ((fdtab[fd].state & FD_EV_READY_W) ||
	    HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_READY_W_BIT))
		return;
}
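
/* A minimal sketch of the bit-test-and-set/clear semantics assumed by the
 * functions above (the real HA_ATOMIC_BTS/BTR macros are platform-tuned and
 * may compile to a single instruction):
 *
 *	// returns non-zero if the bit was already set before the OR
 *	old = __atomic_fetch_or(&state, 1U << bit, __ATOMIC_SEQ_CST);
 *	return !!(old & (1U << bit));
 *
 * Testing the bit first and only touching memory when it must change keeps
 * the cache line shared in the common no-change case, instead of hammering
 * it with redundant atomic writes.
 */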

/* Prepare FD <fd> to try to receive */
static inline void fd_want_recv(int fd)
{
	if ((fdtab[fd].state & FD_EV_ACTIVE_R) ||
	    HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_ACTIVE_R_BIT))
		return;
	updt_fd_polling(fd);
}

/* Prepare FD <fd> to try to send */
static inline void fd_want_send(int fd)
{
	if ((fdtab[fd].state & FD_EV_ACTIVE_W) ||
	    HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_ACTIVE_W_BIT))
		return;
	updt_fd_polling(fd);
}
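
/* Usage sketch (illustrative): a non-blocking connect() typically arms write
 * polling to be woken once the connection completes or fails:
 *
 *	if (connect(fd, addr, addrlen) < 0 && errno == EINPROGRESS)
 *		fd_want_send(fd);   // POLLOUT will fire on completion
 *
 * Note that HA_ATOMIC_BTS() returns the previous bit value, so
 * updt_fd_polling() only runs on an actual 0->1 transition of the ACTIVE
 * flag; redundant fd_want_recv()/fd_want_send() calls are nearly free.
 */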

/* Set the fd as currently running on the current thread.
 * Returns 0 if all goes well, or -1 if we no longer own the fd, and should
 * do nothing with it.
 */
static inline int fd_set_running(int fd)
{
#ifndef HA_HAVE_CAS_DW
	HA_RWLOCK_RDLOCK(OTHER_LOCK, &fd_mig_lock);
	if (!(fdtab[fd].thread_mask & tid_bit)) {
		HA_RWLOCK_RDUNLOCK(OTHER_LOCK, &fd_mig_lock);
		return -1;
	}
	_HA_ATOMIC_OR(&fdtab[fd].running_mask, tid_bit);
	HA_RWLOCK_RDUNLOCK(OTHER_LOCK, &fd_mig_lock);
	return 0;
#else
	unsigned long old_masks[2];
	unsigned long new_masks[2];

	old_masks[0] = fdtab[fd].running_mask;
	old_masks[1] = fdtab[fd].thread_mask;
	do {
		if (!(old_masks[1] & tid_bit))
			return -1;
		new_masks[0] = fdtab[fd].running_mask | tid_bit;
		new_masks[1] = old_masks[1];
	} while (!(HA_ATOMIC_DWCAS(&fdtab[fd].running_mask, &old_masks, &new_masks)));
	return 0;
#endif
}
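
/* Design note: the DWCAS path validates thread_mask and sets the running bit
 * in one atomic step, so an FD migration cannot slip in between the check
 * and the update. A rough sketch of the double-word CAS semantics assumed
 * here (the real HA_ATOMIC_DWCAS is platform-specific, e.g. cmpxchg16b on
 * x86_64):
 *
 *	// compares the two words at <ptr> against <old>; on match stores
 *	// <new>, on mismatch refreshes <old> with the current value.
 *	int ha_dwcas(void *ptr, unsigned long old[2], unsigned long new[2]);
 */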

static inline void fd_set_running_excl(int fd)
{
	unsigned long old_mask = 0;
	while (!_HA_ATOMIC_CAS(&fdtab[fd].running_mask, &old_mask, tid_bit))
		old_mask = 0; /* the failed CAS overwrote old_mask with the
		               * observed value; reset it or the retry could
		               * succeed while another thread still runs.
		               */
}

static inline void fd_clr_running(int fd)
{
	_HA_ATOMIC_AND(&fdtab[fd].running_mask, ~tid_bit);
}
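
/* Usage sketch (illustrative): code touching an FD from a poller thread
 * brackets the access with the running mask so that a concurrent close or
 * migration can tell the FD is still in use; this is exactly the pattern
 * fd_update_events() follows below:
 *
 *	if (fd_set_running(fd) == 0) {
 *		fdtab[fd].iocb(fd);     // safe: we still own the FD
 *		fd_clr_running(fd);
 *	}
 */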

/* Update events seen for FD <fd> and its state if needed. This should be
 * called by the poller, passing FD_EV_*_{R,W,RW} in <evts>. FD_EV_ERR_*
 * doesn't need to also pass FD_EV_SHUT_*, it's implied. ERR and SHUT are
 * allowed to be reported regardless of R/W readiness.
 */
static inline void fd_update_events(int fd, unsigned char evts)
{
	unsigned long locked = atleast2(fdtab[fd].thread_mask);
	unsigned char old, new;
	int new_flags, must_stop;

	new_flags =
	      ((evts & FD_EV_READY_R) ? FD_POLL_IN  : 0) |
	      ((evts & FD_EV_READY_W) ? FD_POLL_OUT : 0) |
	      ((evts & FD_EV_SHUT_R)  ? FD_POLL_HUP : 0) |
	      ((evts & FD_EV_ERR_RW)  ? FD_POLL_ERR : 0);

	/* SHUTW reported while FD was active for writes is an error */
	if ((fdtab[fd].ev & FD_EV_ACTIVE_W) && (evts & FD_EV_SHUT_W))
		new_flags |= FD_POLL_ERR;

	/* compute the inactive events reported late that must be stopped */
	must_stop = 0;
	if (unlikely(!fd_active(fd))) {
		/* both sides stopped */
		must_stop = FD_POLL_IN | FD_POLL_OUT;
	}
	else if (unlikely(!fd_recv_active(fd) && (evts & (FD_EV_READY_R | FD_EV_SHUT_R | FD_EV_ERR_RW)))) {
		/* only send remains */
		must_stop = FD_POLL_IN;
	}
	else if (unlikely(!fd_send_active(fd) && (evts & (FD_EV_READY_W | FD_EV_SHUT_W | FD_EV_ERR_RW)))) {
		/* only recv remains */
		must_stop = FD_POLL_OUT;
	}

	old = fdtab[fd].ev;
	new = (old & FD_POLL_STICKY) | new_flags;

	if (unlikely(locked)) {
		/* Locked FDs (those shared by at least two threads) are atomically updated */
		while (unlikely(new != old && !_HA_ATOMIC_CAS(&fdtab[fd].ev, &old, new)))
			new = (old & FD_POLL_STICKY) | new_flags;
	} else {
		if (new != old)
			fdtab[fd].ev = new;
	}

	if (fdtab[fd].ev & (FD_POLL_IN | FD_POLL_HUP | FD_POLL_ERR))
		fd_may_recv(fd);

	if (fdtab[fd].ev & (FD_POLL_OUT | FD_POLL_ERR))
		fd_may_send(fd);

	if (fdtab[fd].iocb && fd_active(fd)) {
		if (fd_set_running(fd) == -1)
			return;
		fdtab[fd].iocb(fd);
		fd_clr_running(fd);
	}

	/* we had to stop this FD and it still must be stopped after the I/O
	 * cb's changes, so let's program an update for this.
	 */
	if (must_stop && !(fdtab[fd].update_mask & tid_bit)) {
		if (((must_stop & FD_POLL_IN)  && !fd_recv_active(fd)) ||
		    ((must_stop & FD_POLL_OUT) && !fd_send_active(fd)))
			if (!HA_ATOMIC_BTS(&fdtab[fd].update_mask, tid))
				fd_updt[fd_nbupdt++] = fd;
	}

	ti->flags &= ~TI_FL_STUCK; // this thread is still running
}
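
/* Illustrative only: the poller side roughly maps OS events to FD_EV_* flags
 * before calling fd_update_events(); e.g. an epoll-based loop would do
 * something like this (simplified, the exact mapping lives in ev_epoll):
 *
 *	uint32_t e = events[i].events;
 *	unsigned char n = ((e & EPOLLIN)    ? FD_EV_READY_R : 0) |
 *	                  ((e & EPOLLOUT)   ? FD_EV_READY_W : 0) |
 *	                  ((e & EPOLLRDHUP) ? FD_EV_SHUT_R  : 0) |
 *	                  ((e & EPOLLHUP)   ? FD_EV_SHUT_RW : 0) |
 *	                  ((e & EPOLLERR)   ? FD_EV_ERR_RW  : 0);
 *	fd_update_events(events[i].data.fd, n);
 */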

/* Prepares <fd> for being polled */
static inline void fd_insert(int fd, void *owner, void (*iocb)(int fd), unsigned long thread_mask)
{
	int locked = fdtab[fd].running_mask != tid_bit;

	if (locked)
		fd_set_running_excl(fd);
	fdtab[fd].owner = owner;
	fdtab[fd].iocb = iocb;
	fdtab[fd].ev = 0;
	fdtab[fd].linger_risk = 0;
	fdtab[fd].cloned = 0;
	fdtab[fd].thread_mask = thread_mask;
	/* note: do not reset polled_mask here as it indicates which poller
	 * still knows this FD from a possible previous round.
	 */
	if (locked)
		fd_clr_running(fd);
	/* the two directions are ready until proven otherwise */
	fd_may_both(fd);
	_HA_ATOMIC_ADD(&ha_used_fds, 1);
}
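
/* Usage sketch (illustrative): a freshly accepted connection is typically
 * registered then armed for reading; <conn> and <conn_fd_handler> stand for
 * whatever owner and I/O callback manage the FD in the caller's context:
 *
 *	fd = accept4(listen_fd, NULL, NULL, SOCK_NONBLOCK);
 *	if (fd >= 0) {
 *		fd_insert(fd, conn, conn_fd_handler, tid_bit);
 *		fd_want_recv(fd);
 *	}
 */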

/* Computes the bounded poll() timeout based on the next expiration timer <next>
 * by bounding it to MAX_DELAY_MS. <next> may equal TICK_ETERNITY. The pollers
 * just need to call this function right before polling to get their timeout
 * value. Timeouts that are already expired (possibly due to a pending event)
 * are accounted for in activity.poll_exp.
 */
static inline int compute_poll_timeout(int next)
{
	int wait_time;

	if (!tick_isset(next))
		wait_time = MAX_DELAY_MS;
	else if (tick_is_expired(next, now_ms)) {
		activity[tid].poll_exp++;
		wait_time = 0;
	}
	else {
		wait_time = TICKS_TO_MS(tick_remain(now_ms, next)) + 1;
		if (wait_time > MAX_DELAY_MS)
			wait_time = MAX_DELAY_MS;
	}
	return wait_time;
}
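
/* Worked example: with now_ms = 1000 and next = 1042, the poller sleeps
 * TICKS_TO_MS(tick_remain(1000, 1042)) + 1 = 43ms; the +1 rounds the last
 * partial tick up so we never wake just before the timer expires. A <next>
 * further away than MAX_DELAY_MS is clipped to MAX_DELAY_MS, and an already
 * expired <next> returns 0 while bumping activity[tid].poll_exp.
 */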

/* These are replacements for FD_SET, FD_CLR, FD_ISSET, working on uints */
static inline void hap_fd_set(int fd, unsigned int *evts)
{
	_HA_ATOMIC_OR(&evts[fd / (8*sizeof(*evts))], 1U << (fd & (8*sizeof(*evts) - 1)));
}

static inline void hap_fd_clr(int fd, unsigned int *evts)
{
	_HA_ATOMIC_AND(&evts[fd / (8*sizeof(*evts))], ~(1U << (fd & (8*sizeof(*evts) - 1))));
}

static inline unsigned int hap_fd_isset(int fd, unsigned int *evts)
{
	return evts[fd / (8*sizeof(*evts))] & (1U << (fd & (8*sizeof(*evts) - 1)));
}
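
/* Example: with 32-bit uints, fd 70 lands in evts[70 / 32] = evts[2] under
 * mask 1U << (70 & 31) = 0x40. The atomic OR/AND let multiple threads flip
 * bits of neighbouring fds sharing the same word without any lock.
 */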

static inline void wake_thread(int tid)
{
	char c = 'c';

	DISGUISE(write(poller_wr_pipe[tid], &c, 1));
}
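
/* This is the classic self-pipe wakeup: one byte written to the target
 * thread's pipe makes its poller (sleeping in epoll_wait/poll/etc.) return
 * immediately, since the read end is part of that thread's polled set.
 * DISGUISE() merely discards write()'s return value without tripping
 * warn_unused_result warnings.
 */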

#endif /* _PROTO_FD_H */

/*
 * Local variables:
 *  c-indent-level: 8
 *  c-basic-offset: 8
 * End:
 */