mirror of
http://git.haproxy.org/git/haproxy.git/
synced 2025-01-05 03:29:35 +00:00
BUG/MEDIUM: fd/threads: fix excessive CPU usage on multi-thread accept
While experimenting with potentially improved fairness and latency using ticket locks on a Ryzen 16-thread/8-core, a very strange situation happened a lot for some levels of traffic. Around 300k connections per second, no more connections would be accepted on the multi-threaded listener but all others would continue to work fine. All attempts to trace showed that the threads were all in the trylock in the fd cache, or in the spinlock of fd_update_events(), or in the one of fd_may_recv(). But as indicated this was not a deadlock since the process continues to work fine. After quite some investigation it appeared that the issue is caused by a lack of fairness between the fdcache's trylock and these functions' spin locks above. In fact, regardless of the success or failure of the fdcache's attempt at grabbing the lock, the poller was calling fd_update_events() which locks the FD once for something that can be done with a CAS, and then calls fd_may_recv() with another lock for something that most often didn't change. The high contention on these spinlocks leaves no chance to any other thread to grab the lock using trylock(), and once this happens, there is no thread left to process incoming connection events nor to stop polling on the FD, leaving all threads at 100% CPU but partially operational. This patch addresses the issue by using bit-test-and-set instead of the OR in fd_may_recv() / fd_may_send() so that nothing is done if the FD was already configured as expected. It does the same in fd_update_events() using a CAS to check if the FD's events need to be changed at all or not. With this patch applied, it became impossible to reproduce the issue, and now there's no way to saturate all 16 CPUs with the load used for testing, as no more than 1350-1400 were noticed at 300+kcps vs 1600. Ideally this patch should go further and try to remove the remaining incarnations of the fdlock as this seems possible, but it's difficult enough to be done in a distinct patch that will not have to be backported. It is possible that workloads involving a high connection rate may slightly benefit from this patch and observe a slightly lower CPU usage even when the service doesn't misbehave. This patch must be backported to 2.0 and 1.9.
This commit is contained in:
parent
85b2cae63c
commit
1dad3843dc
@ -377,13 +377,15 @@ static inline void fd_cant_recv(const int fd)
|
||||
HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
|
||||
}
|
||||
|
||||
/* Report that FD <fd> can receive anymore without polling. */
|
||||
/* Report that FD <fd> may receive again without polling. */
|
||||
static inline void fd_may_recv(const int fd)
|
||||
{
|
||||
unsigned long locked;
|
||||
|
||||
/* marking ready never changes polled status */
|
||||
_HA_ATOMIC_OR(&fdtab[fd].state, FD_EV_READY_R);
|
||||
if ((fdtab[fd].state & FD_EV_READY_R) ||
|
||||
HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_READY_R_BIT))
|
||||
return;
|
||||
|
||||
locked = atleast2(fdtab[fd].thread_mask);
|
||||
if (locked)
|
||||
@ -449,13 +451,15 @@ static inline void fd_cant_send(const int fd)
|
||||
HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
|
||||
}
|
||||
|
||||
/* Report that FD <fd> can send anymore without polling (EAGAIN detected). */
|
||||
/* Report that FD <fd> may send again without polling (EAGAIN not detected). */
|
||||
static inline void fd_may_send(const int fd)
|
||||
{
|
||||
unsigned long locked;
|
||||
|
||||
/* marking ready never changes polled status */
|
||||
_HA_ATOMIC_OR(&fdtab[fd].state, FD_EV_READY_W);
|
||||
if ((fdtab[fd].state & FD_EV_READY_W) ||
|
||||
HA_ATOMIC_BTS(&fdtab[fd].state, FD_EV_READY_W_BIT))
|
||||
return;
|
||||
|
||||
locked = atleast2(fdtab[fd].thread_mask);
|
||||
if (locked)
|
||||
@ -522,13 +526,19 @@ static inline void fd_want_send(int fd)
|
||||
static inline void fd_update_events(int fd, int evts)
|
||||
{
|
||||
unsigned long locked = atleast2(fdtab[fd].thread_mask);
|
||||
unsigned char old, new;
|
||||
|
||||
if (locked)
|
||||
HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
|
||||
fdtab[fd].ev &= FD_POLL_STICKY;
|
||||
fdtab[fd].ev |= evts;
|
||||
if (locked)
|
||||
HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
|
||||
old = fdtab[fd].ev;
|
||||
new = (old & FD_POLL_STICKY) | evts;
|
||||
|
||||
if (unlikely(locked)) {
|
||||
/* Locked FDs (those with more than 2 threads) are atomically updated */
|
||||
while (unlikely(new != old && !_HA_ATOMIC_CAS(&fdtab[fd].ev, &old, new)))
|
||||
new = (old & FD_POLL_STICKY) | evts;
|
||||
} else {
|
||||
if (new != old)
|
||||
fdtab[fd].ev = new;
|
||||
}
|
||||
|
||||
if (fdtab[fd].ev & (FD_POLL_IN | FD_POLL_HUP | FD_POLL_ERR))
|
||||
fd_may_recv(fd);
|
||||
|
@ -50,6 +50,10 @@ enum {
|
||||
#define FD_EV_READY 2U
|
||||
#define FD_EV_POLLED 4U
|
||||
|
||||
/* bits positions for a few flags */
|
||||
#define FD_EV_READY_R_BIT 1
|
||||
#define FD_EV_READY_W_BIT 5
|
||||
|
||||
#define FD_EV_STATUS (FD_EV_ACTIVE | FD_EV_POLLED | FD_EV_READY)
|
||||
#define FD_EV_STATUS_R (FD_EV_STATUS)
|
||||
#define FD_EV_STATUS_W (FD_EV_STATUS << 4)
|
||||
|
Loading…
Reference in New Issue
Block a user