From eaf05be0ee2fc9b971132c268bafe5c5fda23fd5 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Fri, 27 Dec 2019 15:52:34 +0100 Subject: [PATCH] OPTIM: polling: do not create update entries for FD removal In order to reduce the number of poller updates, we can benefit from the fact that modern pollers use sampling to report readiness and that under load they rarely report the same FD multiple times in a row. As such it's not always necessary to disable such FDs especially when we're almost certain they'll be re-enabled again and will require another set of syscalls. Now instead of creating an update for a (possibly temporary) removal, we only perform this removal if the FD is reported again as ready while inactive. In addition this is performed via another update so that alternating workloads like transfers have a chance to re-enable the FD without any syscall during the loop (typically after the data that filled a buffer have been sent). However we only do that for single-threaded FDs as the other ones require a more complex setup and are not on the critical path. This does cause a few spurious wakeups but almost totally eliminates the calls to epoll_ctl() on connections seeing intermittent traffic like HTTP/1 to a server or client. 
A typical example with 100k requests for 4 kB objects over 200 connections shows that the number of epoll_ctl() calls doesn't depend on the number of requests anymore but almost exclusively on the number of established connections: Before: % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 57.09 0.499964 0 654361 321190 recvfrom 38.33 0.335741 0 369097 1 epoll_wait 4.56 0.039898 0 44643 epoll_ctl 0.02 0.000211 1 200 200 connect ------ ----------- ----------- --------- --------- ---------------- 100.00 0.875814 1068301 321391 total After: % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 59.25 0.504676 0 657600 323630 recvfrom 40.68 0.346560 0 374289 1 epoll_wait 0.04 0.000370 0 620 epoll_ctl 0.03 0.000228 1 200 200 connect ------ ----------- ----------- --------- --------- ---------------- 100.00 0.851834 1032709 323831 total As expected there is also a slight increase of epoll_wait() calls since delaying de-activation of events can occasionally cause one spurious wakeup. --- include/proto/fd.h | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/include/proto/fd.h b/include/proto/fd.h index a50748572..e9985b600 100644 --- a/include/proto/fd.h +++ b/include/proto/fd.h @@ -197,7 +197,6 @@ static inline void fd_stop_recv(int fd) if (!(fdtab[fd].state & FD_EV_ACTIVE_R) || !HA_ATOMIC_BTR(&fdtab[fd].state, FD_EV_ACTIVE_R_BIT)) return; - updt_fd_polling(fd); } /* Disable processing send events on fd */ @@ -206,7 +205,6 @@ static inline void fd_stop_send(int fd) if (!(fdtab[fd].state & FD_EV_ACTIVE_W) || !HA_ATOMIC_BTR(&fdtab[fd].state, FD_EV_ACTIVE_W_BIT)) return; - updt_fd_polling(fd); } /* Disable processing of events on fd for both directions. 
*/ @@ -220,7 +218,6 @@ static inline void fd_stop_both(int fd) return; new = old & ~FD_EV_ACTIVE_RW; } while (unlikely(!_HA_ATOMIC_CAS(&fdtab[fd].state, &old, new))); - updt_fd_polling(fd); } /* Report that FD cannot receive anymore without polling (EAGAIN detected). */ @@ -327,7 +324,7 @@ static inline void fd_update_events(int fd, unsigned char evts) { unsigned long locked = atleast2(fdtab[fd].thread_mask); unsigned char old, new; - int new_flags; + int new_flags, must_stop; new_flags = ((evts & FD_EV_READY_R) ? FD_POLL_IN : 0) | @@ -340,6 +337,21 @@ static inline void fd_update_events(int fd, unsigned char evts) if ((fdtab[fd].ev & FD_EV_ACTIVE_W) && (evts & FD_EV_SHUT_W)) new_flags |= FD_POLL_ERR; + /* compute the inactive events reported late that must be stopped */ + must_stop = 0; + if (unlikely(!fd_active(fd))) { + /* both sides stopped */ + must_stop = FD_POLL_IN | FD_POLL_OUT; + } + else if (unlikely(!fd_recv_active(fd) && (evts & (FD_EV_READY_R | FD_EV_SHUT_R | FD_EV_ERR_R)))) { + /* only send remains */ + must_stop = FD_POLL_IN; + } + else if (unlikely(!fd_send_active(fd) && (evts & (FD_EV_READY_W | FD_EV_SHUT_W | FD_EV_ERR_W)))) { + /* only recv remains */ + must_stop = FD_POLL_OUT; + } + old = fdtab[fd].ev; new = (old & FD_POLL_STICKY) | new_flags; @@ -361,6 +373,16 @@ static inline void fd_update_events(int fd, unsigned char evts) if (fdtab[fd].iocb && fd_active(fd)) fdtab[fd].iocb(fd); + /* we had to stop this FD and it still must be stopped after the I/O + * cb's changes, so let's program an update for this. + */ + if (must_stop && !(fdtab[fd].update_mask & tid_bit)) { + if (((must_stop & FD_POLL_IN) && !fd_recv_active(fd)) || + ((must_stop & FD_POLL_OUT) && !fd_send_active(fd))) + if (!HA_ATOMIC_BTS(&fdtab[fd].update_mask, tid)) + fd_updt[fd_nbupdt++] = fd; + } + ti->flags &= ~TI_FL_STUCK; // this thread is still running }