Mirror of http://git.haproxy.org/git/haproxy.git/
BUG/MINOR: fd: protect fd state harder against a concurrent takeover
There's a theoretical race (that we failed to trigger) in function fd_update_events(), which could strike on idle connections. The "locked" variable will most often be 0 as the FD is bound to the current thread only. Another thread could take it over once "locked" is set, changing the thread and running masks. The first thread would then update the FD's state non-atomically and possibly overwrite what the other thread was preparing. It still looks like the FD's state will ultimately converge, though.

The solution against this is to set the running flag earlier, so that a takeover() attempt cannot succeed, or so that the fd_set_running() attempt fails, indicating that nothing needs to be done on this FD. While this is sufficient for a simple fix to be backported, it leaves the FD actively polled in the calling thread; this will trigger a second wakeup, which will notice the absence of tid_bit in the thread_mask and get rid of it. A more elaborate solution would consist in calling fd_set_running() directly from the pollers before calling fd_update_events(), getting rid of the thread_mask test and letting the caller eliminate that FD from its list if needed.

Interestingly, this code also proves to be suboptimal in that it sets the FD state twice instead of calculating the new state at once and always using a CAS to set it. This is a leftover of a simplification that went into 2.4 and which should be explored in a future patch.

This may be backported as far as 2.2.
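To make the ordering concrete, here is a minimal, self-contained sketch (not HAProxy code) of the idea above: claim the running bit before reading or updating the FD state, so that a concurrent takeover either cannot succeed or is detected up front. The names (sketch_fd, claim_running(), update_events()) and the simplified state handling are hypothetical stand-ins for fdtab[fd], fd_set_running() and fd_update_events().

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct sketch_fd {
        _Atomic uint64_t thread_mask;   /* threads allowed to use this FD */
        _Atomic uint64_t running_mask;  /* threads currently handling it */
        _Atomic uint32_t state;         /* simplified per-FD event flags */
};

/* Try to set this thread's bit in running_mask, refusing if the thread is
 * no longer part of thread_mask (i.e. the FD was taken over under us).
 */
static int claim_running(struct sketch_fd *fd, uint64_t tid_bit)
{
        uint64_t old = atomic_load(&fd->running_mask);

        do {
                if (!(atomic_load(&fd->thread_mask) & tid_bit))
                        return -1;
        } while (!atomic_compare_exchange_weak(&fd->running_mask, &old, old | tid_bit));
        return 0;
}

static void update_events(struct sketch_fd *fd, uint64_t tid_bit, uint32_t evts)
{
        /* the patch's ordering: claim the running bit first; a takeover cannot
         * succeed while it is set, and if claiming fails there is nothing to do
         */
        if (claim_running(fd, tid_bit) == -1)
                return;

        /* only now is it safe to compute "locked" and touch the state */
        int locked = (atomic_load(&fd->thread_mask) != tid_bit);

        atomic_fetch_or(&fd->state, evts);
        (void)locked;           /* kept only to mirror the real code's variable */

        atomic_fetch_and(&fd->running_mask, ~tid_bit);
}

int main(void)
{
        struct sketch_fd fd = { .thread_mask = 0x1 };

        update_events(&fd, 0x1, 0x3);         /* this thread still owns the FD */
        printf("state=%#x\n", (unsigned)atomic_load(&fd.state));

        atomic_store(&fd.thread_mask, 0x2);   /* simulate a takeover by thread 2 */
        update_events(&fd, 0x1, 0x4);         /* now a no-op for thread 1 */
        printf("state=%#x\n", (unsigned)atomic_load(&fd.state));
        return 0;
}

In the real function the state computation, the iocb call and the orphan cleanup follow, as the diff below shows; the sketch only illustrates where the running-bit claim sits relative to the state access.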
parent 79e90b9615
commit d5402b8df8
@@ -356,10 +356,22 @@ static inline long fd_clr_running(int fd)
  */
 static inline void fd_update_events(int fd, uint evts)
 {
-        unsigned long locked = atleast2(fdtab[fd].thread_mask);
+        unsigned long locked;
         uint old, new;
         uint new_flags, must_stop;
 
+        ti->flags &= ~TI_FL_STUCK; // this thread is still running
+
+        /* do nothing if the FD was taken over under us */
+        if (fd_set_running(fd) == -1)
+                return;
+
+        locked = (fdtab[fd].thread_mask != tid_bit);
+
+        /* OK now we are guaranteed that our thread_mask was present and
+         * that we're allowed to update the FD.
+         */
+
         new_flags =
               ((evts & FD_EV_READY_R) ? FD_POLL_IN  : 0) |
               ((evts & FD_EV_READY_W) ? FD_POLL_OUT : 0) |
@@ -404,12 +416,17 @@ static inline void fd_update_events(int fd, uint evts)
                 fd_may_send(fd);
 
         if (fdtab[fd].iocb && fd_active(fd)) {
-                if (fd_set_running(fd) == -1)
-                        return;
                 fdtab[fd].iocb(fd);
-                if ((fdtab[fd].running_mask & tid_bit) &&
-                    fd_clr_running(fd) == 0 && !fdtab[fd].thread_mask)
-                        _fd_delete_orphan(fd);
         }
+
+        /* another thread might have attempted to close this FD in the mean
+         * time (e.g. timeout task) striking on a previous thread and closing.
+         * This is detected by both thread_mask and running_mask being 0 after
+         * we remove ourselves last.
+         */
+        if ((fdtab[fd].running_mask & tid_bit) &&
+            fd_clr_running(fd) == 0 && !fdtab[fd].thread_mask) {
+                _fd_delete_orphan(fd);
+        }
 
         /* we had to stop this FD and it still must be stopped after the I/O
@@ -421,8 +438,6 @@ static inline void fd_update_events(int fd, uint evts)
                 if (!HA_ATOMIC_BTS(&fdtab[fd].update_mask, tid))
                         fd_updt[fd_nbupdt++] = fd;
         }
-
-        ti->flags &= ~TI_FL_STUCK; // this thread is still running
 }
 
 /* Prepares <fd> for being polled */