BUG/MAJOR: stream-int: don't re-arm recv if send fails
When 1) HAProxy configured to enable splice on both directions 2) After some high load, there are 2 input channels with their socket buffer being non-empty and pipe being full at the same time, sitting in `fd_cache` without any other fds. The 2 channels will repeatedly be stopped for receiving (pipe full) and waken for receiving (data in socket), thus getting out and in of `fd_cache`, making their fd swapping location in `fd_cache`. There is a `if (entry < fd_cache_num && fd_cache[entry] != fd) continue;` statement in `fd_process_cached_events` to prevent frequent polling, but since the only 2 fds are constantly swapping location, `fd_cache[entry] != fd` will always hold true, thus HAProxy can't make any progress. The root cause of the issue is dual : - there is a single fd_cache, for next events and for the ones being processed, while using two distinct arrays would avoid the problem. - the write side of the stream interface wakes the read side up even when it couldn't write, and this one really is a bug. Due to CF_WRITE_PARTIAL not being cleared during fast forwarding, a failed send() attempt will still cause ->chk_rcv() to be called on the other side, re-creating an entry for its connection fd in the cache, causing the same sequence to be repeated indefinitely without any opportunity to make progress. CF_WRITE_PARTIAL used to be used for what is present in these tests : check if a recent write operation was performed. It's part of the CF_WRITE_ACTIVITY set and is tested to check if timeouts need to be updated. It's also used to detect if a failed connect() may be retried. What this patch does is use CF_WROTE_DATA() to check for a successful write for connection retransmits, and to clear CF_WRITE_PARTIAL before preparing to send in stream_int_notify(). This way, timeouts are still updated each time a write succeeds, but chk_rcv() won't be called anymore after a failed write. It seems the fix is required all the way down to 1.5. Without this patch, the only workaround at this point is to disable splicing in at least one direction. Strictly speaking, splicing is not absolutely required, as regular forwarding could theorically cause the issue to happen if the timing is appropriate, but in practice it appears impossible to reproduce it without splicing, and even with splicing it may vary. The following config manages to reproduce it after a few attempts (haproxy going 100% CPU and having to be killed) : global maxpipes 50000 maxconn 10000 listen srv1 option splice-request option splice-response bind :8001 server s1 127.0.0.1:8002 server$ tcploop 8002 L N20 A R10 S1000000 R10 S1000000 R10 S1000000 R10 S1000000 R10 S1000000 client$ tcploop 8001 N20 C T S1000000 R10 J
This commit is contained in:
parent
a258479e3f
commit
95fad5ba4b
|
@ -561,7 +561,7 @@ static int sess_update_st_con_tcp(struct stream *s)
|
|||
* attempts and error reports.
|
||||
*/
|
||||
if (unlikely(si->flags & (SI_FL_EXP|SI_FL_ERR))) {
|
||||
if (unlikely(req->flags & CF_WRITE_PARTIAL)) {
|
||||
if (unlikely(req->flags & CF_WROTE_DATA)) {
|
||||
/* Some data were sent past the connection establishment,
|
||||
* so we need to pretend we're established to log correctly
|
||||
* and let later states handle the failure.
|
||||
|
@ -587,7 +587,7 @@ static int sess_update_st_con_tcp(struct stream *s)
|
|||
}
|
||||
|
||||
/* OK, maybe we want to abort */
|
||||
if (!(req->flags & CF_WRITE_PARTIAL) &&
|
||||
if (!(req->flags & CF_WROTE_DATA) &&
|
||||
unlikely((rep->flags & CF_SHUTW) ||
|
||||
((req->flags & CF_SHUTW_NOW) && /* FIXME: this should not prevent a connection from establishing */
|
||||
((!(req->flags & CF_WRITE_ACTIVITY) && channel_is_empty(req)) ||
|
||||
|
|
|
@ -496,6 +496,10 @@ void stream_int_notify(struct stream_interface *si)
|
|||
* the buffer is full. We must not stop based on input data alone because
|
||||
* an HTTP parser might need more data to complete the parsing.
|
||||
*/
|
||||
|
||||
/* ensure it's only set if a write attempt has succeeded */
|
||||
ic->flags &= ~CF_WRITE_PARTIAL;
|
||||
|
||||
if (!channel_is_empty(ic) &&
|
||||
(si_opposite(si)->flags & SI_FL_WAIT_DATA) &&
|
||||
(ic->buf->i == 0 || ic->pipe)) {
|
||||
|
@ -610,6 +614,9 @@ static void si_conn_send(struct connection *conn)
|
|||
struct channel *oc = si_oc(si);
|
||||
int ret;
|
||||
|
||||
/* ensure it's only set if a write attempt has succeeded */
|
||||
oc->flags &= ~CF_WRITE_PARTIAL;
|
||||
|
||||
if (oc->pipe && conn->xprt->snd_pipe) {
|
||||
ret = conn->xprt->snd_pipe(conn, oc->pipe);
|
||||
if (ret > 0)
|
||||
|
@ -936,6 +943,9 @@ static void stream_int_chk_snd_conn(struct stream_interface *si)
|
|||
struct channel *oc = si_oc(si);
|
||||
struct connection *conn = __objt_conn(si->end);
|
||||
|
||||
/* ensure it's only set if a write attempt has succeeded */
|
||||
oc->flags &= ~CF_WRITE_PARTIAL;
|
||||
|
||||
if (unlikely(si->state > SI_ST_EST || (oc->flags & CF_SHUTW)))
|
||||
return;
|
||||
|
||||
|
|
Loading…
Reference in New Issue