mirror of
http://git.haproxy.org/git/haproxy.git/
synced 2024-12-18 01:14:38 +00:00
BUG/MEDIUM: queue: deal with a rare TOCTOU in assign_server_and_queue()
After checking that a server or backend is full, it remains possible to call pendconn_add() just after the last pending request finishes, so that there's no more connection on the server for very low maxconn (typically 1), leaving new ones in queue until the timeout. The approach depends on where the request was queued, though: - when queued on a server, we can simply detect that we may dequeue pending requests and wake them up, it will wake our request and that's fine. This needs to be done in srv_redispatch_connect() when the server is set. - when queued on a backend, it means that all servers are done with their requests. It means that all servers were full before the check and all were empty after. In practice this will only concern configs with fewer servers than threads. It's where the issue was first spotted, and it's very hard to reproduce with more than one server. In this case we need to load-balance again in order to find a spare server (or even to fail). For this, we call the newly added dedicated function pendconn_must_try_again() that tells whether or not a blocked pending request was dequeued and needs to be retried. This should be backported along with pendconn_must_try_again() to all stable versions, but with extreme care because over time the queue's locking evolved.
This commit is contained in:
parent
1a8f3a368f
commit
5541d4995d
@ -967,6 +967,7 @@ int assign_server_and_queue(struct stream *s)
|
||||
struct server *srv;
|
||||
int err;
|
||||
|
||||
balance_again:
|
||||
if (s->pend_pos)
|
||||
return SRV_STATUS_INTERNAL;
|
||||
|
||||
@ -1035,8 +1036,18 @@ int assign_server_and_queue(struct stream *s)
|
||||
return SRV_STATUS_FULL;
|
||||
|
||||
p = pendconn_add(s);
|
||||
if (p)
|
||||
if (p) {
|
||||
/* There's a TOCTOU here: it may happen that between the
|
||||
* moment we decided to queue the request and the moment
|
||||
* it was done, the last active request on the server
|
||||
* ended and no new one will be able to dequeue that one.
|
||||
* Since we already have our server we don't care, this
|
||||
* will be handled by the caller which will check for
|
||||
* this condition and will immediately dequeue it if
|
||||
* possible.
|
||||
*/
|
||||
return SRV_STATUS_QUEUED;
|
||||
}
|
||||
else
|
||||
return SRV_STATUS_INTERNAL;
|
||||
}
|
||||
@ -1048,8 +1059,23 @@ int assign_server_and_queue(struct stream *s)
|
||||
case SRV_STATUS_FULL:
|
||||
/* queue this stream into the proxy's queue */
|
||||
p = pendconn_add(s);
|
||||
if (p)
|
||||
if (p) {
|
||||
/* There's a TOCTOU here: it may happen that between the
|
||||
* moment we decided to queue the request and the moment
|
||||
* it was done, the last active request in the backend
|
||||
* ended and no new one will be able to dequeue that one.
|
||||
* This is more visible with maxconn 1 where it can
|
||||
* happen 1/1000 times, though the vast majority are
|
||||
* correctly recovered from. Since it's so rare and we
|
||||
* have no server assigned, the best solution in this
|
||||
* case is to detect the condition, dequeue our request
|
||||
* and balance it again.
|
||||
*/
|
||||
if (unlikely(pendconn_must_try_again(p)))
|
||||
goto balance_again;
|
||||
|
||||
return SRV_STATUS_QUEUED;
|
||||
}
|
||||
else
|
||||
return SRV_STATUS_INTERNAL;
|
||||
|
||||
@ -1966,7 +1992,16 @@ int srv_redispatch_connect(struct stream *s)
|
||||
case SRV_STATUS_QUEUED:
|
||||
s->conn_exp = tick_add_ifset(now_ms, s->be->timeout.queue);
|
||||
s->scb->state = SC_ST_QUE;
|
||||
/* do nothing else and do not wake any other stream up */
|
||||
|
||||
/* handle the unlikely event where we added to the server's
|
||||
* queue just after checking the server was full and before
|
||||
* it released its last entry (with extremely low maxconn).
|
||||
* Not needed for backend queues, already handled in
|
||||
* assign_server_and_queue().
|
||||
*/
|
||||
if (unlikely(srv && may_dequeue_tasks(srv, s->be)))
|
||||
process_srv_queue(srv);
|
||||
|
||||
return 1;
|
||||
|
||||
case SRV_STATUS_INTERNAL:
|
||||
|
Loading…
Reference in New Issue
Block a user