BUG/MEDIUM: quic: fix connection freeze on post handshake
After handshake completion, QUIC server is responsible to emit HANDSHAKE_DONE frame. Some clients wait for it to begin STREAM transfers. Previously, there was no explicit tasklet_wakeup() after handshake completion, which is necessary to emit post-handshake frames. In most cases, this was undetected as most client continue emission which will reschedule the tasklet. However, as there is no tasklet_wakeup(), this is not a consistent behavior. If this bug occurs, it causes a connection freeze, preventing the client to emit any request. The connection is finally closed on idle timeout. To fix this, add an explicit tasklet_wakeup() after handshake completion. It sounds simple enough but in fact it's difficult to find the correct location efor tasklet_wakeup() invocation, as post-handshake is directly linked to connection accept, with different orderings. Notably, if 0-RTT is used, connection can be accepted prior handshake completion. Another major point is that along HANDSHAKE_DONE frame, a series of NEW_CONNECTION_ID frames are emitted. However, these new CIDs allocation must occur after connection is migrated to its new thread as these CIDs are tied to it. A BUG_ON() is present to check this in qc_set_tid_affinity(). With all this in mind, 2 locations were selected for the necessary tasklet_wakeup() : * on qc_xprt_start() : this is useful for standard case without 0-RTT. This ensures that this is done only after connection thread migration. * on qc_ssl_provide_all_quic_data() : this is done on handshake completion with 0-RTT used. In this case only, connection is already accepted and migrated, so tasklet_wakeup() is safe. Note that as a side-change, quic_accept_push_qc() API has evolved to better reflect differences between standard and 0-RTT usages. It is now forbidden to call it multiple times on a single quic_conn instance. A BUG_ON() has been added. This issue is labelled as medium even though it seems pretty rare. It was only reproducible using QUIC interop runner, with haproxy compiled with LibreSSL with quic-go as client. However, affected code parts are pretty sensible, which justify the chosen severity. This should fix github issue #2418. It should be backported up to 2.6, after a brief period of observation. Note that the extra comment added in qc_set_tid_affinity() can be removed in 2.6 as thread migration is not implemented for this version. Other parts should apply without conflict.
This commit is contained in:
parent
3a3c2b2695
commit
d8f1ff8648
|
@ -1818,7 +1818,14 @@ int qc_set_tid_affinity(struct quic_conn *qc, uint new_tid, struct listener *new
|
|||
qc_detach_th_ctx_list(qc, 0);
|
||||
|
||||
node = eb64_first(qc->cids);
|
||||
BUG_ON(!node || eb64_next(node)); /* One and only one CID must be present before affinity rebind. */
|
||||
/* One and only one CID must be present before affinity rebind.
|
||||
*
|
||||
* This could be triggered fairly easily if tasklet is scheduled just
|
||||
* before thread migration for post-handshake state to generate new
|
||||
* CIDs. In this case, QUIC_FL_CONN_IO_TO_REQUEUE should be used
|
||||
* instead of tasklet_wakeup().
|
||||
*/
|
||||
BUG_ON(!node || eb64_next(node));
|
||||
conn_id = eb64_entry(node, struct quic_connection_id, seq_num);
|
||||
|
||||
/* At this point no connection was accounted for yet on this
|
||||
|
|
|
@ -999,18 +999,15 @@ void qc_want_recv(struct quic_conn *qc)
|
|||
struct quic_accept_queue *quic_accept_queues;
|
||||
|
||||
/* Install <qc> on the queue ready to be accepted. The queue task is then woken
|
||||
* up. If <qc> accept is already scheduled or done, nothing is done.
|
||||
* up.
|
||||
*/
|
||||
void quic_accept_push_qc(struct quic_conn *qc)
|
||||
{
|
||||
struct quic_accept_queue *queue = &quic_accept_queues[tid];
|
||||
struct li_per_thread *lthr = &qc->li->per_thr[ti->ltid];
|
||||
|
||||
/* early return if accept is already in progress/done for this
|
||||
* connection
|
||||
*/
|
||||
if (qc->flags & QUIC_FL_CONN_ACCEPT_REGISTERED)
|
||||
return;
|
||||
/* A connection must only be accepted once per instance. */
|
||||
BUG_ON(qc->flags & QUIC_FL_CONN_ACCEPT_REGISTERED);
|
||||
|
||||
BUG_ON(MT_LIST_INLIST(&qc->accept_list));
|
||||
HA_ATOMIC_INC(&qc->li->rx.quic_curr_accept);
|
||||
|
|
|
@ -593,8 +593,17 @@ int qc_ssl_provide_quic_data(struct ncbuf *ncbuf,
|
|||
if (qc_is_listener(ctx->qc)) {
|
||||
qc->flags |= QUIC_FL_CONN_NEED_POST_HANDSHAKE_FRMS;
|
||||
qc->state = QUIC_HS_ST_CONFIRMED;
|
||||
/* The connection is ready to be accepted. */
|
||||
quic_accept_push_qc(qc);
|
||||
|
||||
if (!(qc->flags & QUIC_FL_CONN_ACCEPT_REGISTERED)) {
|
||||
quic_accept_push_qc(qc);
|
||||
}
|
||||
else {
|
||||
/* Connection already accepted if 0-RTT used.
|
||||
* In this case, schedule quic-conn to ensure
|
||||
* post-handshake frames are emitted.
|
||||
*/
|
||||
tasklet_wakeup(qc->wait_event.tasklet);
|
||||
}
|
||||
|
||||
BUG_ON(qc->li->rx.quic_curr_handshake == 0);
|
||||
HA_ATOMIC_DEC(&qc->li->rx.quic_curr_handshake);
|
||||
|
|
|
@ -140,6 +140,13 @@ static int qc_xprt_start(struct connection *conn, void *ctx)
|
|||
/* mux-quic can now be considered ready. */
|
||||
qc->mux_state = QC_MUX_READY;
|
||||
|
||||
/* Schedule quic-conn to ensure post handshake frames are emitted. This
|
||||
* is not done for 0-RTT as xprt->start happens before handshake
|
||||
* completion.
|
||||
*/
|
||||
if (qc->flags & QUIC_FL_CONN_NEED_POST_HANDSHAKE_FRMS)
|
||||
tasklet_wakeup(qc->wait_event.tasklet);
|
||||
|
||||
ret = 1;
|
||||
out:
|
||||
TRACE_LEAVE(QUIC_EV_CONN_NEW, qc);
|
||||
|
|
Loading…
Reference in New Issue