BUG/MEDIUM: quic: handle retransmit for standalone FIN STREAM

STREAM frames have dedicated handling on retransmission. A special check
is done to remove data already acked in case of duplicated frames, thus
only unacked data are retransmitted.

This handling is faulty in case of an empty STREAM frame with FIN set.
On retransmission, this frame does not cover any unacked range as it is
empty and is thus discarded. This may cause the transfer to freeze with
the client waiting indefinitely for the FIN notification.

To handle retransmission of empty FIN STREAM frame, qc_stream_desc layer
have been extended. A new flag QC_SD_FL_WAIT_FOR_FIN is set by MUX QUIC
when FIN has been transmitted. If set, it prevents qc_stream_desc to be
freed until FIN is acknowledged. On retransmission side,
qc_stream_frm_is_acked() has been updated. It now reports false if
FIN bit is set on the frame and qc_stream_desc has QC_SD_FL_WAIT_FOR_FIN
set.

This must be backported up to 2.6. However, this modifies heavily
critical section for ACK handling and retransmission. As such, it must
be backported only after a period of observation.

This issue can be reproduced by using the following socat command as
server to add delay between the response and connection closure :
  $ socat TCP-LISTEN:<port>,fork,reuseaddr,crlf SYSTEM:'echo "HTTP/1.1 200 OK"; echo ""; sleep 1;'

On the client side, ngtcp2 can be used to simulate packet drop. Without
this patch, connection will be interrupted on QUIC idle timeout or
haproxy client timeout with ERR_DRAINING on ngtcp2 :
  $ ngtcp2-client --exit-on-all-streams-close -r 0.3 <host> <port> "http://<host>:<port>/?s=32o"

Alternatively to ngtcp2 random loss, an extra haproxy patch can also be
used to force skipping the emission of the empty STREAM frame :

diff --git a/include/haproxy/quic_tx-t.h b/include/haproxy/quic_tx-t.h
index efbdfe687..1ff899acd 100644
--- a/include/haproxy/quic_tx-t.h
+++ b/include/haproxy/quic_tx-t.h
@@ -26,6 +26,8 @@ extern struct pool_head *pool_head_quic_cc_buf;
 /* Flag a sent packet as being probing with old data */
 #define QUIC_FL_TX_PACKET_PROBE_WITH_OLD_DATA (1UL << 5)

+#define QUIC_FL_TX_PACKET_SKIP_SENDTO (1UL << 6)
+
 /* Structure to store enough information about TX QUIC packets. */
 struct quic_tx_packet {
 	/* List entry point. */
diff --git a/src/quic_tx.c b/src/quic_tx.c
index 2f199ac3c..2702fc9b9 100644
--- a/src/quic_tx.c
+++ b/src/quic_tx.c
@@ -318,7 +318,7 @@ static int qc_send_ppkts(struct buffer *buf, struct ssl_sock_ctx *ctx)
 		tmpbuf.size = tmpbuf.data = dglen;

 		TRACE_PROTO("TX dgram", QUIC_EV_CONN_SPPKTS, qc);
-		if (!skip_sendto) {
+		if (!skip_sendto && !(first_pkt->flags & QUIC_FL_TX_PACKET_SKIP_SENDTO)) {
 			int ret = qc_snd_buf(qc, &tmpbuf, tmpbuf.data, 0, gso);
 			if (ret < 0) {
 				if (gso && ret == -EIO) {
@@ -354,6 +354,7 @@ static int qc_send_ppkts(struct buffer *buf, struct ssl_sock_ctx *ctx)
 					qc->cntrs.sent_bytes_gso += ret;
 			}
 		}
+		first_pkt->flags &= ~QUIC_FL_TX_PACKET_SKIP_SENDTO;

 		b_del(buf, dglen + QUIC_DGRAM_HEADLEN);
 		qc->bytes.tx += tmpbuf.data;
@@ -2066,6 +2067,17 @@ static int qc_do_build_pkt(unsigned char *pos, const unsigned char *end,
 				continue;
 			}

+			switch (cf->type) {
+			case QUIC_FT_STREAM_8 ... QUIC_FT_STREAM_F:
+				if (!cf->stream.len && (qc->flags & QUIC_FL_CONN_TX_MUX_CONTEXT)) {
+					TRACE_USER("artificially drop packet with empty STREAM frame", QUIC_EV_CONN_TXPKT, qc);
+					pkt->flags |= QUIC_FL_TX_PACKET_SKIP_SENDTO;
+				}
+				break;
+			default:
+				break;
+			}
+
 			quic_tx_packet_refinc(pkt);
 			cf->pkt = pkt;
 		}
This commit is contained in:
Amaury Denoyelle 2024-08-05 18:58:49 +02:00
parent 714009b7bc
commit e177cf341c
6 changed files with 65 additions and 34 deletions

View File

@ -20,6 +20,7 @@ struct qc_stream_buf {
};
#define QC_SD_FL_RELEASE 0x00000001 /* set when MUX has finished to use this stream */
#define QC_SD_FL_WAIT_FOR_FIN 0x00000002 /* set if sent FIN is waiting for acknowledgement */
/* QUIC STREAM descriptor.
*

View File

@ -11,7 +11,7 @@ struct quic_conn;
struct qc_stream_desc *qc_stream_desc_new(uint64_t id, enum qcs_type, void *ctx,
struct quic_conn *qc);
void qc_stream_desc_release(struct qc_stream_desc *stream, uint64_t final_size);
int qc_stream_desc_ack(struct qc_stream_desc **stream, size_t offset, size_t len);
int qc_stream_desc_ack(struct qc_stream_desc **stream, size_t offset, size_t len, int fin);
void qc_stream_desc_free(struct qc_stream_desc *stream, int closing);
struct buffer *qc_stream_buf_get(struct qc_stream_desc *stream);

View File

@ -1884,8 +1884,12 @@ void qcc_streams_sent_done(struct qcs *qcs, uint64_t data, uint64_t offset)
if (qcs->flags & (QC_SF_FIN_STREAM|QC_SF_DETACH)) {
/* Close stream locally. */
qcs_close_local(qcs);
/* Reset flag to not emit multiple FIN STREAM frames. */
qcs->flags &= ~QC_SF_FIN_STREAM;
if (qcs->flags & QC_SF_FIN_STREAM) {
qcs->stream->flags |= QC_SD_FL_WAIT_FOR_FIN;
/* Reset flag to not emit multiple FIN STREAM frames. */
qcs->flags &= ~QC_SF_FIN_STREAM;
}
}
}

View File

@ -21,13 +21,19 @@ int qc_stream_frm_is_acked(struct quic_conn *qc, struct quic_frame *f)
{
const struct qf_stream *frm = &f->stream;
const struct qc_stream_desc *s = frm->stream;
const int frm_fin = f->type & QUIC_STREAM_FRAME_TYPE_FIN_BIT;
if (!eb64_lookup(&qc->streams_by_id, frm->id)) {
TRACE_DEVEL("STREAM frame already acked : stream released", QUIC_EV_CONN_PRSAFRM, qc, f);
return 1;
}
if (frm->offset.key + frm->len <= s->ack_offset) {
/* Frame cannot advertise FIN for a smaller data range. */
BUG_ON(frm_fin && frm->offset.key + frm->len < s->ack_offset);
if (frm->offset.key + frm->len < s->ack_offset ||
(frm->offset.key + frm->len == s->ack_offset &&
(!frm_fin || !(s->flags & QC_SD_FL_WAIT_FOR_FIN)))) {
TRACE_DEVEL("STREAM frame already acked : fully acked range", QUIC_EV_CONN_PRSAFRM, qc, f);
return 1;
}

View File

@ -218,15 +218,18 @@ static int quic_stream_try_to_consume(struct quic_conn *qc,
struct qf_stream *strm_frm;
struct quic_frame *frm;
size_t offset, len;
int fin;
strm_frm = eb64_entry(frm_node, struct qf_stream, offset);
frm = container_of(strm_frm, struct quic_frame, stream);
offset = strm_frm->offset.key;
len = strm_frm->len;
fin = frm->type & QUIC_STREAM_FRAME_TYPE_FIN_BIT;
if (offset > stream->ack_offset)
break;
if (qc_stream_desc_ack(&stream, offset, len)) {
if (qc_stream_desc_ack(&stream, offset, len, fin)) {
/* cf. next comment : frame may be freed at this stage. */
TRACE_DEVEL("stream consumed", QUIC_EV_CONN_ACKSTRM,
qc, stream ? strm_frm : NULL, stream);
@ -246,7 +249,6 @@ static int quic_stream_try_to_consume(struct quic_conn *qc,
frm_node = eb64_next(frm_node);
eb64_delete(&strm_frm->offset);
frm = container_of(strm_frm, struct quic_frame, stream);
qc_release_frm(qc, frm);
}
@ -272,6 +274,7 @@ static void qc_handle_newly_acked_frm(struct quic_conn *qc, struct quic_frame *f
struct qc_stream_desc *stream = NULL;
const size_t offset = strm_frm->offset.key;
const size_t len = strm_frm->len;
const int fin = frm->type & QUIC_STREAM_FRAME_TYPE_FIN_BIT;
/* do not use strm_frm->stream as the qc_stream_desc instance
* might be freed at this stage. Use the id to do a proper
@ -291,7 +294,7 @@ static void qc_handle_newly_acked_frm(struct quic_conn *qc, struct quic_frame *f
TRACE_DEVEL("acked stream", QUIC_EV_CONN_ACKSTRM, qc, strm_frm, stream);
if (offset <= stream->ack_offset) {
if (qc_stream_desc_ack(&stream, offset, len)) {
if (qc_stream_desc_ack(&stream, offset, len, fin)) {
TRACE_DEVEL("stream consumed", QUIC_EV_CONN_ACKSTRM,
qc, strm_frm, stream);
}

View File

@ -17,6 +17,12 @@ DECLARE_STATIC_POOL(pool_head_quic_stream_buf, "qc_stream_buf",
sizeof(struct qc_stream_buf));
/* Returns true if nothing to ack yet for stream <s> including FIN bit. */
static inline int qc_stream_desc_done(const struct qc_stream_desc *s)
{
return !(s->flags & QC_SD_FL_WAIT_FOR_FIN) && LIST_ISEMPTY(&s->buf_list);
}
static void qc_stream_buf_free(struct qc_stream_desc *stream,
struct qc_stream_buf **stream_buf)
{
@ -116,7 +122,7 @@ void qc_stream_desc_release(struct qc_stream_desc *stream,
/* Remove unsent data from current buffer. */
if (final_size < tail_offset) {
b_sub(buf, tail_offset - final_size);
/* Remove buffer is all ACK already received. */
/* Remove buffer if all ACK already received. */
if (!b_data(buf))
qc_stream_buf_free(stream, &stream_buf);
}
@ -125,53 +131,64 @@ void qc_stream_desc_release(struct qc_stream_desc *stream,
stream->buf = NULL;
}
if (LIST_ISEMPTY(&stream->buf_list)) {
if (qc_stream_desc_done(stream)) {
/* if no buffer left we can free the stream. */
qc_stream_desc_free(stream, 0);
}
}
/* Acknowledge data at <offset> of length <len> for <stream>. It is handled
* only if it covers a range corresponding to stream.ack_offset. After data
* removal, if the stream does not contains data any more and is already
* released, the instance stream is freed. <stream> is set to NULL to indicate
* this.
/* Acknowledge data at <offset> of length <len> for <stream> with <fin> set for
* the final data. After data removal, if the stream does not contains data
* any more and is already released, the instance stream is freed. <stream> is
* set to NULL to indicate this.
*
* Returns the count of byte removed from stream. Do not forget to check if
* <stream> is NULL after invocation.
*/
int qc_stream_desc_ack(struct qc_stream_desc **stream, size_t offset, size_t len)
int qc_stream_desc_ack(struct qc_stream_desc **stream, size_t offset, size_t len,
int fin)
{
struct qc_stream_desc *s = *stream;
struct qc_stream_buf *stream_buf;
struct buffer *buf;
struct qc_stream_buf *stream_buf = NULL;
struct buffer *buf = NULL;
size_t diff;
if (offset + len <= s->ack_offset || offset > s->ack_offset)
/* Cannot advertise FIN for an inferior data range. */
BUG_ON(fin && offset + len < s->ack_offset);
if (offset + len < s->ack_offset || offset > s->ack_offset)
return 0;
/* There must be at least a buffer or we must not report an ACK. */
BUG_ON(LIST_ISEMPTY(&s->buf_list));
/* get oldest buffer from buf_list */
stream_buf = LIST_NEXT(&s->buf_list, struct qc_stream_buf *, list);
buf = &stream_buf->buf;
diff = offset + len - s->ack_offset;
s->ack_offset += diff;
b_del(buf, diff);
if (diff) {
/* Buf list cannot be empty if there is still unacked data. */
BUG_ON(LIST_ISEMPTY(&s->buf_list));
/* Free oldest buffer if all data acknowledged. */
if (!b_data(buf)) {
qc_stream_buf_free(s, &stream_buf);
/* get oldest buffer from buf_list */
stream_buf = LIST_NEXT(&s->buf_list, struct qc_stream_buf *, list);
buf = &stream_buf->buf;
/* Free stream instance if already released and no buffers left. */
if ((s->flags & QC_SD_FL_RELEASE) && LIST_ISEMPTY(&s->buf_list)) {
qc_stream_desc_free(s, 0);
*stream = NULL;
s->ack_offset += diff;
b_del(buf, diff);
/* Free oldest buffer if all data acknowledged. */
if (!b_data(buf)) {
qc_stream_buf_free(s, &stream_buf);
buf = NULL;
}
}
if (fin) {
/* Mark FIN as acknowledged. */
s->flags &= ~QC_SD_FL_WAIT_FOR_FIN;
}
/* Free stream instance if already released and everything acknowledged. */
if ((s->flags & QC_SD_FL_RELEASE) && qc_stream_desc_done(s)) {
qc_stream_desc_free(s, 0);
*stream = NULL;
}
return diff;
}