From 67bf1d6c9e9410424cc5adc0a77477a820a14ad5 Mon Sep 17 00:00:00 2001
From: Willy Tarreau
Date: Thu, 25 Jul 2024 14:46:10 +0200
Subject: [PATCH] MINOR: quic: support a tolerance for spurious losses

Tests performed between a 1 Gbps connected server and a 100 Mbps client,
located 95 ms apart, showed that:
  - we need 1.1 MB in flight to fill the link
  - rare but inevitable losses are sufficient to make cubic's window
    collapse fast and take long to recover
  - a 100 MB object takes 69s to download
  - a tolerance for 1 loss between two ACKs suffices to shrink the
    download time to 20-22s
  - 2 losses bring it to 17-20s
  - 4 losses reach 14-17s

At 100 concurrent connections that fill the server's link:
  - 0 loss tolerance shows 2-3% losses
  - 1 loss tolerance shows 3-5% losses
  - 2 loss tolerance shows 10-13% losses
  - 4 loss tolerance shows 23-29% losses

As such, while there can sometimes be a significant gain in setting this
tolerance above zero, it can also significantly waste bandwidth by sending
far more than can be received. While it's probably not a solution to real
world problems, it repeatedly proved to be a very effective troubleshooting
tool, helping to figure out different root causes of low transfer speeds.
In spirit it is comparable to the no-cc congestion algorithm, i.e. it must
not be used except for experimentation.
---
 doc/configuration.txt       | 19 +++++++++++++++++++
 include/haproxy/global-t.h  |  1 +
 include/haproxy/quic_cc-t.h |  2 +-
 src/cfgparse-quic.c         |  5 ++++-
 src/quic_cc_cubic.c         | 20 ++++++++++++++++++++
 5 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/doc/configuration.txt b/doc/configuration.txt
index 4307982cd2..78c9b358df 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -3902,6 +3902,25 @@ tune.quic.cc-hystart { on | off }
   QUIC connections used as a replacement for the slow start phase of
   congestion control algorithms which may cause high packet loss. It is
   disabled by default.
 
+tune.quic.cc.cubic.min-losses
+  Defines how many lost packets are needed for the Cubic congestion control
+  algorithm to really consider a loss event. Normally, any loss event is
+  considered as the result of congestion and is sufficient for Cubic to restart
+  from a smaller window. But experiments show that there can be a variety of
+  causes for losses that are not at all caused by congestion, that can simply
+  be qualified as spurious losses, and for which adjusting the window will have
+  no effect, except slowing communication down. Poor radio signal, out-of-order
+  delivery, high CPU usage on a client causing random delays, as well as system
+  timer imprecision can be among the common causes for this. This setting
+  allows to make Cubic a bit more tolerant to spurious losses, by changing the
+  minimum number of cumulated losses between two ACKs to be considered as a
+  loss event, which defaults to 1. Some significant gains have been observed
+  experimentally, but always accompanied with an aggravation of the bandwidth
+  wasted on retransmits, and an increased risk of saturating congested links.
+  The value 2 may be used for short periods of time to compare some metrics.
+  Never go beyond 2 without an expert's prior analysis of the situation. The
+  default and minimum value is 1. Always use 1.
+
 tune.quic.disable-udp-gso
   Disable UDP GSO emission.
   This kernel feature allows to emit multiple datagrams via a single system
   call which is more efficient for large
diff --git a/include/haproxy/global-t.h b/include/haproxy/global-t.h
index bdc2d35486..8ded609c57 100644
--- a/include/haproxy/global-t.h
+++ b/include/haproxy/global-t.h
@@ -204,6 +204,7 @@ struct global {
 		unsigned int quic_retry_threshold;
 		unsigned int quic_reorder_ratio;
 		unsigned int quic_max_frame_loss;
+		unsigned int quic_cubic_loss_tol;
 #endif /* USE_QUIC */
 	} tune;
 	struct {
diff --git a/include/haproxy/quic_cc-t.h b/include/haproxy/quic_cc-t.h
index 92115d2fd7..b4e72e78dc 100644
--- a/include/haproxy/quic_cc-t.h
+++ b/include/haproxy/quic_cc-t.h
@@ -88,7 +88,7 @@ struct quic_cc {
 	/* is there only for debugging purpose. */
 	struct quic_conn *qc;
 	struct quic_cc_algo *algo;
-	uint32_t priv[18];
+	uint32_t priv[20];
 };
 
 struct quic_cc_path {
diff --git a/src/cfgparse-quic.c b/src/cfgparse-quic.c
index 31168ad870..7924281bad 100644
--- a/src/cfgparse-quic.c
+++ b/src/cfgparse-quic.c
@@ -260,7 +260,9 @@ static int cfg_parse_quic_tune_setting(char **args, int section_type,
 	}
 
 	suffix = args[0] + prefix_len;
-	if (strcmp(suffix, "frontend.conn-tx-buffers.limit") == 0) {
+	if (strcmp(suffix, "cc.cubic.min-losses") == 0)
+		global.tune.quic_cubic_loss_tol = arg - 1;
+	else if (strcmp(suffix, "frontend.conn-tx-buffers.limit") == 0) {
 		memprintf(err, "'%s' keyword is now obsolote and has no effect. "
 			  "Use 'tune.quic.frontend.max-window-size' to limit Tx buffer allocation per connection.", args[0]);
 		return 1;
@@ -368,6 +370,7 @@ static struct cfg_kw_list cfg_kws = {ILH, {
 	{ CFG_GLOBAL, "tune.quic.socket-owner", cfg_parse_quic_tune_socket_owner },
 	{ CFG_GLOBAL, "tune.quic.backend.max-idle-timeou", cfg_parse_quic_time },
 	{ CFG_GLOBAL, "tune.quic.cc-hystart", cfg_parse_quic_tune_on_off },
+	{ CFG_GLOBAL, "tune.quic.cc.cubic.min-losses", cfg_parse_quic_tune_setting },
 	{ CFG_GLOBAL, "tune.quic.frontend.conn-tx-buffers.limit", cfg_parse_quic_tune_setting },
 	{ CFG_GLOBAL, "tune.quic.frontend.glitches-threshold", cfg_parse_quic_tune_setting },
 	{ CFG_GLOBAL, "tune.quic.frontend.max-streams-bidi", cfg_parse_quic_tune_setting },
diff --git a/src/quic_cc_cubic.c b/src/quic_cc_cubic.c
index 245917d8c1..898476a2d3 100644
--- a/src/quic_cc_cubic.c
+++ b/src/quic_cc_cubic.c
@@ -83,6 +83,8 @@ struct cubic {
 	uint32_t recovery_start_time;
 	/* HyStart++ state. */
 	struct quic_hystart hystart;
+	/* Consecutive number of losses since last ACK */
+	uint32_t consecutive_losses;
 };
 
 static void quic_cc_cubic_reset(struct quic_cc *cc)
@@ -107,7 +109,10 @@ static void quic_cc_cubic_reset(struct quic_cc *cc)
 
 static int quic_cc_cubic_init(struct quic_cc *cc)
 {
+	struct cubic *c = quic_cc_priv(cc);
+
 	quic_cc_cubic_reset(cc);
+	c->consecutive_losses = 0;
 	return 1;
 }
 
@@ -486,13 +491,28 @@ static void quic_cc_cubic_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev)
 /* Congestion avoidance callback. */
 static void quic_cc_cubic_ca_cb(struct quic_cc *cc, struct quic_cc_event *ev)
 {
+	struct cubic *c = quic_cc_priv(cc);
+
 	TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
 	TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev);
 	switch (ev->type) {
 	case QUIC_CC_EVT_ACK:
+		c->consecutive_losses = 0;
 		quic_cubic_update(cc, ev->ack.acked);
 		break;
 	case QUIC_CC_EVT_LOSS:
+		/* Principle: we may want to tolerate one or a few occasional
+		 * losses that are *not* caused by congestion that we'd have
+		 * any control on.
+		 * Tests show that over long distances this significantly
+		 * improves the transfer stability and performance, but can
+		 * quickly result in a massive loss increase if set too high.
+		 * This counter is reset upon ACKs. Maybe we could refine this
+		 * to consider only certain ACKs though.
+		 */
+		c->consecutive_losses += ev->loss.count;
+		if (c->consecutive_losses <= global.tune.quic_cubic_loss_tol)
+			goto out;
 		quic_enter_recovery(cc);
 		break;
 	case QUIC_CC_EVT_ECN_CE:
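
For reference, a minimal configuration sketch of how this setting could be
exercised during a short troubleshooting session. Only the global keyword
itself comes from this patch; the frontend/backend names, addresses and
certificate path are illustrative placeholders:

    global
        # Require 2 cumulated losses between two ACKs before Cubic treats
        # them as a real loss event, i.e. tolerate 1 spurious loss.
        # Troubleshooting only; revert to the default of 1 afterwards.
        tune.quic.cc.cubic.min-losses 2

    frontend quic_in
        mode http
        bind quic4@:443 ssl crt /path/to/cert.pem alpn h3
        default_backend app

    backend app
        server s1 192.0.2.10:8080

Note that the parser stores <value> - 1 into global.tune.quic_cubic_loss_tol,
i.e. the number of extra losses tolerated between two ACKs, which is why
quic_cc_cubic_ca_cb() compares the consecutive_losses counter with <= before
deciding to enter recovery.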