MINOR: quic: support a tolerance for spurious losses

Tests performed between a 1 Gbps connected server and a 100 Mbps client
located 95 ms away showed that:

  - we need 1.1 MB in flight to fill the link
  - rare but inevitable losses are sufficient to make Cubic's window
    collapse fast and take a long time to recover
  - a 100 MB object takes 69s to download
  - tolerating 1 loss between two ACKs suffices to shrink the download
    time to 20-22s
  - tolerating 2 losses brings it down to 17-20s
  - tolerating 4 losses brings it down to 14-17s

At 100 concurrent connections that fill the server's link:
  - 0 loss tolerance shows 2-3% losses
  - 1 loss tolerance shows 3-5% losses
  - 2 loss tolerance shows 10-13% losses
  - 4 loss tolerance shows 23-29% losses

As such, while setting this tolerance above zero can sometimes bring a
significant gain, it can also significantly waste bandwidth by sending
far more than can be received. While it's probably not a solution to
real-world problems, it repeatedly proved to be a very effective
troubleshooting tool, helping to figure out different root causes of
low transfer speeds. In spirit it is comparable to the no-cc congestion
algorithm, i.e. it must not be used except for experimentation.
Author: Willy Tarreau
Date:   2024-07-25 14:46:10 +02:00
Commit: 67bf1d6c9e (parent fab0e99aa1)
5 changed files with 45 additions and 2 deletions


@@ -3902,6 +3902,25 @@ tune.quic.cc-hystart { on | off }
   QUIC connections used as a replacement for the slow start phase of congestion
   control algorithms which may cause high packet loss. It is disabled by default.
+
+tune.quic.cc.cubic.min-losses <number>
+  Defines how many lost packets are needed for the Cubic congestion control
+  algorithm to really consider a loss event. Normally, any loss event is
+  considered to be the result of congestion and is sufficient for Cubic to
+  restart from a smaller window. But experiments show that losses may have
+  many causes that are unrelated to congestion, that can simply be qualified
+  as spurious losses, and for which shrinking the window has no effect other
+  than slowing communication down. Poor radio signal, out-of-order delivery,
+  high CPU usage on a client causing random delays, and system timer
+  imprecision are among the common causes. This setting makes Cubic a bit
+  more tolerant to spurious losses by changing the minimum number of losses
+  cumulated between two ACKs for them to be considered a loss event; it
+  defaults to 1. Some significant gains have been observed experimentally,
+  but always accompanied by an aggravation of the bandwidth wasted on
+  retransmits and an increased risk of saturating congested links. The value
+  2 may be used for short periods of time to compare some metrics. Never go
+  beyond 2 without an expert's prior analysis of the situation. The default
+  and minimum value is 1, which should normally not be changed.
 
 tune.quic.disable-udp-gso
   Disable UDP GSO emission. This kernel feature allows to emit multiple
   datagrams via a single system call which is more efficient for large
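
For reference, here is a minimal sketch of how this keyword would be set
during a troubleshooting session (not part of the patch; the value 2 is
illustrative only and should be reverted to the default of 1 afterwards):

    global
        tune.quic.cc.cubic.min-losses 2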


@@ -204,6 +204,7 @@ struct global {
 		unsigned int quic_retry_threshold;
 		unsigned int quic_reorder_ratio;
 		unsigned int quic_max_frame_loss;
+		unsigned int quic_cubic_loss_tol;
 #endif /* USE_QUIC */
 	} tune;
 	struct {


@@ -88,7 +88,7 @@ struct quic_cc {
 	/* <conn> is there only for debugging purpose. */
 	struct quic_conn *qc;
 	struct quic_cc_algo *algo;
-	uint32_t priv[18];
+	uint32_t priv[20];
 };
 
 struct quic_cc_path {


@@ -260,7 +260,9 @@ static int cfg_parse_quic_tune_setting(char **args, int section_type,
 	}
 
 	suffix = args[0] + prefix_len;
-	if (strcmp(suffix, "frontend.conn-tx-buffers.limit") == 0) {
+	if (strcmp(suffix, "cc.cubic.min-losses") == 0)
+		global.tune.quic_cubic_loss_tol = arg - 1;
+	else if (strcmp(suffix, "frontend.conn-tx-buffers.limit") == 0) {
 		memprintf(err, "'%s' keyword is now obsolote and has no effect. "
 		          "Use 'tune.quic.frontend.max-window-size' to limit Tx buffer allocation per connection.", args[0]);
 		return 1;
@@ -368,6 +370,7 @@ static struct cfg_kw_list cfg_kws = {ILH, {
 	{ CFG_GLOBAL, "tune.quic.socket-owner", cfg_parse_quic_tune_socket_owner },
 	{ CFG_GLOBAL, "tune.quic.backend.max-idle-timeou", cfg_parse_quic_time },
 	{ CFG_GLOBAL, "tune.quic.cc-hystart", cfg_parse_quic_tune_on_off },
+	{ CFG_GLOBAL, "tune.quic.cc.cubic.min-losses", cfg_parse_quic_tune_setting },
 	{ CFG_GLOBAL, "tune.quic.frontend.conn-tx-buffers.limit", cfg_parse_quic_tune_setting },
 	{ CFG_GLOBAL, "tune.quic.frontend.glitches-threshold", cfg_parse_quic_tune_setting },
 	{ CFG_GLOBAL, "tune.quic.frontend.max-streams-bidi", cfg_parse_quic_tune_setting },

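Note the 'arg - 1' mapping above: the keyword expresses the number of
losses required to trigger a loss event (minimum 1), while the internal
quic_cubic_loss_tol field stores how many losses may be absorbed first.
A small worked correspondence (values derived from the patch):

/* configured min-losses -> global.tune.quic_cubic_loss_tol
 * 1 (default)           -> 0: any loss between two ACKs triggers recovery
 * 2                     -> 1: one loss is absorbed, the second one
 *                             triggers recovery
 */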

@@ -83,6 +83,8 @@ struct cubic {
 	uint32_t recovery_start_time;
 	/* HyStart++ state. */
 	struct quic_hystart hystart;
+	/* Consecutive number of losses since last ACK */
+	uint32_t consecutive_losses;
 };
 
 static void quic_cc_cubic_reset(struct quic_cc *cc)
@@ -107,7 +109,10 @@ static void quic_cc_cubic_reset(struct quic_cc *cc)
 static int quic_cc_cubic_init(struct quic_cc *cc)
 {
+	struct cubic *c = quic_cc_priv(cc);
+
 	quic_cc_cubic_reset(cc);
+	c->consecutive_losses = 0;
 	return 1;
 }
@@ -486,13 +491,28 @@ static void quic_cc_cubic_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev)
 /* Congestion avoidance callback. */
 static void quic_cc_cubic_ca_cb(struct quic_cc *cc, struct quic_cc_event *ev)
 {
+	struct cubic *c = quic_cc_priv(cc);
+
 	TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
 	TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev);
 	switch (ev->type) {
 	case QUIC_CC_EVT_ACK:
+		c->consecutive_losses = 0;
 		quic_cubic_update(cc, ev->ack.acked);
 		break;
 	case QUIC_CC_EVT_LOSS:
+		/* Principle: we may want to tolerate one or a few occasional
+		 * losses that are *not* caused by congestion and over which
+		 * we have no control. Tests show that over long distances
+		 * this significantly improves transfer stability and
+		 * performance, but can quickly result in a massive loss
+		 * increase if set too high. This counter is reset upon ACKs.
+		 * Maybe we could refine this to consider only certain ACKs
+		 * though.
+		 */
+		c->consecutive_losses += ev->loss.count;
+		if (c->consecutive_losses <= global.tune.quic_cubic_loss_tol)
+			goto out;
 		quic_enter_recovery(cc);
 		break;
 	case QUIC_CC_EVT_ECN_CE:
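
To illustrate the event handling added above, here is a minimal
standalone sketch (plain C, not HAProxy code; only the names and the
tolerance semantics mirror the patch, everything else is hypothetical)
showing how losses accumulate between ACKs and when recovery is finally
entered, for a tolerance of 1, i.e. 'min-losses 2':

#include <stdio.h>
#include <stdint.h>

static uint32_t loss_tol = 1;  /* 'min-losses 2' => 1 extra loss tolerated */
static uint32_t consecutive_losses;

/* ACK event: any acknowledged progress resets the loss counter. */
static void on_ack(void)
{
	consecutive_losses = 0;
}

/* Loss event: returns 1 when recovery must be entered, 0 while the
 * cumulated losses since the last ACK stay within the tolerance and
 * the congestion window is kept.
 */
static int on_loss(uint32_t count)
{
	consecutive_losses += count;
	if (consecutive_losses <= loss_tol)
		return 0;
	return 1;
}

int main(void)
{
	printf("%d\n", on_loss(1)); /* 0: first loss since last ACK, absorbed */
	on_ack();                   /* counter reset by the ACK */
	printf("%d\n", on_loss(1)); /* 0: absorbed again */
	printf("%d\n", on_loss(1)); /* 1: second loss since last ACK, recovery */
	return 0;
}

Resetting the counter on every ACK means only losses that pile up with no
acknowledged progress in between are treated as congestion; the comment in
the patch notes that restricting this to only certain ACKs is a possible
future refinement.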