From 1ed9d37c88ba9d0337394887572bed3e2243d0b7 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Wed, 9 Oct 2024 08:30:38 +0200 Subject: [PATCH] MINOR: mux-h2: add tune.h2.be.rxbuf and tune.h2.fe.rxbuf global settings These settings allow to change the total buffer size allocated to the backend and frontend respectively. This way it's no longer necessary to play with tune.bufsize nor increase the number of streams to benefit from more buffers. Setting tune.h2.fe.rxbuf to 4m to match a sender's max tcp_wmem resulted in 257 Mbps for a single stream at 103ms vs 121 Mbps default (or 5.1 Mbps with a single buffer and 64kB window). --- doc/configuration.txt | 57 +++++++++++++++++++++++++++++++++++-------- src/mux_h2.c | 37 +++++++++++++++++++++++++++- 2 files changed, 83 insertions(+), 11 deletions(-) diff --git a/doc/configuration.txt b/doc/configuration.txt index 1c5cc93375..9e28a282f1 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -3425,7 +3425,8 @@ tune.h2.be.initial-window-size not set, the common default value set by tune.h2.initial-window-size applies. It can make sense to slightly increase this value to allow faster downloads or to reduce CPU usage on the servers, at the expense of creating unfairness - between clients. It doesn't affect resource usage. + between clients. It is better to use tune.h2.be.rxbuf instead, which does not + cause any unfairness. It doesn't affect resource usage. See also: tune.h2.initial-window-size. @@ -3443,6 +3444,22 @@ tune.h2.be.max-concurrent-streams case). It is highly recommended not to increase this value; some might find it optimal to run at low values (1..5 typically). +tune.h2.be.rxbuf + Sets the HTTP/2 receive buffer size for outgoing connections, in bytes. This + size will be rounded up to the next multiple of tune.bufsize and will be + shared between all streams uploading data (both HEADERS and DATA frames). 
In + any case, one buffer will always be granted to each stream, and 7/8 of the + unused buffers will be shared between streams downloading payload, allowing + to significantly improve download performance and avoid head-of-line blocking + (HoL) on backend connections shared between multiple clients when http-reuse + is set to "always". The advertised per-stream window is automatically + adjusted to reflect the available space so that in practice it should not be + required to touch tune.h2.be.initial-window-size. If less than the size + required to deal with all streams is set, this minimum will be used. The + default value is about 1600k (100 streams with 16kB buffers each). + + See also: tune.h2.be.initial-window-size, tune.h2.fe.rxbuf, http-reuse. + tune.h2.fe.glitches-threshold Sets the threshold for the number of glitches on a frontend connection, where that connection will automatically be killed. This allows to automatically @@ -3461,11 +3478,12 @@ tune.h2.fe.initial-window-size from HAProxy. This setting only affects payload contents (i.e. the body of POST requests), not headers. When not set, the common default value set by tune.h2.initial-window-size applies. It can make sense to increase this value - to allow faster uploads. The default value of 65536 allows up to 5 Mbps of - bandwidth per client over a 100 ms ping time, and 500 Mbps for 1 ms ping + to allow faster uploads. The default value of 65536 allows at least 5 Mbps of + bandwidth per stream over a 100 ms ping time, and 500 Mbps for 1 ms ping time. It doesn't affect resource usage. Using too large values may cause clients to experience a lack of responsiveness if pages are accessed in - parallel to large uploads. + parallel to large uploads. It is better to use tune.h2.fe.rxbuf instead, + which does not cause any unfairness. See also: tune.h2.initial-window-size. 
@@ -3514,6 +3532,21 @@ tune.h2.fe.max-total-streams errors with this setting; as such it may be needed to disable it when running performance benchmarks. See also "tune.h2.fe.max-concurrent-streams". +tune.h2.fe.rxbuf + Sets the HTTP/2 receive buffer size for incoming connections, in bytes. This + size will be rounded up to the next multiple of tune.bufsize and will be + shared between all streams uploading data (both HEADERS and DATA frames). In + any case, one buffer will always be granted to each stream, and 7/8 of the + unused buffers will be shared between streams uploading payload, allowing to + significantly improve upload performance. The advertised per-stream window is + automatically adjusted to reflect the available space so that in practice it + should not be required to touch tune.h2.fe.initial-window-size. If less than + the size required to deal with all streams is set, this minimum will be used. + The default value of 1600k (100 streams with 16kB buffers each) permits + roughly 130 Mbps of upload speed for a client with a 100ms RTT. + + See also: tune.h2.fe.initial-window-size and tune.h2.be.rxbuf. + tune.h2.header-table-size Sets the HTTP/2 dynamic header table size. It defaults to 4096 bytes and cannot be larger than 65536 bytes. A larger value may help certain clients @@ -3525,13 +3558,17 @@ tune.h2.initial-window-size Sets the default value for the HTTP/2 initial window size, on both incoming and outgoing connections. This value is used for incoming connections when tune.h2.fe.initial-window-size is not set, and by outgoing connections when - tune.h2.be.initial-window-size is not set. The default value is 65536, which - for uploads roughly allows up to 5 Mbps of bandwidth per client over a + tune.h2.be.initial-window-size is not set. This setting is used both as the + initial value and as a minimum per stream. 
The default value is 65536, which + for uploads roughly allows at least 5 Mbps of bandwidth per stream over a network showing a 100 ms ping time, or 500 Mbps over a 1-ms local network. - Given that changing the default value will both increase upload speeds and - cause more unfairness between clients on downloads, it is recommended to - instead use the side-specific settings tune.h2.fe.initial-window-size and - tune.h2.be.initial-window-size. + When fewer receive buffers than the maximum are in use, within the limits + defined by tune.h2.be.rxbuf and tune.h2.fe.rxbuf, unused buffers will be + shared between receiving streams. As such there is normally no point in + changing this default setting. Given that changing this default value will + both increase upload speeds and cause more unfairness between clients on + downloads, it is recommended to instead use the side-specific settings + tune.h2.fe.initial-window-size and tune.h2.be.initial-window-size. tune.h2.max-concurrent-streams Sets the default HTTP/2 maximum number of concurrent streams per connection diff --git a/src/mux_h2.c b/src/mux_h2.c index ddc49eb980..a5c03b50ff 100644 --- a/src/mux_h2.c +++ b/src/mux_h2.c @@ -468,6 +468,8 @@ static int h2_be_settings_initial_window_size = 0; /* backend's default init static int h2_fe_settings_initial_window_size = 0; /* frontend's default initial value */ static int h2_be_glitches_threshold = 0; /* backend's max glitches: unlimited */ static int h2_fe_glitches_threshold = 0; /* frontend's max glitches: unlimited */ +static uint h2_be_rxbuf = 0; /* backend's default total rxbuf (bytes) */ +static uint h2_fe_rxbuf = 0; /* frontend's default total rxbuf (bytes) */ static unsigned int h2_settings_max_concurrent_streams = 100; /* default value */ static unsigned int h2_be_settings_max_concurrent_streams = 0; /* backend value */ static unsigned int h2_fe_settings_max_concurrent_streams = 0; /* frontend value */ @@ -1170,6 +1172,7 @@ static int h2_init(struct connection 
*conn, struct proxy *prx, struct session *s struct h2c *h2c; struct task *t = NULL; void *conn_ctx = conn->ctx; + uint nb_rxbufs; TRACE_ENTER(H2_EV_H2C_NEW); @@ -1239,7 +1242,10 @@ static int h2_init(struct connection *conn, struct proxy *prx, struct session *s h2c->st0 = H2_CS_PREFACE; h2c->conn = conn; h2c->streams_limit = h2c_max_concurrent_streams(h2c); - bl_init(h2c->shared_rx_bufs, h2c->streams_limit + 1); + nb_rxbufs = (h2c->flags & H2_CF_IS_BACK) ? h2_be_rxbuf : h2_fe_rxbuf; + nb_rxbufs = (nb_rxbufs + global.tune.bufsize - 9 - 1) / (global.tune.bufsize - 9); + nb_rxbufs = MAX(nb_rxbufs, h2c->streams_limit); + bl_init(h2c->shared_rx_bufs, nb_rxbufs + 1); h2c->max_id = -1; h2c->errcode = H2_ERR_NO_ERROR; @@ -8221,6 +8227,27 @@ static int h2_parse_max_frame_size(char **args, int section_type, struct proxy * return 0; } +/* config parser for global "tune.h2.{be.,fe.}rxbuf". The character at args[0][8] selects the backend (b) or frontend (anything else) variable, then args[1] is parsed as a size into that variable by parse_size_err(). Returns 0 on success, or -1 when too_many_args() rejects the line or when parse_size_err() reports an unexpected character, in which case <err> is filled via memprintf(). */ +static int h2_parse_rxbuf(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) +{ + const char *errptr; + uint *vptr; + + if (too_many_args(1, args, err, NULL)) + return -1; + + /* backend/frontend: offset 8 is the first character after the "tune.h2." prefix of the keyword */ + vptr = (args[0][8] == 'b') ? 
&h2_be_rxbuf : &h2_fe_rxbuf; + + *vptr = atoi(args[1]); /* NOTE(review): parse_size_err() below is handed the same vptr and presumably overwrites this value, which would make the atoi() store redundant -- confirm */ + if ((errptr = parse_size_err(args[1], vptr)) != NULL) { + memprintf(err, "'%s': unexpected character '%c' in size argument '%s'.", args[0], *errptr, args[1]); + return -1; + } + return 0; +} /* config parser for global "tune.h2.zero-copy-fwd-send" */ static int h2_parse_zero_copy_fwd_snd(char **args, int section_type, struct proxy *curpx, @@ -8282,10 +8309,12 @@ static struct cfg_kw_list cfg_kws = {ILH, { { CFG_GLOBAL, "tune.h2.be.glitches-threshold", h2_parse_glitches_threshold }, { CFG_GLOBAL, "tune.h2.be.initial-window-size", h2_parse_initial_window_size }, { CFG_GLOBAL, "tune.h2.be.max-concurrent-streams", h2_parse_max_concurrent_streams }, + { CFG_GLOBAL, "tune.h2.be.rxbuf", h2_parse_rxbuf }, { CFG_GLOBAL, "tune.h2.fe.glitches-threshold", h2_parse_glitches_threshold }, { CFG_GLOBAL, "tune.h2.fe.initial-window-size", h2_parse_initial_window_size }, { CFG_GLOBAL, "tune.h2.fe.max-concurrent-streams", h2_parse_max_concurrent_streams }, { CFG_GLOBAL, "tune.h2.fe.max-total-streams", h2_parse_max_total_streams }, + { CFG_GLOBAL, "tune.h2.fe.rxbuf", h2_parse_rxbuf }, { CFG_GLOBAL, "tune.h2.header-table-size", h2_parse_header_table_size }, { CFG_GLOBAL, "tune.h2.initial-window-size", h2_parse_initial_window_size }, { CFG_GLOBAL, "tune.h2.max-concurrent-streams", h2_parse_max_concurrent_streams }, @@ -8302,6 +8331,7 @@ INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws); static int init_h2() { uint max_bufs; + uint rx_bufs; pool_head_hpack_tbl = create_pool("hpack_tbl", h2_settings_header_table_size, @@ -8320,6 +8350,11 @@ static int init_h2() h2_be_settings_max_concurrent_streams : h2_settings_max_concurrent_streams); + /* check for forced rxbufs */ + rx_bufs = MAX(h2_be_rxbuf, h2_fe_rxbuf); + rx_bufs = (rx_bufs + global.tune.bufsize - 9 - 1) / (global.tune.bufsize - 9); + max_bufs = MAX(max_bufs, rx_bufs); + pool_head_h2_rx_bufs = create_pool("h2_rx_bufs", (max_bufs + 1) * sizeof(struct bl_elem), 
MEM_F_SHARED|MEM_F_EXACT);