diff --git a/doc/configuration.txt b/doc/configuration.txt index 1c5cc93375..9e28a282f1 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -3425,7 +3425,8 @@ tune.h2.be.initial-window-size not set, the common default value set by tune.h2.initial-window-size applies. It can make sense to slightly increase this value to allow faster downloads or to reduce CPU usage on the servers, at the expense of creating unfairness - between clients. It doesn't affect resource usage. + between clients. It is better to use tune.h2.be.rxbuf instead, which does not + cause any unfairness. It doesn't affect resource usage. See also: tune.h2.initial-window-size. @@ -3443,6 +3444,22 @@ tune.h2.be.max-concurrent-streams case). It is highly recommended not to increase this value; some might find it optimal to run at low values (1..5 typically). +tune.h2.be.rxbuf + Sets the HTTP/2 receive buffer size for outgoing connections, in bytes. This + size will be rounded up to the next multiple of tune.bufsize and will be + shared between all streams downloading data (both HEADERS and DATA frames). In + any case, one buffer will always be granted to each stream, and 7/8 of the + unused buffers will be shared between streams downloading payload, allowing + to significantly improve download performance and avoid head-of-line blocking + (HoL) on backend connections shared between multiple clients when http-reuse + is set to "always". The advertised per-stream window is automatically + adjusted to reflect the available space so that in practice it should not be + required to touch tune.h2.be.initial-window-size. If less than the size + required to deal with all streams is set, this minimum will be used. The + default value is about 1600k (100 streams with 16kB buffers each). + + See also: tune.h2.be.initial-window-size, tune.h2.fe.rxbuf, http-reuse. 
+ tune.h2.fe.glitches-threshold Sets the threshold for the number of glitches on a frontend connection, where that connection will automatically be killed. This allows to automatically @@ -3461,11 +3478,12 @@ tune.h2.fe.initial-window-size from HAProxy. This setting only affects payload contents (i.e. the body of POST requests), not headers. When not set, the common default value set by tune.h2.initial-window-size applies. It can make sense to increase this value - to allow faster uploads. The default value of 65536 allows up to 5 Mbps of - bandwidth per client over a 100 ms ping time, and 500 Mbps for 1 ms ping + to allow faster uploads. The default value of 65536 allows at least 5 Mbps of + bandwidth per stream over a 100 ms ping time, and 500 Mbps for 1 ms ping time. It doesn't affect resource usage. Using too large values may cause clients to experience a lack of responsiveness if pages are accessed in - parallel to large uploads. + parallel to large uploads. It is better to use tune.h2.fe.rxbuf instead, + which does not cause any unfairness. See also: tune.h2.initial-window-size. @@ -3514,6 +3532,21 @@ tune.h2.fe.max-total-streams errors with this setting; as such it may be needed to disable it when running performance benchmarks. See also "tune.h2.fe.max-concurrent-streams". +tune.h2.fe.rxbuf + Sets the HTTP/2 receive buffer size for incoming connections, in bytes. This + size will be rounded up to the next multiple of tune.bufsize and will be + shared between all streams uploading data (both HEADERS and DATA frames). In + any case, one buffer will always be granted to each stream, and 7/8 of the + unused buffers will be shared between streams uploading payload, allowing to + significantly improve upload performance. The advertised per-stream window is + automatically adjusted to reflect the available space so that in practice it + should not be required to touch tune.h2.fe.initial-window-size. 
If less than + the size required to deal with all streams is set, this minimum will be used. + The default value of 1600k (100 streams with 16kB buffers each) permits + roughly 130 Mbps of upload speed for a client with a 100ms RTT. + + See also: tune.h2.fe.initial-window-size and tune.h2.be.rxbuf. + tune.h2.header-table-size Sets the HTTP/2 dynamic header table size. It defaults to 4096 bytes and cannot be larger than 65536 bytes. A larger value may help certain clients @@ -3525,13 +3558,17 @@ tune.h2.initial-window-size Sets the default value for the HTTP/2 initial window size, on both incoming and outgoing connections. This value is used for incoming connections when tune.h2.fe.initial-window-size is not set, and by outgoing connections when - tune.h2.be.initial-window-size is not set. The default value is 65536, which - for uploads roughly allows up to 5 Mbps of bandwidth per client over a + tune.h2.be.initial-window-size is not set. This setting is used both as the + initial value and as a minimum per stream. The default value is 65536, which + for uploads roughly allows at least 5 Mbps of bandwidth per stream over a network showing a 100 ms ping time, or 500 Mbps over a 1-ms local network. - Given that changing the default value will both increase upload speeds and - cause more unfairness between clients on downloads, it is recommended to - instead use the side-specific settings tune.h2.fe.initial-window-size and - tune.h2.be.initial-window-size. + When fewer receive buffers than the maximum are in use, within the limits + defined by tune.h2.be.rxbuf and tune.h2.fe.rxbuf, unused buffers will be + shared between receiving streams. As such there is normally no point in + changing this default setting. Given that changing this default value will + both increase upload speeds and cause more unfairness between clients on + downloads, it is recommended to instead use the side-specific settings + tune.h2.fe.initial-window-size and tune.h2.be.initial-window-size. 
tune.h2.max-concurrent-streams Sets the default HTTP/2 maximum number of concurrent streams per connection diff --git a/src/mux_h2.c b/src/mux_h2.c index ddc49eb980..a5c03b50ff 100644 --- a/src/mux_h2.c +++ b/src/mux_h2.c @@ -468,6 +468,8 @@ static int h2_be_settings_initial_window_size = 0; /* backend's default init static int h2_fe_settings_initial_window_size = 0; /* frontend's default initial value */ static int h2_be_glitches_threshold = 0; /* backend's max glitches: unlimited */ static int h2_fe_glitches_threshold = 0; /* frontend's max glitches: unlimited */ +static uint h2_be_rxbuf = 0; /* backend's default total rxbuf (bytes) */ +static uint h2_fe_rxbuf = 0; /* frontend's default total rxbuf (bytes) */ static unsigned int h2_settings_max_concurrent_streams = 100; /* default value */ static unsigned int h2_be_settings_max_concurrent_streams = 0; /* backend value */ static unsigned int h2_fe_settings_max_concurrent_streams = 0; /* frontend value */ @@ -1170,6 +1172,7 @@ static int h2_init(struct connection *conn, struct proxy *prx, struct session *s struct h2c *h2c; struct task *t = NULL; void *conn_ctx = conn->ctx; + uint nb_rxbufs; TRACE_ENTER(H2_EV_H2C_NEW); @@ -1239,7 +1242,10 @@ static int h2_init(struct connection *conn, struct proxy *prx, struct session *s h2c->st0 = H2_CS_PREFACE; h2c->conn = conn; h2c->streams_limit = h2c_max_concurrent_streams(h2c); - bl_init(h2c->shared_rx_bufs, h2c->streams_limit + 1); + nb_rxbufs = (h2c->flags & H2_CF_IS_BACK) ? 
h2_be_rxbuf : h2_fe_rxbuf; + nb_rxbufs = (nb_rxbufs + global.tune.bufsize - 9 - 1) / (global.tune.bufsize - 9); + nb_rxbufs = MAX(nb_rxbufs, h2c->streams_limit); + bl_init(h2c->shared_rx_bufs, nb_rxbufs + 1); h2c->max_id = -1; h2c->errcode = H2_ERR_NO_ERROR; @@ -8221,6 +8227,27 @@ static int h2_parse_max_frame_size(char **args, int section_type, struct proxy * return 0; } +/* config parser for global "tune.h2.{be.,fe.}rxbuf": stores the size found in args[1] into h2_be_rxbuf or h2_fe_rxbuf depending on the keyword in args[0]. Returns 0 on success, or -1 with <err> filled on error. */ +static int h2_parse_rxbuf(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) +{ + const char *errptr; + uint *vptr; + + if (too_many_args(1, args, err, NULL)) + return -1; + + /* args[0][8] is 'b' for "tune.h2.be.rxbuf" and 'f' for "tune.h2.fe.rxbuf" */ + vptr = (args[0][8] == 'b') ? &h2_be_rxbuf : &h2_fe_rxbuf; + + /* parse_size_err() stores the parsed size into *vptr itself on success, so no prior atoi() is needed */ + if ((errptr = parse_size_err(args[1], vptr)) != NULL) { + memprintf(err, "'%s': unexpected character '%c' in size argument '%s'.", args[0], *errptr, args[1]); + return -1; + } + return 0; +} /* config parser for global "tune.h2.zero-copy-fwd-send" */ static int h2_parse_zero_copy_fwd_snd(char **args, int section_type, struct proxy *curpx, @@ -8282,10 +8309,12 @@ static struct cfg_kw_list cfg_kws = {ILH, { { CFG_GLOBAL, "tune.h2.be.glitches-threshold", h2_parse_glitches_threshold }, { CFG_GLOBAL, "tune.h2.be.initial-window-size", h2_parse_initial_window_size }, { CFG_GLOBAL, "tune.h2.be.max-concurrent-streams", h2_parse_max_concurrent_streams }, + { CFG_GLOBAL, "tune.h2.be.rxbuf", h2_parse_rxbuf }, { CFG_GLOBAL, "tune.h2.fe.glitches-threshold", h2_parse_glitches_threshold }, { CFG_GLOBAL, "tune.h2.fe.initial-window-size", h2_parse_initial_window_size }, { CFG_GLOBAL, "tune.h2.fe.max-concurrent-streams", h2_parse_max_concurrent_streams }, { CFG_GLOBAL, "tune.h2.fe.max-total-streams", h2_parse_max_total_streams }, + { CFG_GLOBAL, "tune.h2.fe.rxbuf", h2_parse_rxbuf }, { CFG_GLOBAL, "tune.h2.header-table-size", h2_parse_header_table_size }, { CFG_GLOBAL, "tune.h2.initial-window-size", 
h2_parse_initial_window_size }, { CFG_GLOBAL, "tune.h2.max-concurrent-streams", h2_parse_max_concurrent_streams }, @@ -8302,6 +8331,7 @@ INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws); static int init_h2() { uint max_bufs; + uint rx_bufs; pool_head_hpack_tbl = create_pool("hpack_tbl", h2_settings_header_table_size, @@ -8320,6 +8350,11 @@ static int init_h2() h2_be_settings_max_concurrent_streams : h2_settings_max_concurrent_streams); + /* check for forced rxbufs */ + rx_bufs = MAX(h2_be_rxbuf, h2_fe_rxbuf); + rx_bufs = (rx_bufs + global.tune.bufsize - 9 - 1) / (global.tune.bufsize - 9); + max_bufs = MAX(max_bufs, rx_bufs); + pool_head_h2_rx_bufs = create_pool("h2_rx_bufs", (max_bufs + 1) * sizeof(struct bl_elem), MEM_F_SHARED|MEM_F_EXACT);