MINOR: connection: add sample fetches to report per-connection glitches

Now with fc_glitches and bc_glitches we can retrieve the number of
detected glitches on a front or back connection. On the backend it
can indicate a bug in a server that may induce frequent reconnections,
hence extra CPU usage in TLS handshakes, and on the frontend it may
indicate an abusive client that may be trying to attack the stack
or to fingerprint it. Small non-zero values are definitely expected
and can be caused by network glitches for example, as well as rare
bugs in the other component (or maybe even in haproxy). These should
never be considered as alarming as long as they remain low (i.e.
much less than one per request). A reg-test is provided.
This commit is contained in:
Willy Tarreau 2024-01-17 18:00:21 +01:00
parent d2b44fd730
commit 6e5aa16145
3 changed files with 177 additions and 0 deletions

View File

@ -20969,6 +20969,7 @@ bc_dst ip
bc_dst_port integer
bc_err integer
bc_err_str string
bc_glitches integer
bc_http_major integer
bc_src ip
bc_src_port integer
@ -20993,6 +20994,7 @@ fc_dst_port integer
fc_err integer
fc_err_str string
fc_fackets integer
fc_glitches integer
fc_http_major integer
fc_lost integer
fc_pp_authority string
@ -21206,6 +21208,20 @@ bc_err_str : string
"fc_err_str" fetch for a full list of error codes and their
corresponding error message.
bc_glitches : integer
Returns the number of protocol glitches counted on the backend connection.
These cover protocol violations as well as small anomalies that generally
indicate a bogus or misbehaving server that may cause trouble in the
infrastructure (e.g. cause connections to be aborted early, inducing
frequent TLS handshakes). These may also be caused by too large responses
that cannot fit into a single buffer, explaining HTTP 502 errors. Ideally
this number should remain zero, though it's generally fine if it remains very
low compared to the total number of requests. These values should normally
not be considered as alarming (especially small ones), though a sudden jump
may indicate an anomaly somewhere. Not all protocol multiplexers measure this
metric and the only way to get more details about the events is to enable
traces to capture all exchanges.
bc_http_major : integer
Returns the backend connection's HTTP major version encoding, which may be 1
for HTTP/0.9 to HTTP/1.1 or 2 for HTTP/2. Note, this is based on the on-wire
@ -21411,6 +21427,23 @@ fc_fackets : integer
not TCP or if the operating system does not support TCP_INFO, for example
Linux kernels before 2.4, the sample fetch fails.
fc_glitches : integer
Returns the number of protocol glitches counted on the frontend connection.
These cover protocol violations as well as small anomalies that generally
indicate a bogus or misbehaving client that may cause trouble in the
infrastructure, such as an excess of errors in the logs, or many connections
being aborted early, inducing frequent TLS handshakes. These may also be
caused by too large requests that cannot fit into a single buffer, explaining
HTTP 400 errors. Ideally this number should remain zero, though some browsers
playing with the protocol boundaries may trigger it once in a while. These
values should normally not be considered as alarming
(especially small ones), though a sudden jump may indicate an anomaly
somewhere. Large values (i.e. hundreds to thousands per connection, or as
many as the requests) may indicate a purposely built client that is trying to
fingerprint or attack the protocol stack. Not all protocol multiplexers
measure this metric, and the only way to get more details about the events is
to enable traces to capture all exchanges.
fc_http_major : integer
Reports the front connection's HTTP major version encoding, which may be 1
for HTTP/0.9 to HTTP/1.1 or 2 for HTTP/2. Note, this is based on the on-wire

View File

@ -0,0 +1,108 @@
# This test verifies that H2 anomalies counted as glitches are properly detected
# and fetched.
varnishtest "h2 glitches"
feature ignore_unknown_macro
# haproxy frontend: a single listener bound with "proto h2" which answers every
# request with a 200 response whose "x-glitches" header carries the value of
# the fc_glitches sample fetch, so that each client below can check the counter
# of its own connection.
haproxy hap -conf {
defaults
mode http
listen fe1
bind "fd@${fe1}" proto h2
http-request return status 200 hdr x-glitches %[fc_glitches]
} -start
# valid request: no glitch
# Two well-formed GET requests are sent over the same connection (streams 1 and
# 3); the x-glitches response header is expected to be 0 for both of them.
client c1 -connect ${hap_fe1_sock} {
# connection preface, then SETTINGS exchange on stream 0
txpri
stream 0 {
txsettings
rxsettings
txsettings -ack
rxsettings
expect settings.ack == true
} -run
# first valid request
stream 1 {
txreq \
-method "GET" \
-scheme "http" \
-url "/"
rxresp
expect resp.status == 200
expect resp.http.x-glitches == 0
} -run
# second valid request on the same connection: counter still at 0
stream 3 {
txreq \
-method "GET" \
-scheme "http" \
-url "/"
rxresp
expect resp.status == 200
expect resp.http.x-glitches == 0
} -run
} -run
# invalid path: => req decoding error => glitch++
# Stream 1 sends a path without a leading "/" and expects a stream reset in
# return. The valid request that follows on stream 3 of the same connection is
# then expected to report exactly one glitch.
client c2-path -connect ${hap_fe1_sock} {
# connection preface, then SETTINGS exchange on stream 0
txpri
stream 0 {
txsettings
rxsettings
txsettings -ack
rxsettings
expect settings.ack == true
} -run
# invalid request: only a stream reset is expected, no response
stream 1 {
txreq \
-method "GET" \
-scheme "http" \
-url "hello-world"
rxrst
} -run
# valid request used to read the counter back: the previous error counts for 1
stream 3 {
txreq \
-method "GET" \
-scheme "http" \
-url "/"
rxresp
expect resp.status == 200
expect resp.http.x-glitches == 1
} -run
} -run
# invalid scheme: blocked at HTX layer, not counted
# Stream 1 sends a malformed ":scheme" value and expects a regular 400 response
# (not a stream reset). The valid request that follows on stream 3 is expected
# to still report 0 glitches, i.e. this kind of rejection does not increment
# the counter.
client c3-scheme -connect ${hap_fe1_sock} {
# connection preface, then SETTINGS exchange on stream 0
txpri
stream 0 {
txsettings
rxsettings
txsettings -ack
rxsettings
expect settings.ack == true
} -run
# invalid request: answered with a 400
stream 1 {
txreq \
-method "GET" \
-scheme "http://localhost/?" \
-url "/"
rxresp
expect resp.status == 400
} -run
# valid request used to read the counter back: nothing was counted
stream 3 {
txreq \
-method "GET" \
-scheme "http" \
-url "/"
rxresp
expect resp.status == 200
expect resp.http.x-glitches == 0
} -run
} -run

View File

@ -2242,6 +2242,40 @@ int conn_append_debug_info(struct buffer *buf, const struct connection *conn, co
return buf->data - old_len;
}
/* Sample fetch reporting the number of glitches counted by the mux of a
 * connection. The first character of the keyword <kw> selects the side: 'b'
 * designates the backend connection, anything else the frontend one. When the
 * session originates from a health check, only the backend side may be
 * resolved (through the check's stream connector). Returns 1 with an integer
 * sample set on success, or 0 when there is no connection, when the mux is not
 * installed yet (in which case SMP_F_MAY_CHANGE is set so that the caller knows
 * the result may differ later), or when the mux does not report this metric.
 */
static int
smp_fetch_fc_glitches(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	const int want_back = (kw[0] == 'b');
	struct connection *conn = NULL;
	int glitches;

	if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK) {
		/* checks only own a backend-side connection */
		if (want_back)
			conn = sc_conn(__objt_check(smp->sess->origin)->sc);
	}
	else if (!want_back) {
		/* frontend: the connection is the session's origin */
		conn = objt_conn(smp->sess->origin);
	}
	else if (smp->strm) {
		/* backend: reached through the stream's back stream connector */
		conn = sc_conn(smp->strm->scb);
	}

	/* No connection at all */
	if (!conn)
		return 0;

	/* Mux not installed yet, this may change */
	if (!conn->mux) {
		smp->flags |= SMP_F_MAY_CHANGE;
		return 0;
	}

	/* Mux without any ctl() callback: unsupported */
	if (!conn->mux->ctl)
		return 0;

	glitches = conn->mux->ctl(conn, MUX_CTL_GET_GLITCHES, NULL);

	/* a negative value means the mux does not support this request */
	if (glitches < 0)
		return 0;

	smp->data.u.sint = glitches;
	smp->data.type = SMP_T_SINT;
	return 1;
}
/* return the major HTTP version as 1 or 2 depending on how the request arrived
* before being processed.
*
@ -2488,9 +2522,11 @@ int smp_fetch_fc_err_str(const struct arg *args, struct sample *smp, const char
static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
{ "bc_err", smp_fetch_fc_err, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV },
{ "bc_err_str", smp_fetch_fc_err_str, 0, NULL, SMP_T_STR, SMP_USE_L4SRV },
{ "bc_glitches", smp_fetch_fc_glitches, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV },
{ "bc_http_major", smp_fetch_fc_http_major, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV },
{ "fc_err", smp_fetch_fc_err, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
{ "fc_err_str", smp_fetch_fc_err_str, 0, NULL, SMP_T_STR, SMP_USE_L4CLI },
{ "fc_glitches", smp_fetch_fc_glitches, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
{ "fc_http_major", smp_fetch_fc_http_major, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
{ "fc_rcvd_proxy", smp_fetch_fc_rcvd_proxy, 0, NULL, SMP_T_BOOL, SMP_USE_L4CLI },
{ "fc_pp_authority", smp_fetch_fc_pp_authority, 0, NULL, SMP_T_STR, SMP_USE_L4CLI },