From ae526784444aeadbde5f109c8c6f9c8f9d48477d Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Thu, 4 Mar 2010 20:34:23 +0100 Subject: [PATCH] [STATS] count transfer aborts caused by client and by server Often we need to understand why some transfers were aborted or what constitutes server response errors. With those two counters, it is now possible to detect an unexpected transfer abort during a data phase (eg: too short HTTP response), and to know what part of the server response errors may in fact be assigned to aborted transfers. --- doc/configuration.txt | 4 ++- include/types/counters.h | 2 ++ src/dumpstats.c | 38 ++++++++++++++++++++----- src/proto_http.c | 6 ++++ src/session.c | 60 +++++++++++++++++++++++++++++++++------- 5 files changed, 92 insertions(+), 18 deletions(-) diff --git a/doc/configuration.txt b/doc/configuration.txt index 394a46f1b..761edfb57 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -7584,7 +7584,7 @@ page. Both means provide a CSV format whose fields follow. 11. dresp: denied responses 12. ereq: request errors 13. econ: connection errors - 14. eresp: response errors + 14. eresp: response errors (among which srv_abrt) 15. wretr: retries (warning) 16. wredis: redispatches (warning) 17. status: status (UP/DOWN/NOLB/MAINT/MAINT(via)...) @@ -7635,6 +7635,8 @@ page. Both means provide a CSV format whose fields follow. 46. req_rate: HTTP requests per second over last elapsed second 47. req_rate_max: max number of HTTP requests per second observed 48. req_tot: total number of HTTP requests received + 49. cli_abrt: number of data transfers aborted by the client + 50. srv_abrt: number of data transfers aborted by the server (inc. in eresp) 9.2. Unix Socket commands diff --git a/include/types/counters.h b/include/types/counters.h index 33fbb5cde..6e620e186 100644 --- a/include/types/counters.h +++ b/include/types/counters.h @@ -47,6 +47,7 @@ struct pxcounters { } fe, be; /* FE and BE stats */ long long failed_conns, failed_resp; /* failed connect() and responses */ + long long cli_aborts, srv_aborts; /* aborted responses during DATA phase due to client or server */ long long retries, redispatches; /* retried and redispatched connections */ }; @@ -74,6 +75,7 @@ struct srvcounters { long long bytes_out; /* number of bytes transferred from the server to the client */ long long failed_conns, failed_resp; /* failed connect() and responses */ + long long cli_aborts, srv_aborts; /* aborted responses during DATA phase due to client or server */ long long retries, redispatches; /* retried and redispatched connections */ long long failed_secu; /* blocked responses because of security concerns */ diff --git a/src/dumpstats.c b/src/dumpstats.c index d3d036b02..b2967fc41 100644 --- a/src/dumpstats.c +++ b/src/dumpstats.c @@ -248,7 +248,8 @@ int print_csv_header(struct chunk *msg) "rate,rate_lim,rate_max," "check_status,check_code,check_duration," "hrsp_1xx,hrsp_2xx,hrsp_3xx,hrsp_4xx,hrsp_5xx,hrsp_other,hanafail," - "req_rate, req_rate_max, req_tot," + "req_rate,req_rate_max,req_tot," + "cli_abrt,srv_abrt," "\n"); } @@ -1543,6 +1544,9 @@ int stats_dump_proxy(struct session *s, struct proxy *px, struct uri_auth *uri) read_freq_ctr(&px->fe_req_per_sec), px->counters.fe_rps_max, px->counters.cum_fe_req); + /* errors: cli_aborts, srv_aborts */ + chunk_printf(&msg, ",,"); + /* finish with EOL */ chunk_printf(&msg, "\n"); } @@ -1672,6 +1676,8 @@ int stats_dump_proxy(struct session *s, struct proxy *px, struct uri_auth *uri) "," /* requests : req_rate, req_rate_max, req_tot, */ ",,," + /* errors: cli_aborts, srv_aborts */ + ",," "\n", px->id, l->name, l->nbconn, l->counters->conn_max, @@ -1828,14 +1834,19 @@ int stats_dump_proxy(struct session *s, struct proxy *px, struct uri_auth *uri) "%s%s" /* denied: req, resp */ "%s" - /* errors : request, connect, response */ - "%s%s\n" + /* errors : request, connect */ + "%s" + /* errors : response */ + "%s" /* warnings: retries, redispatches */ "%lld%lld" "", U2H0(sv->counters.bytes_in), U2H1(sv->counters.bytes_out), U2H2(sv->counters.failed_secu), - U2H3(sv->counters.failed_conns), U2H4(sv->counters.failed_resp), + U2H3(sv->counters.failed_conns), + U2H4(sv->counters.cli_aborts), + U2H5(sv->counters.srv_aborts), + U2H6(sv->counters.failed_resp), sv->counters.retries, sv->counters.redispatches); /* status, lest check */ @@ -2062,6 +2073,10 @@ int stats_dump_proxy(struct session *s, struct proxy *px, struct uri_auth *uri) /* requests : req_rate, req_rate_max, req_tot, */ chunk_printf(&msg, ",,,"); + /* errors: cli_aborts, srv_aborts */ + chunk_printf(&msg, "%lld,%lld,", + sv->counters.cli_aborts, sv->counters.srv_aborts); + /* finish with EOL */ chunk_printf(&msg, "\n"); } @@ -2150,8 +2165,10 @@ int stats_dump_proxy(struct session *s, struct proxy *px, struct uri_auth *uri) chunk_printf(&msg, /* denied: req, resp */ "%s%s" - /* errors : request, connect, response */ - "%s%s\n" + /* errors : request, connect */ + "%s" + /* errors : response */ + "%s" /* warnings: retries, redispatches */ "%lld%lld" /* backend status: reflect backend status (up/down): we display UP @@ -2162,7 +2179,10 @@ int stats_dump_proxy(struct session *s, struct proxy *px, struct uri_auth *uri) "%d%d" "", U2H0(px->counters.denied_req), U2H1(px->counters.denied_resp), - U2H2(px->counters.failed_conns), U2H3(px->counters.failed_resp), + U2H2(px->counters.failed_conns), + U2H3(px->counters.cli_aborts), + U2H4(px->counters.srv_aborts), + U2H5(px->counters.failed_resp), px->counters.retries, px->counters.redispatches, human_time(now.tv_sec - px->last_change, 1), (px->lbprm.tot_weight > 0 || !px->srv) ? "UP" : @@ -2243,6 +2263,10 @@ int stats_dump_proxy(struct session *s, struct proxy *px, struct uri_auth *uri) /* requests : req_rate, req_rate_max, req_tot, */ chunk_printf(&msg, ",,,"); + /* errors: cli_aborts, srv_aborts */ + chunk_printf(&msg, "%lld,%lld,", + px->counters.cli_aborts, px->counters.srv_aborts); + /* finish with EOL */ chunk_printf(&msg, "\n"); diff --git a/src/proto_http.c b/src/proto_http.c index 72d2da9ec..3db8ac4a7 100644 --- a/src/proto_http.c +++ b/src/proto_http.c @@ -3971,6 +3971,9 @@ int http_sync_res_state(struct session *s) } else if (buf->flags & BF_SHUTW) { txn->rsp.msg_state = HTTP_MSG_ERROR; + s->be->counters.cli_aborts++; + if (s->srv) + s->srv->counters.cli_aborts++; goto wait_other_side; } } @@ -5071,6 +5074,9 @@ int http_response_forward_body(struct session *s, struct buffer *res, int an_bit if (res->flags & BF_SHUTR) { if (!(s->flags & SN_ERR_MASK)) s->flags |= SN_ERR_SRVCL; + s->be->counters.srv_aborts++; + if (s->srv) + s->srv->counters.srv_aborts++; goto return_bad_res; } diff --git a/src/session.c b/src/session.c index b60214745..60d8d2abb 100644 --- a/src/session.c +++ b/src/session.c @@ -893,6 +893,9 @@ struct task *process_session(struct task *t) s->si[0].shutw(&s->si[0]); stream_int_report_error(&s->si[0]); if (!(s->req->analysers) && !(s->rep->analysers)) { + s->be->counters.cli_aborts++; + if (s->srv) + s->srv->counters.cli_aborts++; if (!(s->flags & SN_ERR_MASK)) s->flags |= SN_ERR_CLICL; if (!(s->flags & SN_FINST_MASK)) @@ -910,6 +913,9 @@ struct task *process_session(struct task *t) if (s->srv) s->srv->counters.failed_resp++; if (!(s->req->analysers) && !(s->rep->analysers)) { + s->be->counters.srv_aborts++; + if (s->srv) + s->srv->counters.srv_aborts++; if (!(s->flags & SN_ERR_MASK)) s->flags |= SN_ERR_SRVCL; if (!(s->flags & SN_FINST_MASK)) @@ -1212,33 +1218,67 @@ resync_stream_interface: /* - * Now we propagate unhandled errors to the session + * Now we propagate unhandled errors to the session. Normally + * we're just in a data phase here since it means we have not + * seen any analyser who could set an error status. */ if (!(s->flags & SN_ERR_MASK)) { if (s->req->flags & (BF_READ_ERROR|BF_READ_TIMEOUT|BF_WRITE_ERROR|BF_WRITE_TIMEOUT)) { /* Report it if the client got an error or a read timeout expired */ s->req->analysers = 0; - if (s->req->flags & BF_READ_ERROR) + if (s->req->flags & BF_READ_ERROR) { + s->be->counters.cli_aborts++; + if (s->srv) + s->srv->counters.cli_aborts++; s->flags |= SN_ERR_CLICL; - else if (s->req->flags & BF_READ_TIMEOUT) + } + else if (s->req->flags & BF_READ_TIMEOUT) { + s->be->counters.cli_aborts++; + if (s->srv) + s->srv->counters.cli_aborts++; s->flags |= SN_ERR_CLITO; - else if (s->req->flags & BF_WRITE_ERROR) + } + else if (s->req->flags & BF_WRITE_ERROR) { + s->be->counters.srv_aborts++; + if (s->srv) + s->srv->counters.srv_aborts++; s->flags |= SN_ERR_SRVCL; - else + } + else { + s->be->counters.srv_aborts++; + if (s->srv) + s->srv->counters.srv_aborts++; s->flags |= SN_ERR_SRVTO; + } sess_set_term_flags(s); } else if (s->rep->flags & (BF_READ_ERROR|BF_READ_TIMEOUT|BF_WRITE_ERROR|BF_WRITE_TIMEOUT)) { /* Report it if the server got an error or a read timeout expired */ s->rep->analysers = 0; - if (s->rep->flags & BF_READ_ERROR) + if (s->rep->flags & BF_READ_ERROR) { + s->be->counters.srv_aborts++; + if (s->srv) + s->srv->counters.srv_aborts++; s->flags |= SN_ERR_SRVCL; - else if (s->rep->flags & BF_READ_TIMEOUT) + } + else if (s->rep->flags & BF_READ_TIMEOUT) { + s->be->counters.srv_aborts++; + if (s->srv) + s->srv->counters.srv_aborts++; s->flags |= SN_ERR_SRVTO; - else if (s->rep->flags & BF_WRITE_ERROR) + } + else if (s->rep->flags & BF_WRITE_ERROR) { + s->be->counters.cli_aborts++; + if (s->srv) + s->srv->counters.cli_aborts++; s->flags |= SN_ERR_CLICL; - else - s->flags |= SN_ERR_CLITO; + } + else { + s->be->counters.cli_aborts++; + if (s->srv) + s->srv->counters.cli_aborts++; + s->flags |= SN_ERR_CLITO; + } sess_set_term_flags(s); } }