From f853320b44b3e0daa396322f6016fbb566592004 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sat, 16 Aug 2008 14:55:08 +0200 Subject: [PATCH] [MINOR] term_trace: add better instrumentations to trace the code A new member has been added to the struct session. It keeps a trace of what block of code performs a close or a shutdown on a socket, and in what sequence. This is extremely convenient for post-mortem analysis where flag combinations and states seem impossible. A new ABORT_NOW() macro has also been added to make the code immediately segfault where called. --- include/common/debug.h | 5 ++++ include/proto/session.h | 6 +++++ include/types/session.h | 57 +++++++++++++++++++++++++++++++++++++++++ src/client.c | 3 +++ src/proto_http.c | 28 ++++++++++++++++++++ src/proto_uxst.c | 1 + 6 files changed, 100 insertions(+) diff --git a/include/common/debug.h b/include/common/debug.h index cbd752ef6..fe05a444a 100644 --- a/include/common/debug.h +++ b/include/common/debug.h @@ -36,6 +36,11 @@ #define FSM_PRINTF(x...) #endif +/* This abort is more efficient than abort() because it does not mangle the + * stack and stops at the exact location we need. + */ +#define ABORT_NOW() (*(int*)0=0) + /* this one is provided for easy code tracing. * Usage: TRACE(sess||0, fmt, args...); * TRACE(sess, ""); diff --git a/include/proto/session.h b/include/proto/session.h index 348e65086..cdd01bf39 100644 --- a/include/proto/session.h +++ b/include/proto/session.h @@ -36,6 +36,12 @@ int init_session(); void session_process_counters(struct session *s); void sess_change_server(struct session *sess, struct server *newsrv); +static void inline trace_term(struct session *s, unsigned int code) +{ + s->term_trace <<= TT_BIT_SHIFT; + s->term_trace |= code; +} + #endif /* _PROTO_SESSION_H */ /* diff --git a/include/types/session.h b/include/types/session.h index c94401ae1..436f652d6 100644 --- a/include/types/session.h +++ b/include/types/session.h @@ -94,6 +94,62 @@ #define AN_RTR_ANY (AN_RTR_INSPECT|AN_RTR_HTTP_HDR|AN_RTR_HTTP_BODY) +/* Termination sequence tracing. + * + * These values have to be set into the field term_trace of a session when + * closing a session (half or full). They are only meant for post-mortem + * analysis. The value must be assigned this way : + * trace_term(s, TT_XXX); + * + * One TT_XXX value is assigned to each location in the code which may be + * involved in a connection closing. Since a full session close generally + * involves 4 steps, we will be able to read these steps afterwards by simply + * checking the code. Value TT_NONE is zero and must never be set, as it means + * the connection was not closed. Value TT_ANON must be used when no value was + * assigned to a specific code part. Never ever reuse an already assigned code + * as it will defeat the purpose of this trace. It is wise to use a per-file + * anonymous value though. + */ +#define TT_BIT_SHIFT 8 +enum { + TT_NONE = 0, + TT_ANON = 1, + TT_CLIENT = 0x10, + TT_CLIENT_1, + TT_CLIENT_2, + TT_HTTP_CLI = 0x20, + TT_HTTP_CLI_1, + TT_HTTP_CLI_2, + TT_HTTP_CLI_3, + TT_HTTP_CLI_4, + TT_HTTP_CLI_5, + TT_HTTP_CLI_6, + TT_HTTP_CLI_7, + TT_HTTP_CLI_8, + TT_HTTP_CLI_9, + TT_HTTP_CLI_10, + TT_HTTP_SRV = 0x30, + TT_HTTP_SRV_1, + TT_HTTP_SRV_2, + TT_HTTP_SRV_3, + TT_HTTP_SRV_4, + TT_HTTP_SRV_5, + TT_HTTP_SRV_6, + TT_HTTP_SRV_7, + TT_HTTP_SRV_8, + TT_HTTP_SRV_9, + TT_HTTP_SRV_10, + TT_HTTP_SRV_11, + TT_HTTP_SRV_12, + TT_HTTP_SRV_13, + TT_HTTP_SRV_14, + TT_HTTP_CNT = 0x40, + TT_HTTP_CNT_1, + TT_HTTP_URI = 0x50, + TT_HTTP_URI_1, +}; + + /* * Note: some session flags have dependencies : * - SN_DIRECT cannot exist without SN_ASSIGNED, because a server is @@ -117,6 +173,7 @@ struct session { int srv_state; /* state of the server side */ int conn_retries; /* number of connect retries left */ int flags; /* some flags describing the session */ + unsigned term_trace; /* term trace: 4*8 bits indicating which part of the code closed */ unsigned int analysis; /* bit field indicating remaining analysis to perform on data */ struct buffer *req; /* request buffer */ struct buffer *rep; /* response buffer */ diff --git a/src/client.c b/src/client.c index f5d6bb9d9..77b1d96c6 100644 --- a/src/client.c +++ b/src/client.c @@ -108,6 +108,7 @@ int event_accept(int fd) { s->flags = 0; s->analysis = 0; + s->term_trace = 0; /* if this session comes from a known monitoring system, we want to ignore * it as soon as possible, which means closing it immediately for TCP. @@ -380,11 +381,13 @@ int event_accept(int fd) { */ struct chunk msg = { .str = "HTTP/1.0 200 OK\r\n\r\n", .len = 19 }; client_retnclose(s, &msg); /* forge a 200 response */ + trace_term(s, TT_CLIENT_1); t->expire = s->rep->wex; } else if (p->mode == PR_MODE_HEALTH) { /* health check mode, no client reading */ struct chunk msg = { .str = "OK\n", .len = 3 }; client_retnclose(s, &msg); /* forge an "OK" response */ + trace_term(s, TT_CLIENT_2); t->expire = s->rep->wex; } else { diff --git a/src/proto_http.c b/src/proto_http.c index 07bdf4061..6121a93c1 100644 --- a/src/proto_http.c +++ b/src/proto_http.c @@ -3016,6 +3016,7 @@ int process_cli(struct session *t) buffer_shutw_done(rep); fd_delete(t->cli_fd); t->cli_state = CL_STCLOSE; + trace_term(t, TT_HTTP_CLI_1); if (!(t->flags & SN_ERR_MASK)) t->flags |= SN_ERR_CLICL; if (!(t->flags & SN_FINST_MASK)) { @@ -3036,10 +3037,12 @@ int process_cli(struct session *t) buffer_shutr_done(req); if (!(rep->flags & BF_SHUTW_STATUS)) { EV_FD_CLR(t->cli_fd, DIR_RD); + trace_term(t, TT_HTTP_CLI_2); } else { /* output was already closed */ fd_delete(t->cli_fd); t->cli_state = CL_STCLOSE; + trace_term(t, TT_HTTP_CLI_3); } return 1; } @@ -3057,9 +3060,11 @@ int process_cli(struct session *t) /* FIXME: is this still true ? */ EV_FD_SET(t->cli_fd, DIR_RD); req->rex = tick_add_ifset(now_ms, t->fe->timeout.client); + trace_term(t, TT_HTTP_CLI_4); } else { fd_delete(t->cli_fd); t->cli_state = CL_STCLOSE; + trace_term(t, TT_HTTP_CLI_5); } return 1; } @@ -3069,10 +3074,12 @@ int process_cli(struct session *t) req->flags |= BF_READ_TIMEOUT; if (!(rep->flags & BF_SHUTW_STATUS)) { EV_FD_CLR(t->cli_fd, DIR_RD); + trace_term(t, TT_HTTP_CLI_6); } else { /* output was already closed */ fd_delete(t->cli_fd); t->cli_state = CL_STCLOSE; + trace_term(t, TT_HTTP_CLI_7); } if (!(t->flags & SN_ERR_MASK)) t->flags |= SN_ERR_CLITO; @@ -3099,9 +3106,11 @@ int process_cli(struct session *t) /* FIXME: is this still true ? */ EV_FD_SET(t->cli_fd, DIR_RD); req->rex = tick_add_ifset(now_ms, t->fe->timeout.client); + trace_term(t, TT_HTTP_CLI_8); } else { fd_delete(t->cli_fd); t->cli_state = CL_STCLOSE; + trace_term(t, TT_HTTP_CLI_9); } if (!(t->flags & SN_ERR_MASK)) @@ -3144,6 +3153,7 @@ int process_cli(struct session *t) buffer_shutw_done(rep); fd_delete(t->cli_fd); t->cli_state = CL_STCLOSE; + trace_term(t, TT_HTTP_CLI_10); return 1; } } @@ -3226,6 +3236,7 @@ int process_srv(struct session *t) else srv_close_with_err(t, SN_ERR_CLICL, t->pend_pos ? SN_FINST_Q : SN_FINST_C, 0, NULL); + trace_term(t, TT_HTTP_SRV_1); return 1; } else if (req->flags & BF_MAY_CONNECT) { @@ -3248,6 +3259,7 @@ int process_srv(struct session *t) t->logs.t_queue = tv_ms_elapsed(&t->logs.tv_accept, &now); srv_close_with_err(t, SN_ERR_PRXCOND, SN_FINST_T, 500, error_message(t, HTTP_ERR_500)); + trace_term(t, TT_HTTP_SRV_2); return 1; } @@ -3265,6 +3277,7 @@ int process_srv(struct session *t) t->logs.t_queue = tv_ms_elapsed(&t->logs.tv_accept, &now); srv_close_with_err(t, SN_ERR_SRVTO, SN_FINST_Q, 503, error_message(t, HTTP_ERR_503)); + trace_term(t, TT_HTTP_SRV_3); if (t->srv) t->srv->failed_conns++; t->be->failed_conns++; @@ -3315,6 +3328,8 @@ int process_srv(struct session *t) rdr.len += 4; srv_close_with_err(t, SN_ERR_PRXCOND, SN_FINST_C, 302, &rdr); + trace_term(t, TT_HTTP_SRV_3); + /* FIXME: we should increase a counter of redirects per server and per backend. */ if (t->srv) t->srv->cum_sess++; @@ -3324,6 +3339,7 @@ int process_srv(struct session *t) t->fe->failed_req++; srv_close_with_err(t, SN_ERR_PRXCOND, SN_FINST_C, 400, error_message(t, HTTP_ERR_400)); + trace_term(t, TT_HTTP_SRV_4); return 1; } @@ -3356,6 +3372,7 @@ int process_srv(struct session *t) * overwrite the client_retnclose() output. */ srv_close_with_err(t, SN_ERR_CLICL, SN_FINST_C, 0, NULL); + trace_term(t, TT_HTTP_SRV_5); return 1; } if (!(req->flags & BF_WRITE_STATUS) && !tick_is_expired(req->cex, now_ms)) { @@ -3498,6 +3515,7 @@ int process_srv(struct session *t) } t->be->failed_resp++; t->srv_state = SV_STCLOSE; + trace_term(t, TT_HTTP_SRV_6); if (!(t->flags & SN_ERR_MASK)) t->flags |= SN_ERR_SRVCL; if (!(t->flags & SN_FINST_MASK)) @@ -3514,6 +3532,7 @@ int process_srv(struct session *t) buffer_shutr_done(rep); if (!(req->flags & BF_SHUTW_STATUS)) { EV_FD_CLR(t->srv_fd, DIR_RD); + trace_term(t, TT_HTTP_SRV_7); } else { /* output was already closed */ fd_delete(t->srv_fd); @@ -3522,6 +3541,7 @@ int process_srv(struct session *t) sess_change_server(t, NULL); } t->srv_state = SV_STCLOSE; + trace_term(t, TT_HTTP_SRV_8); if (may_dequeue_tasks(t->srv, t->be)) process_srv_queue(t->srv); @@ -3542,6 +3562,7 @@ int process_srv(struct session *t) if (!(rep->flags & BF_SHUTR_STATUS)) { EV_FD_CLR(t->srv_fd, DIR_WR); shutdown(t->srv_fd, SHUT_WR); + trace_term(t, TT_HTTP_SRV_9); /* We must ensure that the read part is still alive when switching to shutw */ /* FIXME: is this still true ? */ EV_FD_SET(t->srv_fd, DIR_RD); @@ -3553,6 +3574,7 @@ int process_srv(struct session *t) sess_change_server(t, NULL); } t->srv_state = SV_STCLOSE; + trace_term(t, TT_HTTP_SRV_10); if (may_dequeue_tasks(t->srv, t->be)) process_srv_queue(t->srv); @@ -3565,6 +3587,7 @@ int process_srv(struct session *t) rep->flags |= BF_READ_TIMEOUT; if (!(req->flags & BF_SHUTW_STATUS)) { EV_FD_CLR(t->srv_fd, DIR_RD); + trace_term(t, TT_HTTP_SRV_11); } else { fd_delete(t->srv_fd); if (t->srv) { @@ -3572,6 +3595,7 @@ int process_srv(struct session *t) sess_change_server(t, NULL); } t->srv_state = SV_STCLOSE; + trace_term(t, TT_HTTP_SRV_12); if (may_dequeue_tasks(t->srv, t->be)) process_srv_queue(t->srv); @@ -3589,6 +3613,7 @@ int process_srv(struct session *t) if (!(rep->flags & BF_SHUTR_STATUS)) { EV_FD_CLR(t->srv_fd, DIR_WR); shutdown(t->srv_fd, SHUT_WR); + trace_term(t, TT_HTTP_SRV_13); /* We must ensure that the read part is still alive when switching to shutw */ /* FIXME: is this still needed ? */ EV_FD_SET(t->srv_fd, DIR_RD); @@ -3600,6 +3625,7 @@ int process_srv(struct session *t) sess_change_server(t, NULL); } t->srv_state = SV_STCLOSE; + trace_term(t, TT_HTTP_SRV_14); if (may_dequeue_tasks(t->srv, t->be)) process_srv_queue(t->srv); @@ -3689,6 +3715,7 @@ int produce_content(struct session *s) /* unknown data source or internal error */ s->txn.status = 500; client_retnclose(s, error_message(s, HTTP_ERR_500)); + trace_term(s, TT_HTTP_CNT_1); if (!(s->flags & SN_ERR_MASK)) s->flags |= SN_ERR_PRXCOND; if (!(s->flags & SN_FINST_MASK)) @@ -5056,6 +5083,7 @@ int stats_check_uri_auth(struct session *t, struct proxy *backend) msg.len = sprintf(trash, HTTP_401_fmt, uri_auth->auth_realm); txn->status = 401; client_retnclose(t, &msg); + trace_term(t, TT_HTTP_URI_1); t->analysis &= ~AN_REQ_ANY; if (!(t->flags & SN_ERR_MASK)) t->flags |= SN_ERR_PRXCOND; diff --git a/src/proto_uxst.c b/src/proto_uxst.c index 9053805d2..f8026cee2 100644 --- a/src/proto_uxst.c +++ b/src/proto_uxst.c @@ -413,6 +413,7 @@ int uxst_event_accept(int fd) { s->flags = 0; s->analysis = 0; + s->term_trace = 0; if ((t = pool_alloc2(pool2_task)) == NULL) { Alert("out of memory in uxst_event_accept().\n");