[MINOR] term_trace: add better instrumentations to trace the code

A new member has been added to the struct session. It keeps a trace
of what block of code performs a close or a shutdown on a socket, and
in what sequence. This is extremely convenient for post-mortem analysis
where flag combinations and states seem impossible. A new ABORT_NOW()
macro has also been added to make the code immediately segfault where
called.
This commit is contained in:
Willy Tarreau 2008-08-16 14:55:08 +02:00
parent 1ae3a057df
commit f853320b44
6 changed files with 100 additions and 0 deletions

View File

@ -36,6 +36,11 @@
#define FSM_PRINTF(x...)
#endif
/* This abort is more efficient than abort() because it does not mangle the
* stack and stops at the exact location we need.
*/
#define ABORT_NOW() (*(int*)0=0)
/* this one is provided for easy code tracing.
* Usage: TRACE(sess||0, fmt, args...);
* TRACE(sess, "");

View File

@ -36,6 +36,12 @@ int init_session();
void session_process_counters(struct session *s);
void sess_change_server(struct session *sess, struct server *newsrv);
static void inline trace_term(struct session *s, unsigned int code)
{
s->term_trace <<= TT_BIT_SHIFT;
s->term_trace |= code;
}
#endif /* _PROTO_SESSION_H */
/*

View File

@ -94,6 +94,62 @@
#define AN_RTR_ANY (AN_RTR_INSPECT|AN_RTR_HTTP_HDR|AN_RTR_HTTP_BODY)
/* Termination sequence tracing.
*
* These values have to be set into the field term_trace of a session when
* closing a session (half or full). They are only meant for post-mortem
* analysis. The value must be assigned this way :
* trace_term(s, TT_XXX);
*
* One TT_XXX value is assigned to each location in the code which may be
* involved in a connection closing. Since a full session close generally
* involves 4 steps, we will be able to read these steps afterwards by simply
* checking the code. Value TT_NONE is zero and must never be set, as it means
* the connection was not closed. Value TT_ANON must be used when no value was
* assigned to a specific code part. Never ever reuse an already assigned code
* as it will defeat the purpose of this trace. It is wise to use a per-file
* anonymous value though.
*/
#define TT_BIT_SHIFT 8
enum {
TT_NONE = 0,
TT_ANON = 1,
TT_CLIENT = 0x10,
TT_CLIENT_1,
TT_CLIENT_2,
TT_HTTP_CLI = 0x20,
TT_HTTP_CLI_1,
TT_HTTP_CLI_2,
TT_HTTP_CLI_3,
TT_HTTP_CLI_4,
TT_HTTP_CLI_5,
TT_HTTP_CLI_6,
TT_HTTP_CLI_7,
TT_HTTP_CLI_8,
TT_HTTP_CLI_9,
TT_HTTP_CLI_10,
TT_HTTP_SRV = 0x30,
TT_HTTP_SRV_1,
TT_HTTP_SRV_2,
TT_HTTP_SRV_3,
TT_HTTP_SRV_4,
TT_HTTP_SRV_5,
TT_HTTP_SRV_6,
TT_HTTP_SRV_7,
TT_HTTP_SRV_8,
TT_HTTP_SRV_9,
TT_HTTP_SRV_10,
TT_HTTP_SRV_11,
TT_HTTP_SRV_12,
TT_HTTP_SRV_13,
TT_HTTP_SRV_14,
TT_HTTP_CNT = 0x40,
TT_HTTP_CNT_1,
TT_HTTP_URI = 0x50,
TT_HTTP_URI_1,
};
/*
* Note: some session flags have dependencies :
* - SN_DIRECT cannot exist without SN_ASSIGNED, because a server is
@ -117,6 +173,7 @@ struct session {
int srv_state; /* state of the server side */
int conn_retries; /* number of connect retries left */
int flags; /* some flags describing the session */
unsigned term_trace; /* term trace: 4*8 bits indicating which part of the code closed */
unsigned int analysis; /* bit field indicating remaining analysis to perform on data */
struct buffer *req; /* request buffer */
struct buffer *rep; /* response buffer */

View File

@ -108,6 +108,7 @@ int event_accept(int fd) {
s->flags = 0;
s->analysis = 0;
s->term_trace = 0;
/* if this session comes from a known monitoring system, we want to ignore
* it as soon as possible, which means closing it immediately for TCP.
@ -380,11 +381,13 @@ int event_accept(int fd) {
*/
struct chunk msg = { .str = "HTTP/1.0 200 OK\r\n\r\n", .len = 19 };
client_retnclose(s, &msg); /* forge a 200 response */
trace_term(s, TT_CLIENT_1);
t->expire = s->rep->wex;
}
else if (p->mode == PR_MODE_HEALTH) { /* health check mode, no client reading */
struct chunk msg = { .str = "OK\n", .len = 3 };
client_retnclose(s, &msg); /* forge an "OK" response */
trace_term(s, TT_CLIENT_2);
t->expire = s->rep->wex;
}
else {

View File

@ -3016,6 +3016,7 @@ int process_cli(struct session *t)
buffer_shutw_done(rep);
fd_delete(t->cli_fd);
t->cli_state = CL_STCLOSE;
trace_term(t, TT_HTTP_CLI_1);
if (!(t->flags & SN_ERR_MASK))
t->flags |= SN_ERR_CLICL;
if (!(t->flags & SN_FINST_MASK)) {
@ -3036,10 +3037,12 @@ int process_cli(struct session *t)
buffer_shutr_done(req);
if (!(rep->flags & BF_SHUTW_STATUS)) {
EV_FD_CLR(t->cli_fd, DIR_RD);
trace_term(t, TT_HTTP_CLI_2);
} else {
/* output was already closed */
fd_delete(t->cli_fd);
t->cli_state = CL_STCLOSE;
trace_term(t, TT_HTTP_CLI_3);
}
return 1;
}
@ -3057,9 +3060,11 @@ int process_cli(struct session *t)
/* FIXME: is this still true ? */
EV_FD_SET(t->cli_fd, DIR_RD);
req->rex = tick_add_ifset(now_ms, t->fe->timeout.client);
trace_term(t, TT_HTTP_CLI_4);
} else {
fd_delete(t->cli_fd);
t->cli_state = CL_STCLOSE;
trace_term(t, TT_HTTP_CLI_5);
}
return 1;
}
@ -3069,10 +3074,12 @@ int process_cli(struct session *t)
req->flags |= BF_READ_TIMEOUT;
if (!(rep->flags & BF_SHUTW_STATUS)) {
EV_FD_CLR(t->cli_fd, DIR_RD);
trace_term(t, TT_HTTP_CLI_6);
} else {
/* output was already closed */
fd_delete(t->cli_fd);
t->cli_state = CL_STCLOSE;
trace_term(t, TT_HTTP_CLI_7);
}
if (!(t->flags & SN_ERR_MASK))
t->flags |= SN_ERR_CLITO;
@ -3099,9 +3106,11 @@ int process_cli(struct session *t)
/* FIXME: is this still true ? */
EV_FD_SET(t->cli_fd, DIR_RD);
req->rex = tick_add_ifset(now_ms, t->fe->timeout.client);
trace_term(t, TT_HTTP_CLI_8);
} else {
fd_delete(t->cli_fd);
t->cli_state = CL_STCLOSE;
trace_term(t, TT_HTTP_CLI_9);
}
if (!(t->flags & SN_ERR_MASK))
@ -3144,6 +3153,7 @@ int process_cli(struct session *t)
buffer_shutw_done(rep);
fd_delete(t->cli_fd);
t->cli_state = CL_STCLOSE;
trace_term(t, TT_HTTP_CLI_10);
return 1;
}
}
@ -3226,6 +3236,7 @@ int process_srv(struct session *t)
else
srv_close_with_err(t, SN_ERR_CLICL, t->pend_pos ? SN_FINST_Q : SN_FINST_C, 0, NULL);
trace_term(t, TT_HTTP_SRV_1);
return 1;
}
else if (req->flags & BF_MAY_CONNECT) {
@ -3248,6 +3259,7 @@ int process_srv(struct session *t)
t->logs.t_queue = tv_ms_elapsed(&t->logs.tv_accept, &now);
srv_close_with_err(t, SN_ERR_PRXCOND, SN_FINST_T,
500, error_message(t, HTTP_ERR_500));
trace_term(t, TT_HTTP_SRV_2);
return 1;
}
@ -3265,6 +3277,7 @@ int process_srv(struct session *t)
t->logs.t_queue = tv_ms_elapsed(&t->logs.tv_accept, &now);
srv_close_with_err(t, SN_ERR_SRVTO, SN_FINST_Q,
503, error_message(t, HTTP_ERR_503));
trace_term(t, TT_HTTP_SRV_3);
if (t->srv)
t->srv->failed_conns++;
t->be->failed_conns++;
@ -3315,6 +3328,8 @@ int process_srv(struct session *t)
rdr.len += 4;
srv_close_with_err(t, SN_ERR_PRXCOND, SN_FINST_C, 302, &rdr);
trace_term(t, TT_HTTP_SRV_3);
/* FIXME: we should increase a counter of redirects per server and per backend. */
if (t->srv)
t->srv->cum_sess++;
@ -3324,6 +3339,7 @@ int process_srv(struct session *t)
t->fe->failed_req++;
srv_close_with_err(t, SN_ERR_PRXCOND, SN_FINST_C,
400, error_message(t, HTTP_ERR_400));
trace_term(t, TT_HTTP_SRV_4);
return 1;
}
@ -3356,6 +3372,7 @@ int process_srv(struct session *t)
* overwrite the client_retnclose() output.
*/
srv_close_with_err(t, SN_ERR_CLICL, SN_FINST_C, 0, NULL);
trace_term(t, TT_HTTP_SRV_5);
return 1;
}
if (!(req->flags & BF_WRITE_STATUS) && !tick_is_expired(req->cex, now_ms)) {
@ -3498,6 +3515,7 @@ int process_srv(struct session *t)
}
t->be->failed_resp++;
t->srv_state = SV_STCLOSE;
trace_term(t, TT_HTTP_SRV_6);
if (!(t->flags & SN_ERR_MASK))
t->flags |= SN_ERR_SRVCL;
if (!(t->flags & SN_FINST_MASK))
@ -3514,6 +3532,7 @@ int process_srv(struct session *t)
buffer_shutr_done(rep);
if (!(req->flags & BF_SHUTW_STATUS)) {
EV_FD_CLR(t->srv_fd, DIR_RD);
trace_term(t, TT_HTTP_SRV_7);
} else {
/* output was already closed */
fd_delete(t->srv_fd);
@ -3522,6 +3541,7 @@ int process_srv(struct session *t)
sess_change_server(t, NULL);
}
t->srv_state = SV_STCLOSE;
trace_term(t, TT_HTTP_SRV_8);
if (may_dequeue_tasks(t->srv, t->be))
process_srv_queue(t->srv);
@ -3542,6 +3562,7 @@ int process_srv(struct session *t)
if (!(rep->flags & BF_SHUTR_STATUS)) {
EV_FD_CLR(t->srv_fd, DIR_WR);
shutdown(t->srv_fd, SHUT_WR);
trace_term(t, TT_HTTP_SRV_9);
/* We must ensure that the read part is still alive when switching to shutw */
/* FIXME: is this still true ? */
EV_FD_SET(t->srv_fd, DIR_RD);
@ -3553,6 +3574,7 @@ int process_srv(struct session *t)
sess_change_server(t, NULL);
}
t->srv_state = SV_STCLOSE;
trace_term(t, TT_HTTP_SRV_10);
if (may_dequeue_tasks(t->srv, t->be))
process_srv_queue(t->srv);
@ -3565,6 +3587,7 @@ int process_srv(struct session *t)
rep->flags |= BF_READ_TIMEOUT;
if (!(req->flags & BF_SHUTW_STATUS)) {
EV_FD_CLR(t->srv_fd, DIR_RD);
trace_term(t, TT_HTTP_SRV_11);
} else {
fd_delete(t->srv_fd);
if (t->srv) {
@ -3572,6 +3595,7 @@ int process_srv(struct session *t)
sess_change_server(t, NULL);
}
t->srv_state = SV_STCLOSE;
trace_term(t, TT_HTTP_SRV_12);
if (may_dequeue_tasks(t->srv, t->be))
process_srv_queue(t->srv);
@ -3589,6 +3613,7 @@ int process_srv(struct session *t)
if (!(rep->flags & BF_SHUTR_STATUS)) {
EV_FD_CLR(t->srv_fd, DIR_WR);
shutdown(t->srv_fd, SHUT_WR);
trace_term(t, TT_HTTP_SRV_13);
/* We must ensure that the read part is still alive when switching to shutw */
/* FIXME: is this still needed ? */
EV_FD_SET(t->srv_fd, DIR_RD);
@ -3600,6 +3625,7 @@ int process_srv(struct session *t)
sess_change_server(t, NULL);
}
t->srv_state = SV_STCLOSE;
trace_term(t, TT_HTTP_SRV_14);
if (may_dequeue_tasks(t->srv, t->be))
process_srv_queue(t->srv);
@ -3689,6 +3715,7 @@ int produce_content(struct session *s)
/* unknown data source or internal error */
s->txn.status = 500;
client_retnclose(s, error_message(s, HTTP_ERR_500));
trace_term(s, TT_HTTP_CNT_1);
if (!(s->flags & SN_ERR_MASK))
s->flags |= SN_ERR_PRXCOND;
if (!(s->flags & SN_FINST_MASK))
@ -5056,6 +5083,7 @@ int stats_check_uri_auth(struct session *t, struct proxy *backend)
msg.len = sprintf(trash, HTTP_401_fmt, uri_auth->auth_realm);
txn->status = 401;
client_retnclose(t, &msg);
trace_term(t, TT_HTTP_URI_1);
t->analysis &= ~AN_REQ_ANY;
if (!(t->flags & SN_ERR_MASK))
t->flags |= SN_ERR_PRXCOND;

View File

@ -413,6 +413,7 @@ int uxst_event_accept(int fd) {
s->flags = 0;
s->analysis = 0;
s->term_trace = 0;
if ((t = pool_alloc2(pool2_task)) == NULL) {
Alert("out of memory in uxst_event_accept().\n");