MINOR: stream: maintain per-stream counters of the number of passes on code

Process_stream() is a complex function and a few times some loops were
either witnessed or suspected. Each time this happens it's extremely
difficult to figure out why because it involves combinations of analysers,
filters, errors etc.

Let's at least maintain a set of 4 counters per stream that report the
number of times we've been through each of the 4 most important blocks
(stconn changes, request analysers, response analysers, and propagation
of changes down). These ones are stored in the stream and reported in
"show sess all", just like they will be reported in panic dumps.
This commit is contained in:
Willy Tarreau 2024-10-22 16:35:04 +02:00
parent ce314cfb39
commit 37d5c6fe3a
2 changed files with 12 additions and 1 deletions

View File

@ -246,6 +246,10 @@ struct stream {
uint64_t lat_time; /* total latency time experienced */ uint64_t lat_time; /* total latency time experienced */
uint64_t cpu_time; /* total CPU time consumed */ uint64_t cpu_time; /* total CPU time consumed */
struct freq_ctr call_rate; /* stream task call rate without making progress */ struct freq_ctr call_rate; /* stream task call rate without making progress */
uint32_t passes_stconn; /* number of passes on the stconn evaluation code */
uint32_t passes_reqana; /* number of passes on the req analysers block */
uint32_t passes_resana; /* number of passes on the res analysers block */
uint32_t passes_propag; /* number of passes on the shut/err propag code */
unsigned short max_retries; /* Maximum number of connection retried (=0 is backend is not set) */ unsigned short max_retries; /* Maximum number of connection retried (=0 is backend is not set) */
short store_count; short store_count;

View File

@ -421,6 +421,7 @@ struct stream *stream_new(struct session *sess, struct stconn *sc, struct buffer
s->lat_time = s->cpu_time = 0; s->lat_time = s->cpu_time = 0;
s->call_rate.curr_tick = s->call_rate.curr_ctr = s->call_rate.prev_ctr = 0; s->call_rate.curr_tick = s->call_rate.curr_ctr = s->call_rate.prev_ctr = 0;
s->passes_stconn = s->passes_reqana = s->passes_resana = s->passes_propag = 0;
s->pcli_next_pid = 0; s->pcli_next_pid = 0;
s->pcli_flags = 0; s->pcli_flags = 0;
s->unique_id = IST_NULL; s->unique_id = IST_NULL;
@ -1840,6 +1841,7 @@ struct task *process_stream(struct task *t, void *context, unsigned int state)
* the client cannot have connect (hence retryable) errors. Also, the * the client cannot have connect (hence retryable) errors. Also, the
* connection setup code must be able to deal with any type of abort. * connection setup code must be able to deal with any type of abort.
*/ */
s->passes_stconn++;
srv = objt_server(s->target); srv = objt_server(s->target);
if (unlikely(scf->flags & SC_FL_ERROR)) { if (unlikely(scf->flags & SC_FL_ERROR)) {
if (sc_state_in(scf->state, SC_SB_EST|SC_SB_DIS)) { if (sc_state_in(scf->state, SC_SB_EST|SC_SB_DIS)) {
@ -1969,6 +1971,7 @@ struct task *process_stream(struct task *t, void *context, unsigned int state)
*/ */
resync_request: resync_request:
s->passes_reqana++;
/* Analyse request */ /* Analyse request */
if (((req->flags & ~rqf_last) & CF_MASK_ANALYSER) || if (((req->flags & ~rqf_last) & CF_MASK_ANALYSER) ||
((scf->flags ^ scf_flags) & (SC_FL_EOS|SC_FL_ABRT_DONE|SC_FL_ABRT_WANTED)) || ((scf->flags ^ scf_flags) & (SC_FL_EOS|SC_FL_ABRT_DONE|SC_FL_ABRT_WANTED)) ||
@ -2073,6 +2076,7 @@ struct task *process_stream(struct task *t, void *context, unsigned int state)
req_ana_back = req->analysers; req_ana_back = req->analysers;
resync_response: resync_response:
s->passes_resana++;
/* Analyse response */ /* Analyse response */
if (((res->flags & ~rpf_last) & CF_MASK_ANALYSER) || if (((res->flags & ~rpf_last) & CF_MASK_ANALYSER) ||
@ -2155,7 +2159,7 @@ struct task *process_stream(struct task *t, void *context, unsigned int state)
* both buffers. * both buffers.
*/ */
s->passes_propag++;
/* /*
* Now we propagate unhandled errors to the stream. Normally * Now we propagate unhandled errors to the stream. Normally
* we're just in a data phase here since it means we have not * we're just in a data phase here since it means we have not
@ -3308,6 +3312,9 @@ void strm_dump_to_buffer(struct buffer *buf, const struct stream *strm, const ch
strm->conn_err_type, strm->srv_conn, strm->pend_pos, strm->conn_err_type, strm->srv_conn, strm->pend_pos,
LIST_INLIST(&strm->buffer_wait.list), strm->stream_epoch); LIST_INLIST(&strm->buffer_wait.list), strm->stream_epoch);
chunk_appendf(buf, "%s p_stc=%u p_req=%u p_res=%u p_prp=%u\n", pfx,
strm->passes_stconn, strm->passes_reqana, strm->passes_resana, strm->passes_propag);
chunk_appendf(buf, chunk_appendf(buf,
"%s frontend=%s (id=%u mode=%s), listener=%s (id=%u)", pfx, "%s frontend=%s (id=%u mode=%s), listener=%s (id=%u)", pfx,
HA_ANON_STR(anon_key, strm_fe(strm)->id), strm_fe(strm)->uuid, proxy_mode_str(strm_fe(strm)->mode), HA_ANON_STR(anon_key, strm_fe(strm)->id), strm_fe(strm)->uuid, proxy_mode_str(strm_fe(strm)->mode),