MINOR: activity/cli: support sorting task profiling by total CPU time

The new "bytime" sorting criterion uses the reported CPU time instead of
the usage. This is convenient to spot tasks that are mostly reponsible
for the CPU usage in a running process. It supports both the detailed
and the aggregated format. The output looks like this:

> show profiling tasks bytime
Tasks activity:
  function                      calls   cpu_tot   cpu_avg   lat_tot   lat_avg
  qc_io_cb                     117739   1.961m    999.1us   37.45s    318.1us <- h3_snd_buf@src/h3.c:1084 tasklet_wakeup
  process_stream              7376273   1.384m    11.26us   1.013h    494.2us <- stream_new@src/stream.c:563 task_wakeup
  process_stream              8104400   1.133m    8.389us   1.130h    502.0us <- sc_notify@src/stconn.c:1209 task_wakeup
  qc_io_cb                      43280   45.76s    1.057ms   13.95s    322.3us <- qc_stream_desc_ack@src/quic_stream.c:128 tasklet_wakeup
  h1_io_cb                   11025715   24.82s    2.251us   5.406m    29.42us <- sock_conn_iocb@src/sock.c:869 tasklet_wakeup
  quic_conn_app_io_cb          312861   23.86s    76.27us   2.373s    7.584us <- qc_lstnr_pkt_rcv@src/xprt_quic.c:6184 tasklet_wakeup_after
  qc_io_cb                      37063   12.65s    341.4us   6.409s    172.9us <- qc_treat_acked_tx_frm@src/xprt_quic.c:1695 tasklet_wakeup
  h1_io_cb                    4783520   11.79s    2.463us   1.419h    1.068ms <- conn_subscribe@src/connection.c:732 tasklet_wakeup
  sc_conn_io_cb              12269693   11.51s    938.0ns   2.117h    621.2us <- sc_app_chk_rcv_conn@src/stconn.c:762 tasklet_wakeup
  sc_conn_io_cb               6479006   10.94s    1.689us   7.984m    73.93us <- h1_wake_stream_for_recv@src/mux_h1.c:2600 tasklet_wakeup
  qc_io_cb                      12011   10.72s    892.5us   2.120s    176.5us <- qcc_release_remote_stream@src/mux_quic.c:1200 tasklet_wakeup
  h2_io_cb                     246423   6.225s    25.26us   56.52s    229.4us <- h2_snd_buf@src/mux_h2.c:6712 tasklet_wakeup
  h2_io_cb                     137744   6.076s    44.11us   16.59s    120.4us <- sock_conn_iocb@src/sock.c:869 tasklet_wakeup
  quic_lstnr_dghdlr            323575   3.062s    9.462us   3.424m    634.9us <- quic_lstnr_dgram_dispatch@src/quic_sock.c:255 tasklet_wakeup
  sc_conn_io_cb               1206939   1.616s    1.338us   27.62m    1.373ms <- qcs_notify_send@src/mux_quic.c:529 tasklet_wakeup
  h2_io_cb                     212370   251.2ms   1.182us   6.476s    30.49us <- h2c_restart_reading@src/mux_h2.c:856 tasklet_wakeup
  h1_io_cb                      44109   197.0ms   4.466us   31.89s    723.0us <- h1_takeover@src/mux_h1.c:4085 tasklet_wakeup
  quic_conn_app_io_cb            3029   87.59ms   28.92us   999.0ms   329.8us <- qc_process_timer@src/xprt_quic.c:4635 tasklet_wakeup
  task_run_applet                  40   35.77ms   894.3us   4.407ms   110.2us <- sc_applet_create@src/stconn.c:489 appctx_wakeup
  task_run_applet                  18   27.36ms   1.520ms   19.56us   1.086us <- sc_app_chk_snd_applet@src/stconn.c:996 appctx_wakeup
  sc_conn_io_cb                  2186   11.76ms   5.377us   963.0ms   440.5us <- h1_wake_stream_for_send@src/mux_h1.c:2610 tasklet_wakeup
  qc_io_cb                          8   9.880ms   1.235ms   5.871ms   733.9us <- qcs_consume@src/mux_quic.c:800 tasklet_wakeup
  quic_conn_io_cb                   4   5.951ms   1.488ms   38.85us   9.713us <- qc_lstnr_pkt_rcv@src/xprt_quic.c:6184 tasklet_wakeup_after
  qc_io_cb                        101   4.975ms   49.26us   13.91ms   137.8us <- qc_process_timer@src/xprt_quic.c:4602 tasklet_wakeup
  h1_io_cb                       2186   1.809ms   827.0ns   720.2ms   329.5us <- sock_conn_iocb@src/sock.c:849 tasklet_wakeup
  qc_process_timer               3031   1.735ms   572.0ns   1.153s    380.3us <- wake_expired_tasks@src/task.c:344 task_wakeup
  accept_queue_process            359   1.362ms   3.793us   80.32ms   223.7us <- listener_accept@src/listener.c:1099 tasklet_wakeup
  quic_conn_app_io_cb               2   921.1us   460.6us   203.1us   101.5us <- qc_xprt_start@src/xprt_quic.c:7122 tasklet_wakeup
  h1_timeout_task                2618   526.8us   201.0ns   1.121s    428.4us <- h1_release@src/mux_h1.c:1087 task_wakeup
  process_resolvers               316   283.3us   896.0ns   14.96ms   47.33us <- wake_expired_tasks@src/task.c:429 task_drop_running
  sc_conn_io_cb                   420   235.6us   560.0ns   116.7ms   277.8us <- h2s_notify_recv@src/mux_h2.c:1298 tasklet_wakeup
  qc_idle_timer_task                1   225.5us   225.5us   506.0ns   506.0ns <- wake_expired_tasks@src/task.c:344 task_wakeup
  accept_queue_process             36   153.0us   4.250us   5.834ms   162.1us <- accept_queue_process@src/listener.c:165 tasklet_wakeup
  sc_conn_io_cb                    18   54.05us   3.003us   11.50us   638.0ns <- sock_conn_iocb@src/sock.c:869 tasklet_wakeup
  h2_io_cb                          6   38.88us   6.480us   2.089ms   348.2us <- h2_do_shutw@src/mux_h2.c:4656 tasklet_wakeup
  srv_cleanup_idle_conns           54   37.72us   698.0ns   14.21ms   263.1us <- wake_expired_tasks@src/task.c:429 task_drop_running
  sc_conn_io_cb                    50   32.86us   657.0ns   28.83ms   576.5us <- qcs_notify_recv@src/mux_quic.c:519 tasklet_wakeup
  qc_io_cb                          2   30.25us   15.12us   6.093us   3.046us <- qc_init@src/mux_quic.c:2057 tasklet_wakeup
  srv_cleanup_toremove_conns        1   27.16us   27.16us   905.6us   905.6us <- srv_cleanup_idle_conns@src/server.c:5948 task_wakeup
  task_run_applet                  39   19.61us   502.0ns   818.7us   20.99us <- run_tasks_from_lists@src/task.c:652 task_drop_running
  quic_accept_run                   2   15.46us   7.727us   305.5us   152.8us <- quic_accept_push_qc@src/quic_sock.c:458 tasklet_wakeup
  h2_timeout_task                  32   12.91us   403.0ns   4.207ms   131.5us <- h2_release@src/mux_h2.c:1191 task_wakeup
  quic_conn_app_io_cb               1   9.645us   9.645us   1.445us   1.445us <- qc_process_timer@src/xprt_quic.c:4589 tasklet_wakeup

> show profiling tasks bytime aggr
Tasks activity:
  function                      calls   cpu_tot   cpu_avg   lat_tot   lat_avg
  qc_io_cb                     212301   3.147m    889.5us   1.009m    285.2us
  process_stream             15503573   2.519m    9.747us   2.148h    498.7us
  h1_io_cb                   15916733   36.95s    2.321us   1.535h    347.1us
  quic_conn_app_io_cb          318845   24.21s    75.92us   3.410s    10.70us
  sc_conn_io_cb              20037058   24.19s    1.207us   2.737h    491.8us
  h2_io_cb                     596543   12.55s    21.04us   1.326m    133.4us
  quic_lstnr_dghdlr            326624   3.094s    9.473us   3.462m    635.9us
  task_run_applet                 100   64.43ms   644.3us   5.285ms   52.85us
  quic_conn_io_cb                   4   5.951ms   1.488ms   38.85us   9.713us
  qc_process_timer               3061   1.750ms   571.0ns   1.162s    379.5us
  accept_queue_process            396   1.521ms   3.840us   86.16ms   217.6us
  h1_timeout_task                2618   526.8us   201.0ns   1.121s    428.4us
  process_resolvers               319   286.0us   896.0ns   16.82ms   52.73us
  qc_idle_timer_task                1   225.5us   225.5us   506.0ns   506.0ns
  srv_cleanup_idle_conns           54   37.72us   698.0ns   14.21ms   263.1us
  srv_cleanup_toremove_conns        1   27.16us   27.16us   905.6us   905.6us
  quic_accept_run                   2   15.46us   7.727us   305.5us   152.8us
  h2_timeout_task                  32   12.91us   403.0ns   4.207ms   131.5us
Willy Tarreau 2022-09-08 16:38:10 +02:00
parent dc89b1806c
commit e86bc35672
2 changed files with 34 additions and 10 deletions


@@ -2866,7 +2866,7 @@ show pools
   as the SIGQUIT when running in foreground except that it does not flush
   the pools.
 
-show profiling [{all | status | tasks | memory}] [byaddr] [aggr] [<max_lines>]
+show profiling [{all | status | tasks | memory}] [byaddr|bytime|aggr|<max_lines>]*
   Dumps the current profiling settings, one per line, as well as the command
   needed to change them. When tasks profiling is enabled, some per-function
   statistics collected by the scheduler will also be emitted, with a summary
@@ -2877,9 +2877,10 @@ show profiling [{all | status | tasks | memory}] [byaddr] [aggr] [<max_lines>]
   profiling by specifying the respective keywords; by default all profiling
   information are dumped. It is also possible to limit the number of lines
   of output of each category by specifying a numeric limit. If is possible to
-  request that the output is sorted by address instead of usage, e.g. to ease
-  comparisons between subsequent calls, and to aggregate task activity by
-  called function instead of seeing the details. Please note that profiling is
+  request that the output is sorted by address or by total execution time
+  instead of usage, e.g. to ease comparisons between subsequent calls or to
+  check what needs to be optimized, and to aggregate task activity by called
+  function instead of seeing the details. Please note that profiling is
   essentially aimed at developers since it gives hints about where CPU cycles
   or memory are wasted in the code. There is nothing useful to monitor there.
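
For instance, once task profiling is enabled (e.g. via "set profiling
tasks on"), the keywords above can be freely combined; a hypothetical
invocation restricting the dump to the five functions that consumed the
most CPU time would be:

> show profiling tasks bytime 5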


@@ -27,7 +27,7 @@ struct show_prof_ctx {
 	int dump_step; /* 0,1,2,4,5,6; see cli_iohandler_show_profiling() */
 	int linenum;   /* next line to be dumped (starts at 0) */
 	int maxcnt;    /* max line count per step (0=not set) */
-	int by_addr;   /* 0=sort by usage, 1=sort by address */
+	int by_what;   /* 0=sort by usage, 1=sort by address, 2=sort by time */
 	int aggr;      /* 0=dump raw, 1=aggregate on callee */
 };
@@ -502,6 +502,24 @@ static int cmp_sched_activity_addr(const void *a, const void *b)
 	return 0;
 }
 
+/* sort by cpu time first, then by inverse call count (to spot highest offenders) */
+static int cmp_sched_activity_cpu(const void *a, const void *b)
+{
+	const struct sched_activity *l = (const struct sched_activity *)a;
+	const struct sched_activity *r = (const struct sched_activity *)b;
+
+	if (l->cpu_time > r->cpu_time)
+		return -1;
+	else if (l->cpu_time < r->cpu_time)
+		return 1;
+	else if (l->calls < r->calls)
+		return -1;
+	else if (l->calls > r->calls)
+		return 1;
+	else
+		return 0;
+}
+
 #ifdef USE_MEMORY_PROFILING
 /* used by qsort below */
 static int cmp_memprof_stats(const void *a, const void *b)
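
The ordering implemented above (largest total CPU time first; on equal
CPU time, the entry with the fewest calls first, i.e. the worst per-call
offender) can be tried in isolation. The standalone sketch below is
purely illustrative and not part of the patch: struct ent, cmp_by_cpu()
and the sample figures (rounded from the dump in the commit message) are
made up for the example; only the comparison logic mirrors
cmp_sched_activity_cpu().

/* illustration only: sort a few fake entries by total CPU time */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct ent {
	const char *name;
	uint64_t calls;    /* number of calls */
	uint64_t cpu_time; /* total CPU time, here in nanoseconds */
};

/* highest cpu_time first, then lowest call count first */
static int cmp_by_cpu(const void *a, const void *b)
{
	const struct ent *l = a, *r = b;

	if (l->cpu_time > r->cpu_time)
		return -1;
	else if (l->cpu_time < r->cpu_time)
		return 1;
	else if (l->calls < r->calls)
		return -1;
	else if (l->calls > r->calls)
		return 1;
	return 0;
}

int main(void)
{
	struct ent tab[] = {
		{ "h1_io_cb",       11025715,  24820000000ULL },
		{ "process_stream",  7376273,  83040000000ULL },
		{ "qc_io_cb",         117739, 117660000000ULL },
	};
	size_t i;

	qsort(tab, sizeof(tab) / sizeof(tab[0]), sizeof(tab[0]), cmp_by_cpu);

	for (i = 0; i < sizeof(tab) / sizeof(tab[0]); i++)
		printf("%-16s %10llu calls %14llu ns total\n",
		       tab[i].name,
		       (unsigned long long)tab[i].calls,
		       (unsigned long long)tab[i].cpu_time);
	return 0;
}

Built with any C compiler, it prints qc_io_cb first, then
process_stream, then h1_io_cb, i.e. the same ordering as the "bytime"
output above.
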
@@ -635,7 +653,7 @@ static int cli_io_handler_show_profiling(struct appctx *appctx)
 	memcpy(tmp_activity, sched_activity, sizeof(tmp_activity));
 
 	/* for addr sort and for callee aggregation we have to first sort by address */
-	if (ctx->aggr || ctx->by_addr)
+	if (ctx->aggr || ctx->by_what == 1) // sort by addr
 		qsort(tmp_activity, SCHED_ACT_HASH_BUCKETS, sizeof(tmp_activity[0]), cmp_sched_activity_addr);
 
 	if (ctx->aggr) {
@@ -650,8 +668,10 @@ static int cli_io_handler_show_profiling(struct appctx *appctx)
 		}
 	}
 
-	if (!ctx->by_addr)
+	if (!ctx->by_what) // sort by usage
 		qsort(tmp_activity, SCHED_ACT_HASH_BUCKETS, sizeof(tmp_activity[0]), cmp_sched_activity_calls);
+	else if (ctx->by_what == 2) // by cpu_tot
+		qsort(tmp_activity, SCHED_ACT_HASH_BUCKETS, sizeof(tmp_activity[0]), cmp_sched_activity_cpu);
 
 	if (!ctx->linenum)
 		chunk_appendf(&trash, "Tasks activity:\n"
@@ -716,7 +736,7 @@ static int cli_io_handler_show_profiling(struct appctx *appctx)
 		goto skip_mem;
 
 	memcpy(tmp_memstats, memprof_stats, sizeof(tmp_memstats));
-	if (ctx->by_addr)
+	if (ctx->by_what)
 		qsort(tmp_memstats, MEMPROF_HASH_BUCKETS+1, sizeof(tmp_memstats[0]), cmp_memprof_addr);
 	else
 		qsort(tmp_memstats, MEMPROF_HASH_BUCKETS+1, sizeof(tmp_memstats[0]), cmp_memprof_stats);
@@ -826,7 +846,10 @@ static int cli_parse_show_profiling(char **args, char *payload, struct appctx *a
 			ctx->dump_step = 6; // will visit memory only
 		}
 		else if (strcmp(args[arg], "byaddr") == 0) {
-			ctx->by_addr = 1; // sort output by address instead of usage
+			ctx->by_what = 1; // sort output by address instead of usage
+		}
+		else if (strcmp(args[arg], "bytime") == 0) {
+			ctx->by_what = 2; // sort output by total time instead of usage
 		}
 		else if (strcmp(args[arg], "aggr") == 0) {
 			ctx->aggr = 1; // aggregate output by callee
@@ -835,7 +858,7 @@ static int cli_parse_show_profiling(char **args, char *payload, struct appctx *a
 			ctx->maxcnt = atoi(args[arg]); // number of entries to dump
 		}
 		else
-			return cli_err(appctx, "Expects either 'all', 'status', 'tasks', 'memory', 'byaddr', 'aggr' or a max number of output lines.\n");
+			return cli_err(appctx, "Expects either 'all', 'status', 'tasks', 'memory', 'byaddr', 'bytime', 'aggr' or a max number of output lines.\n");
 	}
 	return 0;
 }