From e86bc356725903d9b29c03d2b9751203afad2048 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Thu, 8 Sep 2022 16:38:10 +0200 Subject: [PATCH] MINOR: activity/cli: support sorting task profiling by total CPU time The new "bytime" sorting criterion uses the reported CPU time instead of the usage. This is convenient to spot tasks that are mostly reponsible for the CPU usage in a running process. It supports both the detailed and the aggregated format. The output looks like this: > show profiling tasks bytime Tasks activity: function calls cpu_tot cpu_avg lat_tot lat_avg qc_io_cb 117739 1.961m 999.1us 37.45s 318.1us <- h3_snd_buf@src/h3.c:1084 tasklet_wakeup process_stream 7376273 1.384m 11.26us 1.013h 494.2us <- stream_new@src/stream.c:563 task_wakeup process_stream 8104400 1.133m 8.389us 1.130h 502.0us <- sc_notify@src/stconn.c:1209 task_wakeup qc_io_cb 43280 45.76s 1.057ms 13.95s 322.3us <- qc_stream_desc_ack@src/quic_stream.c:128 tasklet_wakeup h1_io_cb 11025715 24.82s 2.251us 5.406m 29.42us <- sock_conn_iocb@src/sock.c:869 tasklet_wakeup quic_conn_app_io_cb 312861 23.86s 76.27us 2.373s 7.584us <- qc_lstnr_pkt_rcv@src/xprt_quic.c:6184 tasklet_wakeup_after qc_io_cb 37063 12.65s 341.4us 6.409s 172.9us <- qc_treat_acked_tx_frm@src/xprt_quic.c:1695 tasklet_wakeup h1_io_cb 4783520 11.79s 2.463us 1.419h 1.068ms <- conn_subscribe@src/connection.c:732 tasklet_wakeup sc_conn_io_cb 12269693 11.51s 938.0ns 2.117h 621.2us <- sc_app_chk_rcv_conn@src/stconn.c:762 tasklet_wakeup sc_conn_io_cb 6479006 10.94s 1.689us 7.984m 73.93us <- h1_wake_stream_for_recv@src/mux_h1.c:2600 tasklet_wakeup qc_io_cb 12011 10.72s 892.5us 2.120s 176.5us <- qcc_release_remote_stream@src/mux_quic.c:1200 tasklet_wakeup h2_io_cb 246423 6.225s 25.26us 56.52s 229.4us <- h2_snd_buf@src/mux_h2.c:6712 tasklet_wakeup h2_io_cb 137744 6.076s 44.11us 16.59s 120.4us <- sock_conn_iocb@src/sock.c:869 tasklet_wakeup quic_lstnr_dghdlr 323575 3.062s 9.462us 3.424m 634.9us <- quic_lstnr_dgram_dispatch@src/quic_sock.c:255 tasklet_wakeup sc_conn_io_cb 1206939 1.616s 1.338us 27.62m 1.373ms <- qcs_notify_send@src/mux_quic.c:529 tasklet_wakeup h2_io_cb 212370 251.2ms 1.182us 6.476s 30.49us <- h2c_restart_reading@src/mux_h2.c:856 tasklet_wakeup h1_io_cb 44109 197.0ms 4.466us 31.89s 723.0us <- h1_takeover@src/mux_h1.c:4085 tasklet_wakeup quic_conn_app_io_cb 3029 87.59ms 28.92us 999.0ms 329.8us <- qc_process_timer@src/xprt_quic.c:4635 tasklet_wakeup task_run_applet 40 35.77ms 894.3us 4.407ms 110.2us <- sc_applet_create@src/stconn.c:489 appctx_wakeup task_run_applet 18 27.36ms 1.520ms 19.56us 1.086us <- sc_app_chk_snd_applet@src/stconn.c:996 appctx_wakeup sc_conn_io_cb 2186 11.76ms 5.377us 963.0ms 440.5us <- h1_wake_stream_for_send@src/mux_h1.c:2610 tasklet_wakeup qc_io_cb 8 9.880ms 1.235ms 5.871ms 733.9us <- qcs_consume@src/mux_quic.c:800 tasklet_wakeup quic_conn_io_cb 4 5.951ms 1.488ms 38.85us 9.713us <- qc_lstnr_pkt_rcv@src/xprt_quic.c:6184 tasklet_wakeup_after qc_io_cb 101 4.975ms 49.26us 13.91ms 137.8us <- qc_process_timer@src/xprt_quic.c:4602 tasklet_wakeup h1_io_cb 2186 1.809ms 827.0ns 720.2ms 329.5us <- sock_conn_iocb@src/sock.c:849 tasklet_wakeup qc_process_timer 3031 1.735ms 572.0ns 1.153s 380.3us <- wake_expired_tasks@src/task.c:344 task_wakeup accept_queue_process 359 1.362ms 3.793us 80.32ms 223.7us <- listener_accept@src/listener.c:1099 tasklet_wakeup quic_conn_app_io_cb 2 921.1us 460.6us 203.1us 101.5us <- qc_xprt_start@src/xprt_quic.c:7122 tasklet_wakeup h1_timeout_task 2618 526.8us 201.0ns 1.121s 428.4us <- h1_release@src/mux_h1.c:1087 task_wakeup process_resolvers 316 283.3us 896.0ns 14.96ms 47.33us <- wake_expired_tasks@src/task.c:429 task_drop_running sc_conn_io_cb 420 235.6us 560.0ns 116.7ms 277.8us <- h2s_notify_recv@src/mux_h2.c:1298 tasklet_wakeup qc_idle_timer_task 1 225.5us 225.5us 506.0ns 506.0ns <- wake_expired_tasks@src/task.c:344 task_wakeup accept_queue_process 36 153.0us 4.250us 5.834ms 162.1us <- accept_queue_process@src/listener.c:165 tasklet_wakeup sc_conn_io_cb 18 54.05us 3.003us 11.50us 638.0ns <- sock_conn_iocb@src/sock.c:869 tasklet_wakeup h2_io_cb 6 38.88us 6.480us 2.089ms 348.2us <- h2_do_shutw@src/mux_h2.c:4656 tasklet_wakeup srv_cleanup_idle_conns 54 37.72us 698.0ns 14.21ms 263.1us <- wake_expired_tasks@src/task.c:429 task_drop_running sc_conn_io_cb 50 32.86us 657.0ns 28.83ms 576.5us <- qcs_notify_recv@src/mux_quic.c:519 tasklet_wakeup qc_io_cb 2 30.25us 15.12us 6.093us 3.046us <- qc_init@src/mux_quic.c:2057 tasklet_wakeup srv_cleanup_toremove_conns 1 27.16us 27.16us 905.6us 905.6us <- srv_cleanup_idle_conns@src/server.c:5948 task_wakeup task_run_applet 39 19.61us 502.0ns 818.7us 20.99us <- run_tasks_from_lists@src/task.c:652 task_drop_running quic_accept_run 2 15.46us 7.727us 305.5us 152.8us <- quic_accept_push_qc@src/quic_sock.c:458 tasklet_wakeup h2_timeout_task 32 12.91us 403.0ns 4.207ms 131.5us <- h2_release@src/mux_h2.c:1191 task_wakeup quic_conn_app_io_cb 1 9.645us 9.645us 1.445us 1.445us <- qc_process_timer@src/xprt_quic.c:4589 tasklet_wakeup > show profiling tasks bytime aggr Tasks activity: function calls cpu_tot cpu_avg lat_tot lat_avg qc_io_cb 212301 3.147m 889.5us 1.009m 285.2us process_stream 15503573 2.519m 9.747us 2.148h 498.7us h1_io_cb 15916733 36.95s 2.321us 1.535h 347.1us quic_conn_app_io_cb 318845 24.21s 75.92us 3.410s 10.70us sc_conn_io_cb 20037058 24.19s 1.207us 2.737h 491.8us h2_io_cb 596543 12.55s 21.04us 1.326m 133.4us quic_lstnr_dghdlr 326624 3.094s 9.473us 3.462m 635.9us task_run_applet 100 64.43ms 644.3us 5.285ms 52.85us quic_conn_io_cb 4 5.951ms 1.488ms 38.85us 9.713us qc_process_timer 3061 1.750ms 571.0ns 1.162s 379.5us accept_queue_process 396 1.521ms 3.840us 86.16ms 217.6us h1_timeout_task 2618 526.8us 201.0ns 1.121s 428.4us process_resolvers 319 286.0us 896.0ns 16.82ms 52.73us qc_idle_timer_task 1 225.5us 225.5us 506.0ns 506.0ns srv_cleanup_idle_conns 54 37.72us 698.0ns 14.21ms 263.1us srv_cleanup_toremove_conns 1 27.16us 27.16us 905.6us 905.6us quic_accept_run 2 15.46us 7.727us 305.5us 152.8us h2_timeout_task 32 12.91us 403.0ns 4.207ms 131.5us --- doc/management.txt | 9 +++++---- src/activity.c | 35 +++++++++++++++++++++++++++++------ 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/doc/management.txt b/doc/management.txt index e6bc9ca044..d335d53652 100644 --- a/doc/management.txt +++ b/doc/management.txt @@ -2866,7 +2866,7 @@ show pools as the SIGQUIT when running in foreground except that it does not flush the pools. -show profiling [{all | status | tasks | memory}] [byaddr] [aggr] [] +show profiling [{all | status | tasks | memory}] [byaddr|bytime|aggr|]* Dumps the current profiling settings, one per line, as well as the command needed to change them. When tasks profiling is enabled, some per-function statistics collected by the scheduler will also be emitted, with a summary @@ -2877,9 +2877,10 @@ show profiling [{all | status | tasks | memory}] [byaddr] [aggr] [] profiling by specifying the respective keywords; by default all profiling information are dumped. It is also possible to limit the number of lines of output of each category by specifying a numeric limit. If is possible to - request that the output is sorted by address instead of usage, e.g. to ease - comparisons between subsequent calls, and to aggregate task activity by - called function instead of seeing the details. Please note that profiling is + request that the output is sorted by address or by total execution time + instead of usage, e.g. to ease comparisons between subsequent calls or to + check what needs to be optimized, and to aggregate task activity by called + function instead of seeing the details. Please note that profiling is essentially aimed at developers since it gives hints about where CPU cycles or memory are wasted in the code. There is nothing useful to monitor there. diff --git a/src/activity.c b/src/activity.c index 44c2e287b7..27914e001f 100644 --- a/src/activity.c +++ b/src/activity.c @@ -27,7 +27,7 @@ struct show_prof_ctx { int dump_step; /* 0,1,2,4,5,6; see cli_iohandler_show_profiling() */ int linenum; /* next line to be dumped (starts at 0) */ int maxcnt; /* max line count per step (0=not set) */ - int by_addr; /* 0=sort by usage, 1=sort by address */ + int by_what; /* 0=sort by usage, 1=sort by address, 2=sort by time */ int aggr; /* 0=dump raw, 1=aggregate on callee */ }; @@ -502,6 +502,24 @@ static int cmp_sched_activity_addr(const void *a, const void *b) return 0; } +/* sort by cpu time first, then by inverse call count (to spot highest offenders) */ +static int cmp_sched_activity_cpu(const void *a, const void *b) +{ + const struct sched_activity *l = (const struct sched_activity *)a; + const struct sched_activity *r = (const struct sched_activity *)b; + + if (l->cpu_time > r->cpu_time) + return -1; + else if (l->cpu_time < r->cpu_time) + return 1; + else if (l->calls < r->calls) + return -1; + else if (l->calls > r->calls) + return 1; + else + return 0; +} + #ifdef USE_MEMORY_PROFILING /* used by qsort below */ static int cmp_memprof_stats(const void *a, const void *b) @@ -635,7 +653,7 @@ static int cli_io_handler_show_profiling(struct appctx *appctx) memcpy(tmp_activity, sched_activity, sizeof(tmp_activity)); /* for addr sort and for callee aggregation we have to first sort by address */ - if (ctx->aggr || ctx->by_addr) + if (ctx->aggr || ctx->by_what == 1) // sort by addr qsort(tmp_activity, SCHED_ACT_HASH_BUCKETS, sizeof(tmp_activity[0]), cmp_sched_activity_addr); if (ctx->aggr) { @@ -650,8 +668,10 @@ static int cli_io_handler_show_profiling(struct appctx *appctx) } } - if (!ctx->by_addr) + if (!ctx->by_what) // sort by usage qsort(tmp_activity, SCHED_ACT_HASH_BUCKETS, sizeof(tmp_activity[0]), cmp_sched_activity_calls); + else if (ctx->by_what == 2) // by cpu_tot + qsort(tmp_activity, SCHED_ACT_HASH_BUCKETS, sizeof(tmp_activity[0]), cmp_sched_activity_cpu); if (!ctx->linenum) chunk_appendf(&trash, "Tasks activity:\n" @@ -716,7 +736,7 @@ static int cli_io_handler_show_profiling(struct appctx *appctx) goto skip_mem; memcpy(tmp_memstats, memprof_stats, sizeof(tmp_memstats)); - if (ctx->by_addr) + if (ctx->by_what) qsort(tmp_memstats, MEMPROF_HASH_BUCKETS+1, sizeof(tmp_memstats[0]), cmp_memprof_addr); else qsort(tmp_memstats, MEMPROF_HASH_BUCKETS+1, sizeof(tmp_memstats[0]), cmp_memprof_stats); @@ -826,7 +846,10 @@ static int cli_parse_show_profiling(char **args, char *payload, struct appctx *a ctx->dump_step = 6; // will visit memory only } else if (strcmp(args[arg], "byaddr") == 0) { - ctx->by_addr = 1; // sort output by address instead of usage + ctx->by_what = 1; // sort output by address instead of usage + } + else if (strcmp(args[arg], "bytime") == 0) { + ctx->by_what = 2; // sort output by total time instead of usage } else if (strcmp(args[arg], "aggr") == 0) { ctx->aggr = 1; // aggregate output by callee @@ -835,7 +858,7 @@ static int cli_parse_show_profiling(char **args, char *payload, struct appctx *a ctx->maxcnt = atoi(args[arg]); // number of entries to dump } else - return cli_err(appctx, "Expects either 'all', 'status', 'tasks', 'memory', 'byaddr', 'aggr' or a max number of output lines.\n"); + return cli_err(appctx, "Expects either 'all', 'status', 'tasks', 'memory', 'byaddr', 'bytime', 'aggr' or a max number of output lines.\n"); } return 0; }