From 3e6f4ce627e588e9972e624f1f744c716e11b199 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann
Date: Thu, 4 Nov 2021 12:03:27 +0200
Subject: [PATCH] mixin: exclude iowait and steal from CPU Utilisation (#2194)

'iowait' and 'steal' indicate specific idle/wait states, which shouldn't be
counted into CPU Utilisation. Also see
https://github.com/prometheus-operator/kube-prometheus/pull/796 and
https://github.com/kubernetes-monitoring/kubernetes-mixin/pull/667.

Per the iostat man page:

%idle
    Show the percentage of time that the CPU or CPUs were idle and the
    system did not have an outstanding disk I/O request.
%iowait
    Show the percentage of time that the CPU or CPUs were idle during
    which the system had an outstanding disk I/O request.
%steal
    Show the percentage of time spent in involuntary wait by the
    virtual CPU or CPUs while the hypervisor was servicing another
    virtual processor.

Signed-off-by: Julian Wiedmann
---
 docs/node-mixin/dashboards/node.libsonnet | 4 ++--
 docs/node-mixin/rules/rules.libsonnet     | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/node-mixin/dashboards/node.libsonnet b/docs/node-mixin/dashboards/node.libsonnet
index fbc5b782..ef8d3ae2 100644
--- a/docs/node-mixin/dashboards/node.libsonnet
+++ b/docs/node-mixin/dashboards/node.libsonnet
@@ -23,9 +23,9 @@ local gauge = promgrafonnet.gauge;
         .addTarget(prometheus.target(
           |||
             (
-              (1 - rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance"}[$__rate_interval]))
+              (1 - sum without (mode) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode=~"idle|iowait|steal", instance="$instance"}[$__rate_interval])))
             / ignoring(cpu) group_left
-              count without (cpu)( node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance"})
+              count without (cpu, mode) (node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance"})
             )
           ||| % $._config,
           legendFormat='{{cpu}}',
diff --git a/docs/node-mixin/rules/rules.libsonnet b/docs/node-mixin/rules/rules.libsonnet
index df62e4d4..9c8eb90d 100644
--- a/docs/node-mixin/rules/rules.libsonnet
+++ b/docs/node-mixin/rules/rules.libsonnet
@@ -14,11 +14,11 @@
             ||| % $._config,
           },
           {
-            // CPU utilisation is % CPU is not idle.
+            // CPU utilisation is % CPU without {idle,iowait,steal}.
             record: 'instance:node_cpu_utilisation:rate%(rateInterval)s' % $._config,
             expr: |||
-              1 - avg without (cpu, mode) (
-                rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle"}[%(rateInterval)s])
+              1 - avg without (cpu) (
+                sum without (mode) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode=~"idle|iowait|steal"}[%(rateInterval)s]))
               )
             ||| % $._config,
           },