Improvement of comments and panel titles

Signed-off-by: beorn7 <beorn@grafana.com>
This commit is contained in:
beorn7 2019-07-22 14:06:27 +02:00
parent e01d9f9e78
commit 36dc7451c9
2 changed files with 11 additions and 6 deletions

View File

@ -23,7 +23,8 @@ local g = import 'grafana-builder/grafana.libsonnet';
{ yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
)
.addPanel(
// TODO: Is this a useful panel?
// TODO: Is this a useful panel? At least there should be some explanation of how
// load average relates to the "CPU saturation" in the title.
g.panel('CPU Saturation (load1 per CPU)') +
g.queryPanel(|||
(
@ -58,6 +59,8 @@ local g = import 'grafana-builder/grafana.libsonnet';
g.panel('Disk IO Utilisation') +
// Full utilisation would be all disks on each node spending an average of
// 1 second per second doing I/O, normalize by metric cardinality for stacked charts.
// TODO: Does the partition by device make sense? Using the most utilized device per
// instance might make more sense.
g.queryPanel(|||
(
instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s}
@ -113,7 +116,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
.addRow(
g.row('Storage')
.addPanel(
g.panel('Disk Capacity') +
g.panel('Disk Space Utilisation') +
g.queryPanel(|||
(
sum without (device) (
@ -145,6 +148,8 @@ local g = import 'grafana-builder/grafana.libsonnet';
{ yaxes: g.yaxes('percentunit') },
)
.addPanel(
// TODO: Is this a useful panel? At least there should be some explanation of how
// load average relates to the "CPU saturation" in the title.
g.panel('CPU Saturation (Load1)') +
g.queryPanel('instance:node_cpu_saturation_load1:{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Saturation') +
{ yaxes: g.yaxes('percentunit') },

View File

@ -28,7 +28,7 @@
// This is CPU saturation: 1min avg run queue length / number of CPUs.
// Can go over 1.
// TODO: There are situations where a run queue >1/core is just normal and fine.
// We need to clarify how to lead this metric and if its usage is helpful at all.
// We need to clarify how to read this metric and if its usage is helpful at all.
record: 'instance:node_load1_per_cpu:ratio',
expr: |||
(
@ -39,7 +39,7 @@
||| % $._config,
},
{
// Memory utilisation per node, normalized by per-node memory
// Memory utilisation (ratio of used memory per instance).
record: 'instance:node_memory_utilisation:ratio',
expr: |||
1 - (
@ -60,14 +60,14 @@
||| % $._config,
},
{
// Disk utilisation (seconds spent, 1 second rate)
// Disk utilisation (seconds spent doing I/O, as a per-second rate averaged over 1m).
record: 'instance_device:node_disk_io_time_seconds:rate1m',
expr: |||
rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[1m])
||| % $._config,
},
{
// Disk saturation (weighted seconds spent, 1 second rate)
// Disk saturation (weighted seconds spent doing I/O, as a per-second rate averaged over 1m).
record: 'instance_device:node_disk_io_time_weighted_seconds:rate1m',
expr: |||
rate(node_disk_io_time_weighted_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[1m])