Fix up some of the USE metrics.
Signed-off-by: Tom Wilkie <tom.wilkie@gmail.com>
This commit is contained in:
parent
c34275d6e5
commit
642f67ffa1
|
@ -45,7 +45,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
|
|||
// Full utilisation would be all disks on each node spending an average of
|
||||
// 1 sec per second doing I/O, normalize by node count for stacked charts
|
||||
g.queryPanel(|||
|
||||
instance:node_disk_utilisation:avg_irate / scalar(sum(up{%(nodeExporterSelector)s}))
|
||||
instance:node_disk_utilisation:sum_irate / scalar(sum(up{%(nodeExporterSelector)s}))
|
||||
||| % $._config, '{{instance}}', legendLink) +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
|
||||
|
@ -53,7 +53,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
|
|||
.addPanel(
|
||||
g.panel('Disk IO Saturation') +
|
||||
g.queryPanel(|||
|
||||
instance:node_disk_saturation:avg_irate / scalar(sum(up{%(nodeExporterSelector)s}))
|
||||
instance:node_disk_saturation:sum_irate / scalar(sum(up{%(nodeExporterSelector)s}))
|
||||
||| % $._config, '{{instance}}', legendLink) +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
|
||||
|
@ -104,7 +104,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
|
|||
g.row('Memory')
|
||||
.addPanel(
|
||||
g.panel('Memory Utilisation') +
|
||||
g.queryPanel('instance:node_memory_utilisation:{instance="$instance"}', 'Memory') +
|
||||
g.queryPanel('instance:node_memory_utilisation:ratio{instance="$instance"}', 'Memory') +
|
||||
{ yaxes: g.yaxes('percentunit') },
|
||||
)
|
||||
.addPanel(
|
||||
|
@ -117,12 +117,12 @@ local g = import 'grafana-builder/grafana.libsonnet';
|
|||
g.row('Disk')
|
||||
.addPanel(
|
||||
g.panel('Disk IO Utilisation') +
|
||||
g.queryPanel('instance:node_disk_utilisation:avg_irate{instance="$instance"}', 'Utilisation') +
|
||||
g.queryPanel('instance:node_disk_utilisation:sum_irate{instance="$instance"}', 'Utilisation') +
|
||||
{ yaxes: g.yaxes('percentunit') },
|
||||
)
|
||||
.addPanel(
|
||||
g.panel('Disk IO Saturation') +
|
||||
g.queryPanel('instance:node_disk_saturation:avg_irate{instance="$instance"}', 'Saturation') +
|
||||
g.queryPanel('instance:node_disk_saturation:sum_irate{instance="$instance"}', 'Saturation') +
|
||||
{ yaxes: g.yaxes('percentunit') },
|
||||
)
|
||||
)
|
||||
|
|
|
@ -29,20 +29,9 @@
|
|||
// Can go over 100%. >100% is bad.
|
||||
record: 'instance:node_cpu_saturation_load1:',
|
||||
expr: |||
|
||||
sum by (instance) (
|
||||
node_load1{%(nodeExporterSelector)s}
|
||||
)
|
||||
sum by (instance) (node_load1{%(nodeExporterSelector)s})
|
||||
/
|
||||
instance:node_num_cpu:sum
|
||||
||| % $._config,
|
||||
},
|
||||
{
|
||||
// Available memory per node
|
||||
record: 'instance:node_memory_bytes_available:sum',
|
||||
expr: |||
|
||||
sum by (instance) (
|
||||
(node_memory_MemFree{%(nodeExporterSelector)s} + node_memory_Cached{%(nodeExporterSelector)s} + node_memory_Buffers{%(nodeExporterSelector)s})
|
||||
)
|
||||
instance:node_num_cpu:sum
|
||||
||| % $._config,
|
||||
},
|
||||
{
|
||||
|
@ -58,17 +47,13 @@
|
|||
// Memory utilisation per node, normalized by per-node memory
|
||||
record: 'instance:node_memory_utilisation:ratio',
|
||||
expr: |||
|
||||
(instance:node_memory_bytes_total:sum - instance:node_memory_bytes_available:sum)
|
||||
/
|
||||
scalar(sum(instance:node_memory_bytes_total:sum))
|
||||
1 - (
|
||||
node_memory_MemAvailable{%(nodeExporterSelector)s}
|
||||
/
|
||||
node_memory_MemTotal{%(nodeExporterSelector)s}
|
||||
)
|
||||
||| % $._config,  // interpolate the %(nodeExporterSelector)s placeholders used in the expression above
|
||||
},
|
||||
{
|
||||
record: 'instance:node_memory_utilisation:',
|
||||
expr: |||
|
||||
1 - (instance:node_memory_bytes_available:sum / instance:node_memory_bytes_total:sum)
|
||||
||| % $._config,
|
||||
},
|
||||
{
|
||||
record: 'instance:node_memory_swap_io_bytes:sum_rate',
|
||||
expr: |||
|
||||
|
@ -79,19 +64,19 @@
|
|||
||| % $._config,
|
||||
},
|
||||
{
|
||||
// Disk utilisation (ms spent, by rate() it's bound by 1 second)
|
||||
record: 'instance:node_disk_utilisation:avg_irate',
|
||||
// Disk utilisation (ms spent doing I/O, converted to seconds per second via irate() / 1e3)
|
||||
record: 'instance:node_disk_utilisation:sum_irate',
|
||||
expr: |||
|
||||
avg by (instance) (
|
||||
sum by (instance) (
|
||||
irate(node_disk_io_time_ms{%(nodeExporterSelector)s,device=~"(sd|xvd).+"}[1m]) / 1e3
|
||||
)
|
||||
||| % $._config,
|
||||
},
|
||||
{
|
||||
// Disk saturation (weighted ms spent doing I/O, converted to seconds per second via irate() / 1e3)
|
||||
record: 'instance:node_disk_saturation:avg_irate',
|
||||
record: 'instance:node_disk_saturation:sum_irate',
|
||||
expr: |||
|
||||
avg by (instance) (
|
||||
sum by (instance) (
|
||||
irate(node_disk_io_time_weighted{%(nodeExporterSelector)s,device=~"(sd|xvd).+"}[1m]) / 1e3
|
||||
)
|
||||
||| % $._config,
|
||||
|
@ -100,8 +85,8 @@
|
|||
record: 'instance:node_net_utilisation:sum_irate',
|
||||
expr: |||
|
||||
sum by (instance) (
|
||||
(irate(node_network_receive_bytes{%(nodeExporterSelector)s,device="eth0"}[1m]) +
|
||||
irate(node_network_transmit_bytes{%(nodeExporterSelector)s,device="eth0"}[1m]))
|
||||
(irate(node_network_receive_bytes{%(nodeExporterSelector)s,device=~"eth[0-9]+"}[1m]) +
|
||||
irate(node_network_transmit_bytes{%(nodeExporterSelector)s,device=~"eth[0-9]+"}[1m]))
|
||||
)
|
||||
||| % $._config,
|
||||
},
|
||||
|
@ -109,8 +94,8 @@
|
|||
record: 'instance:node_net_saturation:sum_irate',
|
||||
expr: |||
|
||||
sum by (instance) (
|
||||
(irate(node_network_receive_drop{%(nodeExporterSelector)s,device="eth0"}[1m]) +
|
||||
irate(node_network_transmit_drop{%(nodeExporterSelector)s,device="eth0"}[1m]))
|
||||
(irate(node_network_receive_drop{%(nodeExporterSelector)s,device=~"eth[0-9]+"}[1m]) +
|
||||
irate(node_network_transmit_drop{%(nodeExporterSelector)s,device=~"eth[0-9]+"}[1m]))
|
||||
)
|
||||
||| % $._config,
|
||||
},
|
||||
|
|
Loading…
Reference in New Issue