Merge pull request #1530 from prometheus/beorn7/mixin
Fix the normalization for the cluster-wide dashboards
This commit is contained in:
commit
44774994fe
|
@ -2,7 +2,12 @@
|
|||
_config+:: {
|
||||
// Selectors are inserted between {} in Prometheus queries.
|
||||
|
||||
// Select the metrics coming from the node exporter.
|
||||
// Select the metrics coming from the node exporter. Note that all
|
||||
// the selected metrics are shown stacked on top of each other in
|
||||
// the 'USE Method / Cluster' dashboard. Consider disabling that
|
||||
// dashboard if mixing up all those metrics in the same dashboard
|
||||
// doesn't make sense (e.g. because they are coming from different
|
||||
// clusters).
|
||||
nodeExporterSelector: 'job="node"',
|
||||
|
||||
// Select the fstype for filesystem-related queries. If left
|
||||
|
|
|
@ -15,9 +15,8 @@ local g = import 'grafana-builder/grafana.libsonnet';
|
|||
instance:node_cpu_utilisation:rate1m{%(nodeExporterSelector)s}
|
||||
*
|
||||
instance:node_num_cpu:sum{%(nodeExporterSelector)s}
|
||||
/ ignoring (instance) group_left
|
||||
sum without (instance) (instance:node_num_cpu:sum{%(nodeExporterSelector)s})
|
||||
)
|
||||
/ scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s}))
|
||||
||| % $._config, '{{instance}}', legendLink) +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
|
||||
|
@ -27,11 +26,8 @@ local g = import 'grafana-builder/grafana.libsonnet';
|
|||
// average relates to the "CPU saturation" in the title.
|
||||
g.panel('CPU Saturation (load1 per CPU)') +
|
||||
g.queryPanel(|||
|
||||
(
|
||||
instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s}
|
||||
/ ignoring (instance) group_left
|
||||
count without (instance) (instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s})
|
||||
)
|
||||
instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s}
|
||||
/ scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s}))
|
||||
||| % $._config, '{{instance}}', legendLink) +
|
||||
g.stack +
|
||||
// TODO: Does `max: 1` make sense? The stack adds up to the cluster-wide average load1 per CPU, which can go over 1 in high-load scenarios.
|
||||
|
@ -43,11 +39,8 @@ local g = import 'grafana-builder/grafana.libsonnet';
|
|||
.addPanel(
|
||||
g.panel('Memory Utilisation') +
|
||||
g.queryPanel(|||
|
||||
(
|
||||
instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s}
|
||||
/ ignoring (instance) group_left
|
||||
count without (instance) (instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s})
|
||||
)
|
||||
instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s}
|
||||
/ scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s}))
|
||||
||| % $._config, '{{instance}}', legendLink) +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
|
||||
|
@ -123,11 +116,8 @@ local g = import 'grafana-builder/grafana.libsonnet';
|
|||
// TODO: Does the partition by device make sense? Using the most utilized device per
|
||||
// instance might make more sense.
|
||||
g.queryPanel(|||
|
||||
(
|
||||
instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s}
|
||||
/ ignoring (instance, device) group_left
|
||||
count without (instance, device) (instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s})
|
||||
)
|
||||
instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s}
|
||||
/ scalar(count(instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s}))
|
||||
||| % $._config, '{{instance}} {{device}}', legendLink) +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
|
||||
|
@ -135,11 +125,8 @@ local g = import 'grafana-builder/grafana.libsonnet';
|
|||
.addPanel(
|
||||
g.panel('Disk IO Saturation') +
|
||||
g.queryPanel(|||
|
||||
(
|
||||
instance_device:node_disk_io_time_weighted_seconds:rate1m{%(nodeExporterSelector)s}
|
||||
/ ignoring (instance, device) group_left
|
||||
count without (instance, device) (instance_device:node_disk_io_time_weighted_seconds:rate1m{%(nodeExporterSelector)s})
|
||||
)
|
||||
instance_device:node_disk_io_time_weighted_seconds:rate1m{%(nodeExporterSelector)s}
|
||||
/ scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate1m{%(nodeExporterSelector)s}))
|
||||
||| % $._config, '{{instance}} {{device}}', legendLink) +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
|
||||
|
@ -150,19 +137,12 @@ local g = import 'grafana-builder/grafana.libsonnet';
|
|||
.addPanel(
|
||||
g.panel('Disk Space Utilisation') +
|
||||
g.queryPanel(|||
|
||||
(
|
||||
sum without (device) (
|
||||
max without (fstype, mountpoint) (
|
||||
node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s} - node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s}
|
||||
)
|
||||
)
|
||||
/ ignoring (instance) group_left
|
||||
sum without (instance, device) (
|
||||
max without (fstype, mountpoint) (
|
||||
node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s}
|
||||
)
|
||||
sum without (device) (
|
||||
max without (fstype, mountpoint) (
|
||||
node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s} - node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s}
|
||||
)
|
||||
)
|
||||
/ scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s})))
|
||||
||| % $._config, '{{instance}}', legendLink) +
|
||||
g.stack +
|
||||
{ yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
|
||||
|
|
Loading…
Reference in New Issue