Add multi-cluster support for Nodes dashboard (#2945)

Signed-off-by: Adrian Berger <adria.berger94@gmail.com>
This commit is contained in:
Adrian Berger 2024-03-08 14:41:36 +01:00 committed by GitHub
parent 8dbeac961e
commit cc49133321
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 56 additions and 39 deletions

View File

@ -27,6 +27,23 @@ local table = grafana70.panel.table;
type: 'datasource', type: 'datasource',
}, },
local clusterTemplatePrototype =
template.new(
'cluster',
'$datasource',
'',
hide=if config.showMultiCluster then '' else '2',
refresh='time',
label='Cluster',
),
local clusterTemplate =
if platform == 'Darwin' then
clusterTemplatePrototype
{ query: 'label_values(node_uname_info{%(nodeExporterSelector)s, sysname="Darwin"}, %(clusterLabel)s)' % config }
else
clusterTemplatePrototype
{ query: 'label_values(node_uname_info{%(nodeExporterSelector)s, sysname!="Darwin"}, %(clusterLabel)s)' % config },
local instanceTemplatePrototype = local instanceTemplatePrototype =
template.new( template.new(
'instance', 'instance',
@ -38,11 +55,10 @@ local table = grafana70.panel.table;
local instanceTemplate = local instanceTemplate =
if platform == 'Darwin' then if platform == 'Darwin' then
instanceTemplatePrototype instanceTemplatePrototype
{ query: 'label_values(node_uname_info{%(nodeExporterSelector)s, sysname="Darwin"}, instance)' % config } { query: 'label_values(node_uname_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster", sysname="Darwin"}, instance)' % config }
else else
instanceTemplatePrototype instanceTemplatePrototype
{ query: 'label_values(node_uname_info{%(nodeExporterSelector)s, sysname!="Darwin"}, instance)' % config }, { query: 'label_values(node_uname_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster", sysname!="Darwin"}, instance)' % config },
local idleCPU = local idleCPU =
graphPanel.new( graphPanel.new(
@ -57,9 +73,9 @@ local table = grafana70.panel.table;
.addTarget(prometheus.target( .addTarget(prometheus.target(
||| |||
( (
(1 - sum without (mode) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode=~"idle|iowait|steal", instance="$instance"}[$__rate_interval]))) (1 - sum without (mode) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode=~"idle|iowait|steal", instance="$instance", %(clusterLabel)s="$cluster"}[$__rate_interval])))
/ ignoring(cpu) group_left / ignoring(cpu) group_left
count without (cpu, mode) (node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance"}) count without (cpu, mode) (node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance", %(clusterLabel)s="$cluster"})
) )
||| % config, ||| % config,
legendFormat='{{cpu}}', legendFormat='{{cpu}}',
@ -75,10 +91,10 @@ local table = grafana70.panel.table;
min=0, min=0,
fill=0, fill=0,
) )
.addTarget(prometheus.target('node_load1{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='1m load average')) .addTarget(prometheus.target('node_load1{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='1m load average'))
.addTarget(prometheus.target('node_load5{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='5m load average')) .addTarget(prometheus.target('node_load5{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='5m load average'))
.addTarget(prometheus.target('node_load15{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='15m load average')) .addTarget(prometheus.target('node_load15{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='15m load average'))
.addTarget(prometheus.target('count(node_cpu_seconds_total{%(nodeExporterSelector)s, instance="$instance", mode="idle"})' % config, legendFormat='logical cores')), .addTarget(prometheus.target('count(node_cpu_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", mode="idle"})' % config, legendFormat='logical cores')),
local memoryGraphPanelPrototype = local memoryGraphPanelPrototype =
graphPanel.new( graphPanel.new(
@ -94,44 +110,44 @@ local table = grafana70.panel.table;
.addTarget(prometheus.target( .addTarget(prometheus.target(
||| |||
( (
node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance"} node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
- -
node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance"} node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
- -
node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance"} node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
- -
node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance"} node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
) )
||| % config, ||| % config,
legendFormat='memory used' legendFormat='memory used'
)) ))
.addTarget(prometheus.target('node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='memory buffers')) .addTarget(prometheus.target('node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='memory buffers'))
.addTarget(prometheus.target('node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='memory cached')) .addTarget(prometheus.target('node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='memory cached'))
.addTarget(prometheus.target('node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='memory free')) .addTarget(prometheus.target('node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='memory free'))
else if platform == 'Darwin' then else if platform == 'Darwin' then
// not useful to stack // not useful to stack
memoryGraphPanelPrototype { stack: false } memoryGraphPanelPrototype { stack: false }
.addTarget(prometheus.target('node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='Physical Memory')) .addTarget(prometheus.target('node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='Physical Memory'))
.addTarget(prometheus.target( .addTarget(prometheus.target(
||| |||
( (
node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance"} - node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} -
node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance"} + node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} +
node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance"} + node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} +
node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance"} node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
) )
||| % config, legendFormat='Memory Used' ||| % config, legendFormat='Memory Used'
)) ))
.addTarget(prometheus.target( .addTarget(prometheus.target(
||| |||
( (
node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance"} - node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} -
node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance"} node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
) )
||| % config, legendFormat='App Memory' ||| % config, legendFormat='App Memory'
)) ))
.addTarget(prometheus.target('node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='Wired Memory')) .addTarget(prometheus.target('node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='Wired Memory'))
.addTarget(prometheus.target('node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='Compressed')), .addTarget(prometheus.target('node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='Compressed')),
// NOTE: avg() is used to circumvent a label change caused by a node_exporter rollout. // NOTE: avg() is used to circumvent a label change caused by a node_exporter rollout.
local memoryGaugePanelPrototype = local memoryGaugePanelPrototype =
@ -155,8 +171,8 @@ local table = grafana70.panel.table;
||| |||
100 - 100 -
( (
avg(node_memory_MemAvailable_bytes{%(nodeExporterSelector)s, instance="$instance"}) / avg(node_memory_MemAvailable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) /
avg(node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance"}) avg(node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"})
* 100 * 100
) )
||| % config, ||| % config,
@ -168,12 +184,12 @@ local table = grafana70.panel.table;
||| |||
( (
( (
avg(node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance"}) - avg(node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) -
avg(node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance"}) + avg(node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) +
avg(node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance"}) + avg(node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) +
avg(node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance"}) avg(node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"})
) / ) /
avg(node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance"}) avg(node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"})
) )
* *
100 100
@ -190,17 +206,17 @@ local table = grafana70.panel.table;
) )
// TODO: Does it make sense to have those three in the same panel? // TODO: Does it make sense to have those three in the same panel?
.addTarget(prometheus.target( .addTarget(prometheus.target(
'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__rate_interval])' % config, 'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config,
legendFormat='{{device}} read', legendFormat='{{device}} read',
intervalFactor=1, intervalFactor=1,
)) ))
.addTarget(prometheus.target( .addTarget(prometheus.target(
'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__rate_interval])' % config, 'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config,
legendFormat='{{device}} written', legendFormat='{{device}} written',
intervalFactor=1, intervalFactor=1,
)) ))
.addTarget(prometheus.target( .addTarget(prometheus.target(
'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__rate_interval])' % config, 'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config,
legendFormat='{{device}} io time', legendFormat='{{device}} io time',
intervalFactor=1, intervalFactor=1,
)) + )) +
@ -232,7 +248,7 @@ local table = grafana70.panel.table;
.addThresholdStep(color='red', value=0.9) .addThresholdStep(color='red', value=0.9)
.addTarget(prometheus.target( .addTarget(prometheus.target(
||| |||
max by (mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s, %(fsMountpointSelector)s}) max by (mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, %(fsMountpointSelector)s})
||| % config, ||| % config,
legendFormat='', legendFormat='',
instant=true, instant=true,
@ -240,7 +256,7 @@ local table = grafana70.panel.table;
)) ))
.addTarget(prometheus.target( .addTarget(prometheus.target(
||| |||
max by (mountpoint) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s, %(fsMountpointSelector)s}) max by (mountpoint) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, %(fsMountpointSelector)s})
||| % config, ||| % config,
legendFormat='', legendFormat='',
instant=true, instant=true,
@ -421,7 +437,7 @@ local table = grafana70.panel.table;
fill=0, fill=0,
) )
.addTarget(prometheus.target( .addTarget(prometheus.target(
'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__rate_interval]) * 8' % config, 'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config,
legendFormat='{{device}}', legendFormat='{{device}}',
intervalFactor=1, intervalFactor=1,
)), )),
@ -437,7 +453,7 @@ local table = grafana70.panel.table;
fill=0, fill=0,
) )
.addTarget(prometheus.target( .addTarget(prometheus.target(
'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__rate_interval]) * 8' % config, 'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config,
legendFormat='{{device}}', legendFormat='{{device}}',
intervalFactor=1, intervalFactor=1,
)), )),
@ -473,6 +489,7 @@ local table = grafana70.panel.table;
local templates = local templates =
[ [
prometheusDatasourceTemplate, prometheusDatasourceTemplate,
clusterTemplate,
instanceTemplate, instanceTemplate,
], ],