Update mixin dashboard (#4078)

Update and rewrite the mixin dashboard to use the grafonnet ([1])
library.
Grafana has deprecated Angular plugins ([2]), which grafonnet-lib ([3])
uses, and their removal is pending for Grafana version 12.
Additionally, grafonnet-lib is deprecated and unmaintained in favor of
grafonnet.
Therefore, the mixin dashboard has been updated to use grafonnet.

[1]
https://github.com/grafana/grafonnet

[2]
https://grafana.com/docs/grafana/latest/developers/angular_deprecation/

[3]
https://github.com/grafana/grafonnet-lib

Signed-off-by: Jan Horstmann <horstmann@osism.tech>
This commit is contained in:
janhorstmann 2024-10-29 10:59:51 +01:00 committed by GitHub
parent d04ef60a16
commit bd70e73fc7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 187 additions and 138 deletions

View File

@ -1,154 +1,173 @@
local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet'; local grafana = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet';
local dashboard = grafana.dashboard; local dashboard = grafana.dashboard;
local row = grafana.row; local prometheus = grafana.query.prometheus;
local prometheus = grafana.prometheus; local variable = dashboard.variable;
local template = grafana.template; local panel = grafana.panel;
local graphPanel = grafana.graphPanel; local row = panel.row;
{ {
grafanaDashboards+:: { grafanaDashboards+:: {
local amQuerySelector = std.join(',', ['%s=~"$%s"' % [label, label] for label in std.split($._config.alertmanagerClusterLabels, ',')]), local amQuerySelector = std.join(',', ['%s=~"$%s"' % [label, label] for label in std.split($._config.alertmanagerClusterLabels, ',')]),
local amNameDashboardLegend = std.join('/', ['{{%s}}' % [label] for label in std.split($._config.alertmanagerNameLabels, ',')]), local amNameDashboardLegend = std.join('/', ['{{%s}}' % [label] for label in std.split($._config.alertmanagerNameLabels, ',')]),
local alertmanagerClusterSelectorTemplates = local datasource =
variable.datasource.new('datasource', 'prometheus')
+ variable.datasource.generalOptions.withLabel('Data Source')
+ variable.datasource.generalOptions.withCurrent('Prometheus')
+ variable.datasource.generalOptions.showOnDashboard.withLabelAndValue(),
local alertmanagerClusterSelectorVariables =
[ [
template.new( variable.query.new(label)
name=label, + variable.query.generalOptions.withLabel(label)
label=label, + variable.query.withDatasourceFromVariable(datasource)
datasource='$datasource', + variable.query.queryTypes.withLabelValues(label, metric='alertmanager_alerts')
query='label_values(alertmanager_alerts, %s)' % label, + variable.query.generalOptions.withCurrent('')
current='', + variable.query.refresh.onTime()
refresh=2, + variable.query.selectionOptions.withIncludeAll(false)
includeAll=false, + variable.query.withSort(type='alphabetical')
sort=1
)
for label in std.split($._config.alertmanagerClusterLabels, ',') for label in std.split($._config.alertmanagerClusterLabels, ',')
], ],
local integrationTemplate = local integrationVariable =
template.new( variable.query.new('integration')
name='integration', + variable.query.withDatasourceFromVariable(datasource)
datasource='$datasource', + variable.query.queryTypes.withLabelValues('integration', metric='alertmanager_notifications_total{integration=~"%s"}' % $._config.alertmanagerCriticalIntegrationsRegEx)
query='label_values(alertmanager_notifications_total{integration=~"%s"}, integration)' % $._config.alertmanagerCriticalIntegrationsRegEx, + variable.query.generalOptions.withCurrent('$__all')
current='all', + variable.datasource.generalOptions.showOnDashboard.withNothing()
hide='2', // Always hide + variable.query.refresh.onTime()
refresh=2, + variable.query.selectionOptions.withIncludeAll(true)
includeAll=true, + variable.query.withSort(type='alphabetical'),
sort=1
), local panelTimeSeriesStdOptions =
{}
+ panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal')
+ panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(10)
+ panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never')
+ panel.timeSeries.options.legend.withShowLegend(false)
+ panel.timeSeries.options.tooltip.withMode('multi')
+ panel.timeSeries.queryOptions.withDatasource('prometheus', '$datasource'),
'alertmanager-overview.json': 'alertmanager-overview.json':
local alerts = local alerts =
graphPanel.new( panel.timeSeries.new('Alerts')
'Alerts', + panel.timeSeries.panelOptions.withDescription('current set of alerts stored in the Alertmanager')
description='current set of alerts stored in the Alertmanager', + panel.timeSeries.standardOptions.withUnit('none')
datasource='$datasource', + panelTimeSeriesStdOptions
span=6, + panel.timeSeries.queryOptions.withTargets([
format='none', prometheus.new(
stack=true, '$datasource',
fill=1, 'sum(alertmanager_alerts{%(amQuerySelector)s}) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector },
legend_show=false, )
) + prometheus.withIntervalFactor(2)
.addTarget(prometheus.target('sum(alertmanager_alerts{%(amQuerySelector)s}) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s' % $._config { amNameDashboardLegend: amNameDashboardLegend })); + prometheus.withLegendFormat('%(amNameDashboardLegend)s' % $._config { amNameDashboardLegend: amNameDashboardLegend }),
]);
local alertsRate = local alertsRate =
graphPanel.new( panel.timeSeries.new('Alerts receive rate')
'Alerts receive rate', + panel.timeSeries.panelOptions.withDescription('rate of successful and invalid alerts received by the Alertmanager')
description='rate of successful and invalid alerts received by the Alertmanager', + panel.timeSeries.standardOptions.withUnit('ops')
datasource='$datasource', + panelTimeSeriesStdOptions
span=6, + panel.timeSeries.queryOptions.withTargets([
format='ops', prometheus.new(
stack=true, '$datasource',
fill=1, 'sum(rate(alertmanager_alerts_received_total{%(amQuerySelector)s}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector },
legend_show=false, )
) + prometheus.withIntervalFactor(2)
.addTarget(prometheus.target('sum(rate(alertmanager_alerts_received_total{%(amQuerySelector)s}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Received' % $._config { amNameDashboardLegend: amNameDashboardLegend })) + prometheus.withLegendFormat('%(amNameDashboardLegend)s Received' % $._config { amNameDashboardLegend: amNameDashboardLegend }),
.addTarget(prometheus.target('sum(rate(alertmanager_alerts_invalid_total{%(amQuerySelector)s}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Invalid' % $._config { amNameDashboardLegend: amNameDashboardLegend })); prometheus.new(
'$datasource',
'sum(rate(alertmanager_alerts_invalid_total{%(amQuerySelector)s}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector },
)
+ prometheus.withIntervalFactor(2)
+ prometheus.withLegendFormat('%(amNameDashboardLegend)s Invalid' % $._config { amNameDashboardLegend: amNameDashboardLegend }),
]);
local notifications = local notifications =
graphPanel.new( panel.timeSeries.new('$integration: Notifications Send Rate')
'$integration: Notifications Send Rate', + panel.timeSeries.panelOptions.withDescription('rate of successful and invalid notifications sent by the Alertmanager')
description='rate of successful and invalid notifications sent by the Alertmanager', + panel.timeSeries.standardOptions.withUnit('ops')
datasource='$datasource', + panelTimeSeriesStdOptions
format='ops', + panel.timeSeries.panelOptions.withRepeat('integration')
stack=true, + panel.timeSeries.queryOptions.withTargets([
fill=1, prometheus.new(
legend_show=false, '$datasource',
repeat='integration' 'sum(rate(alertmanager_notifications_total{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (integration,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector },
) )
.addTarget(prometheus.target('sum(rate(alertmanager_notifications_total{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (integration,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Total' % $._config { amNameDashboardLegend: amNameDashboardLegend })) + prometheus.withIntervalFactor(2)
.addTarget(prometheus.target('sum(rate(alertmanager_notifications_failed_total{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (integration,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Failed' % $._config { amNameDashboardLegend: amNameDashboardLegend })); + prometheus.withLegendFormat('%(amNameDashboardLegend)s Total' % $._config { amNameDashboardLegend: amNameDashboardLegend }),
prometheus.new(
'$datasource',
'sum(rate(alertmanager_notifications_failed_total{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (integration,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector },
)
+ prometheus.withIntervalFactor(2)
+ prometheus.withLegendFormat('%(amNameDashboardLegend)s Failed' % $._config { amNameDashboardLegend: amNameDashboardLegend }),
]);
local notificationDuration = local notificationDuration =
graphPanel.new( panel.timeSeries.new('$integration: Notification Duration')
'$integration: Notification Duration', + panel.timeSeries.panelOptions.withDescription('latency of notifications sent by the Alertmanager')
description='latency of notifications sent by the Alertmanager', + panel.timeSeries.standardOptions.withUnit('s')
datasource='$datasource', + panelTimeSeriesStdOptions
format='s', + panel.timeSeries.panelOptions.withRepeat('integration')
stack=false, + panel.timeSeries.queryOptions.withTargets([
fill=1, prometheus.new(
legend_show=false, '$datasource',
repeat='integration' |||
) histogram_quantile(0.99,
.addTarget(prometheus.target( sum(rate(alertmanager_notification_latency_seconds_bucket{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (le,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)
||| )
histogram_quantile(0.99, ||| % $._config { amQuerySelector: amQuerySelector },
sum(rate(alertmanager_notification_latency_seconds_bucket{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (le,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s) )
) + prometheus.withIntervalFactor(2)
||| % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s 99th Percentile' % $._config { amNameDashboardLegend: amNameDashboardLegend } + prometheus.withLegendFormat('%(amNameDashboardLegend)s 99th Percentile' % $._config { amNameDashboardLegend: amNameDashboardLegend }),
)) prometheus.new(
.addTarget(prometheus.target( '$datasource',
||| |||
histogram_quantile(0.50, histogram_quantile(0.50,
sum(rate(alertmanager_notification_latency_seconds_bucket{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (le,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s) sum(rate(alertmanager_notification_latency_seconds_bucket{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (le,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)
) )
||| % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Median' % $._config { amNameDashboardLegend: amNameDashboardLegend } ||| % $._config { amQuerySelector: amQuerySelector },
)) )
.addTarget(prometheus.target( + prometheus.withIntervalFactor(2)
||| + prometheus.withLegendFormat('%(amNameDashboardLegend)s Median' % $._config { amNameDashboardLegend: amNameDashboardLegend }),
sum(rate(alertmanager_notification_latency_seconds_sum{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s) prometheus.new(
/ '$datasource',
sum(rate(alertmanager_notification_latency_seconds_count{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s) |||
||| % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Average' % $._config { amNameDashboardLegend: amNameDashboardLegend } sum(rate(alertmanager_notification_latency_seconds_sum{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)
)); /
sum(rate(alertmanager_notification_latency_seconds_count{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)
||| % $._config { amQuerySelector: amQuerySelector },
)
+ prometheus.withIntervalFactor(2)
+ prometheus.withLegendFormat('%(amNameDashboardLegend)s Average' % $._config { amNameDashboardLegend: amNameDashboardLegend }),
]);
dashboard.new( dashboard.new('%sOverview' % $._config.dashboardNamePrefix)
'%sOverview' % $._config.dashboardNamePrefix, + dashboard.time.withFrom('now-1h')
time_from='now-1h', + dashboard.withTags($._config.dashboardTags)
tags=($._config.dashboardTags), + dashboard.withTimezone('utc')
timezone='utc', + dashboard.timepicker.withRefreshIntervals('30s')
refresh='30s', + dashboard.graphTooltip.withSharedCrosshair()
graphTooltip='shared_crosshair', + dashboard.withUid('alertmanager-overview')
uid='alertmanager-overview' + dashboard.withVariables(
) [datasource]
.addTemplate( + alertmanagerClusterSelectorVariables
{ + [integrationVariable]
current: { )
text: 'Prometheus', + dashboard.withPanels(
value: 'Prometheus', grafana.util.grid.makeGrid([
}, row.new('Alerts')
hide: 0, + row.withPanels([
label: 'Data Source', alerts,
name: 'datasource', alertsRate
options: [], ]),
query: 'prometheus', row.new('Notifications')
refresh: 1, + row.withPanels([
regex: '', notifications,
type: 'datasource', notificationDuration
}, ])
) ], panelWidth=12, panelHeight=7)
.addTemplates(alertmanagerClusterSelectorTemplates) )
.addTemplate(integrationTemplate)
.addRow(
row.new('Alerts')
.addPanel(alerts)
.addPanel(alertsRate)
)
.addRow(
row.new('Notifications')
.addPanel(notifications)
.addPanel(notificationDuration)
),
}, },
} }

View File

@ -4,11 +4,11 @@
{ {
"source": { "source": {
"git": { "git": {
"remote": "https://github.com/grafana/grafonnet-lib.git", "remote": "https://github.com/grafana/grafonnet.git",
"subdir": "grafonnet" "subdir": "gen/grafonnet-latest"
} }
}, },
"version": "master" "version": "main"
} }
], ],
"legacyImports": false "legacyImports": false

View File

@ -4,12 +4,42 @@
{ {
"source": { "source": {
"git": { "git": {
"remote": "https://github.com/grafana/grafonnet-lib.git", "remote": "https://github.com/grafana/grafonnet.git",
"subdir": "grafonnet" "subdir": "gen/grafonnet-latest"
} }
}, },
"version": "55cf4ee53ced2b6d3ce96ecce9fb813b4465be98", "version": "1ce5aec95ce32336fe47c8881361847c475b5254",
"sum": "4/sUV0Kk+o8I+wlYxL9R6EPhL/NiLfYHk+NXlU64RUk=" "sum": "64fMUPI3frXGj4X1FqFd1t7r04w3CUSmXaDcJ23EYbQ="
},
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet.git",
"subdir": "gen/grafonnet-v11.1.0"
}
},
"version": "1ce5aec95ce32336fe47c8881361847c475b5254",
"sum": "41w7p/rwrNsITqNHMXtGSJAfAyKmnflg6rFhKBduUxM="
},
{
"source": {
"git": {
"remote": "https://github.com/jsonnet-libs/docsonnet.git",
"subdir": "doc-util"
}
},
"version": "6ac6c69685b8c29c54515448eaca583da2d88150",
"sum": "BrAL/k23jq+xy9oA7TWIhUx07dsA/QLm3g7ktCwe//U="
},
{
"source": {
"git": {
"remote": "https://github.com/jsonnet-libs/xtd.git",
"subdir": ""
}
},
"version": "63d430b69a95741061c2f7fc9d84b1a778511d9c",
"sum": "qiZi3axUSXCVzKUF83zSAxklwrnitMmrDK4XAfjPMdE="
} }
], ],
"legacyImports": false "legacyImports": false