Update mixin dashboard (#4078)
Update and rewrite the mixin dashboard to use the grafonnet ([1]) library. Grafana has deprecated angular plugins ([2]) as used by grafonnet-lib ([3]) with removal pending for grafana version 12. Additionally grafonnet-lib is deprecated/unmaintained in favor of grafonnet. Therefore the mixin dashboard has been updated to use grafonnet. [1] https://github.com/grafana/grafonnet [2] https://grafana.com/docs/grafana/latest/developers/angular_deprecation/ [3] https://github.com/grafana/grafonnet-lib Signed-off-by: Jan Horstmann <horstmann@osism.tech>
This commit is contained in:
parent
d04ef60a16
commit
bd70e73fc7
|
@ -1,154 +1,173 @@
|
|||
local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet';
|
||||
local grafana = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet';
|
||||
local dashboard = grafana.dashboard;
|
||||
local row = grafana.row;
|
||||
local prometheus = grafana.prometheus;
|
||||
local template = grafana.template;
|
||||
local graphPanel = grafana.graphPanel;
|
||||
local prometheus = grafana.query.prometheus;
|
||||
local variable = dashboard.variable;
|
||||
local panel = grafana.panel;
|
||||
local row = panel.row;
|
||||
|
||||
{
|
||||
grafanaDashboards+:: {
|
||||
|
||||
local amQuerySelector = std.join(',', ['%s=~"$%s"' % [label, label] for label in std.split($._config.alertmanagerClusterLabels, ',')]),
|
||||
local amNameDashboardLegend = std.join('/', ['{{%s}}' % [label] for label in std.split($._config.alertmanagerNameLabels, ',')]),
|
||||
|
||||
local alertmanagerClusterSelectorTemplates =
|
||||
// Dashboard variable named 'datasource' that selects among data sources of
// type 'prometheus'. It is labelled 'Data Source', has 'Prometheus' as its
// current value, and is shown on the dashboard with both label and value.
local datasource =
  local ds = variable.datasource;
  local opts = ds.generalOptions;
  ds.new('datasource', 'prometheus')
  + opts.withLabel('Data Source')
  + opts.withCurrent('Prometheus')
  + opts.showOnDashboard.withLabelAndValue(),
|
||||
|
||||
local alertmanagerClusterSelectorVariables =
|
||||
[
|
||||
template.new(
|
||||
name=label,
|
||||
label=label,
|
||||
datasource='$datasource',
|
||||
query='label_values(alertmanager_alerts, %s)' % label,
|
||||
current='',
|
||||
refresh=2,
|
||||
includeAll=false,
|
||||
sort=1
|
||||
)
|
||||
variable.query.new(label)
|
||||
+ variable.query.generalOptions.withLabel(label)
|
||||
+ variable.query.withDatasourceFromVariable(datasource)
|
||||
+ variable.query.queryTypes.withLabelValues(label, metric='alertmanager_alerts')
|
||||
+ variable.query.generalOptions.withCurrent('')
|
||||
+ variable.query.refresh.onTime()
|
||||
+ variable.query.selectionOptions.withIncludeAll(false)
|
||||
+ variable.query.withSort(type='alphabetical')
|
||||
for label in std.split($._config.alertmanagerClusterLabels, ',')
|
||||
],
|
||||
|
||||
local integrationTemplate =
|
||||
template.new(
|
||||
name='integration',
|
||||
datasource='$datasource',
|
||||
query='label_values(alertmanager_notifications_total{integration=~"%s"}, integration)' % $._config.alertmanagerCriticalIntegrationsRegEx,
|
||||
current='all',
|
||||
hide='2', // Always hide
|
||||
refresh=2,
|
||||
includeAll=true,
|
||||
sort=1
|
||||
),
|
||||
// Query variable 'integration': the values of the `integration` label on
// alertmanager_notifications_total, restricted to integrations matching
// $._config.alertmanagerCriticalIntegrationsRegEx. The notification panels
// repeat over this variable, so it is never shown on the dashboard and has
// "All" enabled and selected as its current value.
local integrationVariable =
  variable.query.new('integration')
  + variable.query.withDatasourceFromVariable(datasource)
  + variable.query.queryTypes.withLabelValues('integration', metric='alertmanager_notifications_total{integration=~"%s"}' % $._config.alertmanagerCriticalIntegrationsRegEx)
  + variable.query.generalOptions.withCurrent('$__all')
  // Always hide. Use the query variable's own generalOptions namespace so all
  // options on this variable come from variable.query.
  + variable.query.generalOptions.showOnDashboard.withNothing()
  + variable.query.refresh.onTime()
  + variable.query.selectionOptions.withIncludeAll(true)
  + variable.query.withSort(type='alphabetical'),
|
||||
|
||||
// Options shared by the time series panels of this dashboard: 'normal'
// stacking, fill opacity 10, no points, hidden legend, 'multi' tooltip, and
// the prometheus data source taken from the $datasource variable.
// NOTE(review): stacking is applied to every panel that mixes this in,
// including the notification duration panel — confirm that is intended.
local panelTimeSeriesStdOptions =
  local ts = panel.timeSeries;
  local custom = ts.fieldConfig.defaults.custom;
  custom.stacking.withMode('normal')
  + custom.withFillOpacity(10)
  + custom.withShowPoints('never')
  + ts.options.legend.withShowLegend(false)
  + ts.options.tooltip.withMode('multi')
  + ts.queryOptions.withDatasource('prometheus', '$datasource'),
|
||||
|
||||
'alertmanager-overview.json':
|
||||
local alerts =
|
||||
graphPanel.new(
|
||||
'Alerts',
|
||||
description='current set of alerts stored in the Alertmanager',
|
||||
datasource='$datasource',
|
||||
span=6,
|
||||
format='none',
|
||||
stack=true,
|
||||
fill=1,
|
||||
legend_show=false,
|
||||
)
|
||||
.addTarget(prometheus.target('sum(alertmanager_alerts{%(amQuerySelector)s}) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s' % $._config { amNameDashboardLegend: amNameDashboardLegend }));
|
||||
panel.timeSeries.new('Alerts')
|
||||
+ panel.timeSeries.panelOptions.withDescription('current set of alerts stored in the Alertmanager')
|
||||
+ panel.timeSeries.standardOptions.withUnit('none')
|
||||
+ panelTimeSeriesStdOptions
|
||||
+ panel.timeSeries.queryOptions.withTargets([
|
||||
prometheus.new(
|
||||
'$datasource',
|
||||
'sum(alertmanager_alerts{%(amQuerySelector)s}) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector },
|
||||
)
|
||||
+ prometheus.withIntervalFactor(2)
|
||||
+ prometheus.withLegendFormat('%(amNameDashboardLegend)s' % $._config { amNameDashboardLegend: amNameDashboardLegend }),
|
||||
]);
|
||||
|
||||
local alertsRate =
|
||||
graphPanel.new(
|
||||
'Alerts receive rate',
|
||||
description='rate of successful and invalid alerts received by the Alertmanager',
|
||||
datasource='$datasource',
|
||||
span=6,
|
||||
format='ops',
|
||||
stack=true,
|
||||
fill=1,
|
||||
legend_show=false,
|
||||
)
|
||||
.addTarget(prometheus.target('sum(rate(alertmanager_alerts_received_total{%(amQuerySelector)s}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Received' % $._config { amNameDashboardLegend: amNameDashboardLegend }))
|
||||
.addTarget(prometheus.target('sum(rate(alertmanager_alerts_invalid_total{%(amQuerySelector)s}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Invalid' % $._config { amNameDashboardLegend: amNameDashboardLegend }));
|
||||
panel.timeSeries.new('Alerts receive rate')
|
||||
+ panel.timeSeries.panelOptions.withDescription('rate of successful and invalid alerts received by the Alertmanager')
|
||||
+ panel.timeSeries.standardOptions.withUnit('ops')
|
||||
+ panelTimeSeriesStdOptions
|
||||
+ panel.timeSeries.queryOptions.withTargets([
|
||||
prometheus.new(
|
||||
'$datasource',
|
||||
'sum(rate(alertmanager_alerts_received_total{%(amQuerySelector)s}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector },
|
||||
)
|
||||
+ prometheus.withIntervalFactor(2)
|
||||
+ prometheus.withLegendFormat('%(amNameDashboardLegend)s Received' % $._config { amNameDashboardLegend: amNameDashboardLegend }),
|
||||
prometheus.new(
|
||||
'$datasource',
|
||||
'sum(rate(alertmanager_alerts_invalid_total{%(amQuerySelector)s}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector },
|
||||
)
|
||||
+ prometheus.withIntervalFactor(2)
|
||||
+ prometheus.withLegendFormat('%(amNameDashboardLegend)s Invalid' % $._config { amNameDashboardLegend: amNameDashboardLegend }),
|
||||
]);
|
||||
|
||||
local notifications =
|
||||
graphPanel.new(
|
||||
'$integration: Notifications Send Rate',
|
||||
description='rate of successful and invalid notifications sent by the Alertmanager',
|
||||
datasource='$datasource',
|
||||
format='ops',
|
||||
stack=true,
|
||||
fill=1,
|
||||
legend_show=false,
|
||||
repeat='integration'
|
||||
)
|
||||
.addTarget(prometheus.target('sum(rate(alertmanager_notifications_total{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (integration,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Total' % $._config { amNameDashboardLegend: amNameDashboardLegend }))
|
||||
.addTarget(prometheus.target('sum(rate(alertmanager_notifications_failed_total{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (integration,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Failed' % $._config { amNameDashboardLegend: amNameDashboardLegend }));
|
||||
panel.timeSeries.new('$integration: Notifications Send Rate')
|
||||
+ panel.timeSeries.panelOptions.withDescription('rate of successful and invalid notifications sent by the Alertmanager')
|
||||
+ panel.timeSeries.standardOptions.withUnit('ops')
|
||||
+ panelTimeSeriesStdOptions
|
||||
+ panel.timeSeries.panelOptions.withRepeat('integration')
|
||||
+ panel.timeSeries.queryOptions.withTargets([
|
||||
prometheus.new(
|
||||
'$datasource',
|
||||
'sum(rate(alertmanager_notifications_total{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (integration,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector },
|
||||
)
|
||||
+ prometheus.withIntervalFactor(2)
|
||||
+ prometheus.withLegendFormat('%(amNameDashboardLegend)s Total' % $._config { amNameDashboardLegend: amNameDashboardLegend }),
|
||||
prometheus.new(
|
||||
'$datasource',
|
||||
'sum(rate(alertmanager_notifications_failed_total{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (integration,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector },
|
||||
)
|
||||
+ prometheus.withIntervalFactor(2)
|
||||
+ prometheus.withLegendFormat('%(amNameDashboardLegend)s Failed' % $._config { amNameDashboardLegend: amNameDashboardLegend }),
|
||||
]);
|
||||
|
||||
local notificationDuration =
|
||||
graphPanel.new(
|
||||
'$integration: Notification Duration',
|
||||
description='latency of notifications sent by the Alertmanager',
|
||||
datasource='$datasource',
|
||||
format='s',
|
||||
stack=false,
|
||||
fill=1,
|
||||
legend_show=false,
|
||||
repeat='integration'
|
||||
)
|
||||
.addTarget(prometheus.target(
|
||||
|||
|
||||
histogram_quantile(0.99,
|
||||
sum(rate(alertmanager_notification_latency_seconds_bucket{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (le,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)
|
||||
panel.timeSeries.new('$integration: Notification Duration')
|
||||
+ panel.timeSeries.panelOptions.withDescription('latency of notifications sent by the Alertmanager')
|
||||
+ panel.timeSeries.standardOptions.withUnit('s')
|
||||
+ panelTimeSeriesStdOptions
|
||||
+ panel.timeSeries.panelOptions.withRepeat('integration')
|
||||
+ panel.timeSeries.queryOptions.withTargets([
|
||||
prometheus.new(
|
||||
'$datasource',
|
||||
|||
|
||||
histogram_quantile(0.99,
|
||||
sum(rate(alertmanager_notification_latency_seconds_bucket{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (le,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)
|
||||
)
|
||||
||| % $._config { amQuerySelector: amQuerySelector },
|
||||
)
|
||||
||| % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s 99th Percentile' % $._config { amNameDashboardLegend: amNameDashboardLegend }
|
||||
))
|
||||
.addTarget(prometheus.target(
|
||||
|||
|
||||
histogram_quantile(0.50,
|
||||
sum(rate(alertmanager_notification_latency_seconds_bucket{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (le,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)
|
||||
+ prometheus.withIntervalFactor(2)
|
||||
+ prometheus.withLegendFormat('%(amNameDashboardLegend)s 99th Percentile' % $._config { amNameDashboardLegend: amNameDashboardLegend }),
|
||||
prometheus.new(
|
||||
'$datasource',
|
||||
|||
|
||||
histogram_quantile(0.50,
|
||||
sum(rate(alertmanager_notification_latency_seconds_bucket{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (le,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)
|
||||
)
|
||||
||| % $._config { amQuerySelector: amQuerySelector },
|
||||
)
|
||||
||| % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Median' % $._config { amNameDashboardLegend: amNameDashboardLegend }
|
||||
))
|
||||
.addTarget(prometheus.target(
|
||||
|||
|
||||
sum(rate(alertmanager_notification_latency_seconds_sum{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)
|
||||
/
|
||||
sum(rate(alertmanager_notification_latency_seconds_count{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)
|
||||
||| % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Average' % $._config { amNameDashboardLegend: amNameDashboardLegend }
|
||||
));
|
||||
+ prometheus.withIntervalFactor(2)
|
||||
+ prometheus.withLegendFormat('%(amNameDashboardLegend)s Median' % $._config { amNameDashboardLegend: amNameDashboardLegend }),
|
||||
prometheus.new(
|
||||
'$datasource',
|
||||
|||
|
||||
sum(rate(alertmanager_notification_latency_seconds_sum{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)
|
||||
/
|
||||
sum(rate(alertmanager_notification_latency_seconds_count{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)
|
||||
||| % $._config { amQuerySelector: amQuerySelector },
|
||||
)
|
||||
+ prometheus.withIntervalFactor(2)
|
||||
+ prometheus.withLegendFormat('%(amNameDashboardLegend)s Average' % $._config { amNameDashboardLegend: amNameDashboardLegend }),
|
||||
]);
|
||||
|
||||
dashboard.new(
|
||||
'%sOverview' % $._config.dashboardNamePrefix,
|
||||
time_from='now-1h',
|
||||
tags=($._config.dashboardTags),
|
||||
timezone='utc',
|
||||
refresh='30s',
|
||||
graphTooltip='shared_crosshair',
|
||||
uid='alertmanager-overview'
|
||||
)
|
||||
.addTemplate(
|
||||
{
|
||||
current: {
|
||||
text: 'Prometheus',
|
||||
value: 'Prometheus',
|
||||
},
|
||||
hide: 0,
|
||||
label: 'Data Source',
|
||||
name: 'datasource',
|
||||
options: [],
|
||||
query: 'prometheus',
|
||||
refresh: 1,
|
||||
regex: '',
|
||||
type: 'datasource',
|
||||
},
|
||||
)
|
||||
.addTemplates(alertmanagerClusterSelectorTemplates)
|
||||
.addTemplate(integrationTemplate)
|
||||
.addRow(
|
||||
row.new('Alerts')
|
||||
.addPanel(alerts)
|
||||
.addPanel(alertsRate)
|
||||
)
|
||||
.addRow(
|
||||
row.new('Notifications')
|
||||
.addPanel(notifications)
|
||||
.addPanel(notificationDuration)
|
||||
),
|
||||
// Assemble the overview dashboard: title from the configured name prefix,
// a one hour default time range in UTC, the configured tags, a fixed uid,
// the dashboard variables, and two rows of panels laid out by makeGrid.
dashboard.new('%sOverview' % $._config.dashboardNamePrefix)
+ dashboard.time.withFrom('now-1h')
+ dashboard.withTags($._config.dashboardTags)
+ dashboard.withTimezone('utc')
// Refresh the dashboard every 30s. timepicker.withRefreshIntervals would only
// set the choices offered by the refresh picker, not the refresh rate itself.
+ dashboard.withRefresh('30s')
+ dashboard.graphTooltip.withSharedCrosshair()
+ dashboard.withUid('alertmanager-overview')
+ dashboard.withVariables(
  // Order matters for display: data source first, then the cluster
  // selectors, then the hidden integration variable used for repeating.
  [datasource]
  + alertmanagerClusterSelectorVariables
  + [integrationVariable]
)
+ dashboard.withPanels(
  grafana.util.grid.makeGrid([
    row.new('Alerts')
    + row.withPanels([
      alerts,
      alertsRate,
    ]),
    row.new('Notifications')
    + row.withPanels([
      notifications,
      notificationDuration,
    ]),
  ], panelWidth=12, panelHeight=7)
)
|
||||
},
|
||||
}
|
||||
|
|
|
@ -4,11 +4,11 @@
|
|||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/grafana/grafonnet-lib.git",
|
||||
"subdir": "grafonnet"
|
||||
"remote": "https://github.com/grafana/grafonnet.git",
|
||||
"subdir": "gen/grafonnet-latest"
|
||||
}
|
||||
},
|
||||
"version": "master"
|
||||
"version": "main"
|
||||
}
|
||||
],
|
||||
"legacyImports": false
|
||||
|
|
|
@ -4,12 +4,42 @@
|
|||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/grafana/grafonnet-lib.git",
|
||||
"subdir": "grafonnet"
|
||||
"remote": "https://github.com/grafana/grafonnet.git",
|
||||
"subdir": "gen/grafonnet-latest"
|
||||
}
|
||||
},
|
||||
"version": "55cf4ee53ced2b6d3ce96ecce9fb813b4465be98",
|
||||
"sum": "4/sUV0Kk+o8I+wlYxL9R6EPhL/NiLfYHk+NXlU64RUk="
|
||||
"version": "1ce5aec95ce32336fe47c8881361847c475b5254",
|
||||
"sum": "64fMUPI3frXGj4X1FqFd1t7r04w3CUSmXaDcJ23EYbQ="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/grafana/grafonnet.git",
|
||||
"subdir": "gen/grafonnet-v11.1.0"
|
||||
}
|
||||
},
|
||||
"version": "1ce5aec95ce32336fe47c8881361847c475b5254",
|
||||
"sum": "41w7p/rwrNsITqNHMXtGSJAfAyKmnflg6rFhKBduUxM="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/jsonnet-libs/docsonnet.git",
|
||||
"subdir": "doc-util"
|
||||
}
|
||||
},
|
||||
"version": "6ac6c69685b8c29c54515448eaca583da2d88150",
|
||||
"sum": "BrAL/k23jq+xy9oA7TWIhUx07dsA/QLm3g7ktCwe//U="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/jsonnet-libs/xtd.git",
|
||||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "63d430b69a95741061c2f7fc9d84b1a778511d9c",
|
||||
"sum": "qiZi3axUSXCVzKUF83zSAxklwrnitMmrDK4XAfjPMdE="
|
||||
}
|
||||
],
|
||||
"legacyImports": false
|
||||
|
|
Loading…
Reference in New Issue