From bd70e73fc75ef6efea6ec5952115b9a471050e71 Mon Sep 17 00:00:00 2001 From: janhorstmann Date: Tue, 29 Oct 2024 10:59:51 +0100 Subject: [PATCH] Update mixin dashboard (#4078) Update and rewrite the mixin dashboard to use the grafonnet ([1]) library. Grafana has deprecated angular plugins ([2]) as used by grafonnet-lib ([3]) with removal pending for grafana version 12. Additionally grafonnet-lib is deprecated/unmaintained in favor of grafonnet. Therefore the mixin dashboard has been updated to use grafonnet. [1] https://github.com/grafana/grafonnet [2] https://grafana.com/docs/grafana/latest/developers/angular_deprecation/ [3] https://github.com/grafana/grafonnet-lib Signed-off-by: Jan Horstmann --- .../dashboards/overview.libsonnet | 281 ++++++++++-------- doc/alertmanager-mixin/jsonnetfile.json | 6 +- doc/alertmanager-mixin/jsonnetfile.lock.json | 38 ++- 3 files changed, 187 insertions(+), 138 deletions(-) diff --git a/doc/alertmanager-mixin/dashboards/overview.libsonnet b/doc/alertmanager-mixin/dashboards/overview.libsonnet index 27c02d25..e1b099d3 100644 --- a/doc/alertmanager-mixin/dashboards/overview.libsonnet +++ b/doc/alertmanager-mixin/dashboards/overview.libsonnet @@ -1,154 +1,173 @@ -local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; local dashboard = grafana.dashboard; -local row = grafana.row; -local prometheus = grafana.prometheus; -local template = grafana.template; -local graphPanel = grafana.graphPanel; +local prometheus = grafana.query.prometheus; +local variable = dashboard.variable; +local panel = grafana.panel; +local row = panel.row; { grafanaDashboards+:: { - local amQuerySelector = std.join(',', ['%s=~"$%s"' % [label, label] for label in std.split($._config.alertmanagerClusterLabels, ',')]), local amNameDashboardLegend = std.join('/', ['{{%s}}' % [label] for label in std.split($._config.alertmanagerNameLabels, ',')]), - local alertmanagerClusterSelectorTemplates = + local datasource = + variable.datasource.new('datasource', 'prometheus') + + variable.datasource.generalOptions.withLabel('Data Source') + + variable.datasource.generalOptions.withCurrent('Prometheus') + + variable.datasource.generalOptions.showOnDashboard.withLabelAndValue(), + + local alertmanagerClusterSelectorVariables = [ - template.new( - name=label, - label=label, - datasource='$datasource', - query='label_values(alertmanager_alerts, %s)' % label, - current='', - refresh=2, - includeAll=false, - sort=1 - ) + variable.query.new(label) + + variable.query.generalOptions.withLabel(label) + + variable.query.withDatasourceFromVariable(datasource) + + variable.query.queryTypes.withLabelValues(label, metric='alertmanager_alerts') + + variable.query.generalOptions.withCurrent('') + + variable.query.refresh.onTime() + + variable.query.selectionOptions.withIncludeAll(false) + + variable.query.withSort(type='alphabetical') for label in std.split($._config.alertmanagerClusterLabels, ',') ], - local integrationTemplate = - template.new( - name='integration', - datasource='$datasource', - query='label_values(alertmanager_notifications_total{integration=~"%s"}, integration)' % $._config.alertmanagerCriticalIntegrationsRegEx, - current='all', - hide='2', // Always hide - refresh=2, - includeAll=true, - sort=1 - ), + local integrationVariable = + variable.query.new('integration') + + variable.query.withDatasourceFromVariable(datasource) + + variable.query.queryTypes.withLabelValues('integration', metric='alertmanager_notifications_total{integration=~"%s"}' % $._config.alertmanagerCriticalIntegrationsRegEx) + + variable.query.generalOptions.withCurrent('$__all') + + variable.datasource.generalOptions.showOnDashboard.withNothing() + + variable.query.refresh.onTime() + + variable.query.selectionOptions.withIncludeAll(true) + + variable.query.withSort(type='alphabetical'), + + local panelTimeSeriesStdOptions = + {} + + panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal') + + panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(10) + + panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + panel.timeSeries.options.legend.withShowLegend(false) + + panel.timeSeries.options.tooltip.withMode('multi') + + panel.timeSeries.queryOptions.withDatasource('prometheus', '$datasource'), 'alertmanager-overview.json': local alerts = - graphPanel.new( - 'Alerts', - description='current set of alerts stored in the Alertmanager', - datasource='$datasource', - span=6, - format='none', - stack=true, - fill=1, - legend_show=false, - ) - .addTarget(prometheus.target('sum(alertmanager_alerts{%(amQuerySelector)s}) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s' % $._config { amNameDashboardLegend: amNameDashboardLegend })); + panel.timeSeries.new('Alerts') + + panel.timeSeries.panelOptions.withDescription('current set of alerts stored in the Alertmanager') + + panel.timeSeries.standardOptions.withUnit('none') + + panelTimeSeriesStdOptions + + panel.timeSeries.queryOptions.withTargets([ + prometheus.new( + '$datasource', + 'sum(alertmanager_alerts{%(amQuerySelector)s}) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, + ) + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('%(amNameDashboardLegend)s' % $._config { amNameDashboardLegend: amNameDashboardLegend }), + ]); local alertsRate = - graphPanel.new( - 'Alerts receive rate', - description='rate of successful and invalid alerts received by the Alertmanager', - datasource='$datasource', - span=6, - format='ops', - stack=true, - fill=1, - legend_show=false, - ) - .addTarget(prometheus.target('sum(rate(alertmanager_alerts_received_total{%(amQuerySelector)s}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Received' % $._config { amNameDashboardLegend: amNameDashboardLegend })) - .addTarget(prometheus.target('sum(rate(alertmanager_alerts_invalid_total{%(amQuerySelector)s}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Invalid' % $._config { amNameDashboardLegend: amNameDashboardLegend })); + panel.timeSeries.new('Alerts receive rate') + + panel.timeSeries.panelOptions.withDescription('rate of successful and invalid alerts received by the Alertmanager') + + panel.timeSeries.standardOptions.withUnit('ops') + + panelTimeSeriesStdOptions + + panel.timeSeries.queryOptions.withTargets([ + prometheus.new( + '$datasource', + 'sum(rate(alertmanager_alerts_received_total{%(amQuerySelector)s}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, + ) + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('%(amNameDashboardLegend)s Received' % $._config { amNameDashboardLegend: amNameDashboardLegend }), + prometheus.new( + '$datasource', + 'sum(rate(alertmanager_alerts_invalid_total{%(amQuerySelector)s}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, + ) + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('%(amNameDashboardLegend)s Invalid' % $._config { amNameDashboardLegend: amNameDashboardLegend }), + ]); local notifications = - graphPanel.new( - '$integration: Notifications Send Rate', - description='rate of successful and invalid notifications sent by the Alertmanager', - datasource='$datasource', - format='ops', - stack=true, - fill=1, - legend_show=false, - repeat='integration' - ) - .addTarget(prometheus.target('sum(rate(alertmanager_notifications_total{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (integration,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Total' % $._config { amNameDashboardLegend: amNameDashboardLegend })) - .addTarget(prometheus.target('sum(rate(alertmanager_notifications_failed_total{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (integration,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Failed' % $._config { amNameDashboardLegend: amNameDashboardLegend })); + panel.timeSeries.new('$integration: Notifications Send Rate') + + panel.timeSeries.panelOptions.withDescription('rate of successful and invalid notifications sent by the Alertmanager') + + panel.timeSeries.standardOptions.withUnit('ops') + + panelTimeSeriesStdOptions + + panel.timeSeries.panelOptions.withRepeat('integration') + + panel.timeSeries.queryOptions.withTargets([ + prometheus.new( + '$datasource', + 'sum(rate(alertmanager_notifications_total{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (integration,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, + ) + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('%(amNameDashboardLegend)s Total' % $._config { amNameDashboardLegend: amNameDashboardLegend }), + prometheus.new( + '$datasource', + 'sum(rate(alertmanager_notifications_failed_total{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (integration,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, + ) + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('%(amNameDashboardLegend)s Failed' % $._config { amNameDashboardLegend: amNameDashboardLegend }), + ]); local notificationDuration = - graphPanel.new( - '$integration: Notification Duration', - description='latency of notifications sent by the Alertmanager', - datasource='$datasource', - format='s', - stack=false, - fill=1, - legend_show=false, - repeat='integration' - ) - .addTarget(prometheus.target( - ||| - histogram_quantile(0.99, - sum(rate(alertmanager_notification_latency_seconds_bucket{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (le,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s) - ) - ||| % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s 99th Percentile' % $._config { amNameDashboardLegend: amNameDashboardLegend } - )) - .addTarget(prometheus.target( - ||| - histogram_quantile(0.50, - sum(rate(alertmanager_notification_latency_seconds_bucket{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (le,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s) - ) - ||| % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Median' % $._config { amNameDashboardLegend: amNameDashboardLegend } - )) - .addTarget(prometheus.target( - ||| - sum(rate(alertmanager_notification_latency_seconds_sum{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s) - / - sum(rate(alertmanager_notification_latency_seconds_count{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s) - ||| % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Average' % $._config { amNameDashboardLegend: amNameDashboardLegend } - )); + panel.timeSeries.new('$integration: Notification Duration') + + panel.timeSeries.panelOptions.withDescription('latency of notifications sent by the Alertmanager') + + panel.timeSeries.standardOptions.withUnit('s') + + panelTimeSeriesStdOptions + + panel.timeSeries.panelOptions.withRepeat('integration') + + panel.timeSeries.queryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + histogram_quantile(0.99, + sum(rate(alertmanager_notification_latency_seconds_bucket{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (le,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s) + ) + ||| % $._config { amQuerySelector: amQuerySelector }, + ) + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('%(amNameDashboardLegend)s 99th Percentile' % $._config { amNameDashboardLegend: amNameDashboardLegend }), + prometheus.new( + '$datasource', + ||| + histogram_quantile(0.50, + sum(rate(alertmanager_notification_latency_seconds_bucket{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (le,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s) + ) + ||| % $._config { amQuerySelector: amQuerySelector }, + ) + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('%(amNameDashboardLegend)s Median' % $._config { amNameDashboardLegend: amNameDashboardLegend }), + prometheus.new( + '$datasource', + ||| + sum(rate(alertmanager_notification_latency_seconds_sum{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s) + / + sum(rate(alertmanager_notification_latency_seconds_count{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s) + ||| % $._config { amQuerySelector: amQuerySelector }, + ) + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('%(amNameDashboardLegend)s Average' % $._config { amNameDashboardLegend: amNameDashboardLegend }), + ]); - dashboard.new( - '%sOverview' % $._config.dashboardNamePrefix, - time_from='now-1h', - tags=($._config.dashboardTags), - timezone='utc', - refresh='30s', - graphTooltip='shared_crosshair', - uid='alertmanager-overview' - ) - .addTemplate( - { - current: { - text: 'Prometheus', - value: 'Prometheus', - }, - hide: 0, - label: 'Data Source', - name: 'datasource', - options: [], - query: 'prometheus', - refresh: 1, - regex: '', - type: 'datasource', - }, - ) - .addTemplates(alertmanagerClusterSelectorTemplates) - .addTemplate(integrationTemplate) - .addRow( - row.new('Alerts') - .addPanel(alerts) - .addPanel(alertsRate) - ) - .addRow( - row.new('Notifications') - .addPanel(notifications) - .addPanel(notificationDuration) - ), + dashboard.new('%sOverview' % $._config.dashboardNamePrefix) + + dashboard.time.withFrom('now-1h') + + dashboard.withTags($._config.dashboardTags) + + dashboard.withTimezone('utc') + + dashboard.timepicker.withRefreshIntervals('30s') + + dashboard.graphTooltip.withSharedCrosshair() + + dashboard.withUid('alertmanager-overview') + + dashboard.withVariables( + [datasource] + + alertmanagerClusterSelectorVariables + + [integrationVariable] + ) + + dashboard.withPanels( + grafana.util.grid.makeGrid([ + row.new('Alerts') + + row.withPanels([ + alerts, + alertsRate + ]), + row.new('Notifications') + + row.withPanels([ + notifications, + notificationDuration + ]) + ], panelWidth=12, panelHeight=7) + ) }, } diff --git a/doc/alertmanager-mixin/jsonnetfile.json b/doc/alertmanager-mixin/jsonnetfile.json index 650733a0..2d56d912 100644 --- a/doc/alertmanager-mixin/jsonnetfile.json +++ b/doc/alertmanager-mixin/jsonnetfile.json @@ -4,11 +4,11 @@ { "source": { "git": { - "remote": "https://github.com/grafana/grafonnet-lib.git", - "subdir": "grafonnet" + "remote": "https://github.com/grafana/grafonnet.git", + "subdir": "gen/grafonnet-latest" } }, - "version": "master" + "version": "main" } ], "legacyImports": false diff --git a/doc/alertmanager-mixin/jsonnetfile.lock.json b/doc/alertmanager-mixin/jsonnetfile.lock.json index 803febc8..0479bb93 100644 --- a/doc/alertmanager-mixin/jsonnetfile.lock.json +++ b/doc/alertmanager-mixin/jsonnetfile.lock.json @@ -4,12 +4,42 @@ { "source": { "git": { - "remote": "https://github.com/grafana/grafonnet-lib.git", - "subdir": "grafonnet" + "remote": "https://github.com/grafana/grafonnet.git", + "subdir": "gen/grafonnet-latest" } }, - "version": "55cf4ee53ced2b6d3ce96ecce9fb813b4465be98", - "sum": "4/sUV0Kk+o8I+wlYxL9R6EPhL/NiLfYHk+NXlU64RUk=" + "version": "1ce5aec95ce32336fe47c8881361847c475b5254", + "sum": "64fMUPI3frXGj4X1FqFd1t7r04w3CUSmXaDcJ23EYbQ=" + }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/grafonnet.git", + "subdir": "gen/grafonnet-v11.1.0" + } + }, + "version": "1ce5aec95ce32336fe47c8881361847c475b5254", + "sum": "41w7p/rwrNsITqNHMXtGSJAfAyKmnflg6rFhKBduUxM=" + }, + { + "source": { + "git": { + "remote": "https://github.com/jsonnet-libs/docsonnet.git", + "subdir": "doc-util" + } + }, + "version": "6ac6c69685b8c29c54515448eaca583da2d88150", + "sum": "BrAL/k23jq+xy9oA7TWIhUx07dsA/QLm3g7ktCwe//U=" + }, + { + "source": { + "git": { + "remote": "https://github.com/jsonnet-libs/xtd.git", + "subdir": "" + } + }, + "version": "63d430b69a95741061c2f7fc9d84b1a778511d9c", + "sum": "qiZi3axUSXCVzKUF83zSAxklwrnitMmrDK4XAfjPMdE=" } ], "legacyImports": false