From 50861d586a794204718f2c8a96b1bf5989764ebb Mon Sep 17 00:00:00 2001
From: Tom Wilkie
Date: Tue, 7 Aug 2018 14:18:33 +0200
Subject: [PATCH] Alert if more than 1% of alerts fail for a given integration.

Signed-off-by: Tom Wilkie
---
Notes (review):
  - The alert fires when failed notifications exceed 1% of all notifications
    over 5m; $value is that percentage, so the message reads "N% of alerts".
  - Line breaks and context-line indentation were restored after the patch
    had been flattened onto a single line; confirm the indentation against
    alerts.libsonnet before applying.
  - The post-image blob hash on the index line predates the reworded message.

 documentation/prometheus-mixin/alerts.libsonnet | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/documentation/prometheus-mixin/alerts.libsonnet b/documentation/prometheus-mixin/alerts.libsonnet
index 40cf06d66a..cf45037292 100644
--- a/documentation/prometheus-mixin/alerts.libsonnet
+++ b/documentation/prometheus-mixin/alerts.libsonnet
@@ -33,14 +33,14 @@
           {
             alert: 'PromAlertsFailed',
             expr: |||
-              sum(increase(alertmanager_notifications_failed_total{%(alertmanagerSelector)s}[5m])) by (namespace) > 0
+              100 * rate(alertmanager_notifications_failed_total{%(alertmanagerSelector)s}[5m]) / rate(alertmanager_notifications_total{%(alertmanagerSelector)s}[5m]) > 1
             ||| % $._config,
             'for': '5m',
             labels: {
               severity: 'critical',
             },
             annotations: {
-              message: 'Alertmanager failed to send an alert.',
+              message: 'Alertmanager failed to send {{ printf "%.1f" $value }}% of alerts to {{ $labels.integration }}.',
             },
           },
           {