From fdcd423dfea2a6a0c468f2cf0c52a28775e478a1 Mon Sep 17 00:00:00 2001 From: Niko Smeds Date: Mon, 8 Nov 2021 15:05:15 -0800 Subject: [PATCH] Increase time range for PrometheusHAGroupCrashlooping alert Signed-off-by: Niko Smeds --- documentation/prometheus-mixin/alerts.libsonnet | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/documentation/prometheus-mixin/alerts.libsonnet b/documentation/prometheus-mixin/alerts.libsonnet index baf2f2f0f..7fec72b99 100644 --- a/documentation/prometheus-mixin/alerts.libsonnet +++ b/documentation/prometheus-mixin/alerts.libsonnet @@ -391,7 +391,7 @@ and ( count by (%(prometheusHAGroupLabels)s) ( - changes(process_start_time_seconds{%(prometheusSelector)s}[30m]) > 1 + changes(process_start_time_seconds{%(prometheusSelector)s}[1h]) > 1 ) / count by (%(prometheusHAGroupLabels)s) ( @@ -403,7 +403,7 @@ or ( count by (%(prometheusHAGroupLabels)s) ( - changes(process_start_time_seconds{%(prometheusSelector)s}[30m]) > 4 + changes(process_start_time_seconds{%(prometheusSelector)s}[1h]) > 4 ) / count by (%(prometheusHAGroupLabels)s) ( @@ -418,7 +418,7 @@ }, annotations: { summary: 'More than half of the Prometheus instances within the same HA group are crashlooping.', - description: '{{ $value | humanizePercentage }} of Prometheus instances within the %(prometheusHAGroupName)s HA group have had at least 5 total restarts or 2 unclean restarts in the last 30m.' % $._config, + description: '{{ $value | humanizePercentage }} of Prometheus instances within the %(prometheusHAGroupName)s HA group have had at least 5 total restarts or 2 unclean restarts in the last 1h.' % $._config, }, }, ],