Merge pull request #9700 from nikosmeds/nikosmeds/hagroupcrashlooping-mixin-60m

Increase time range for PrometheusHAGroupCrashlooping alert
This commit is contained in:
Björn Rabenstein 2021-11-19 12:53:55 +01:00 committed by GitHub
commit 2234798f60
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 2 additions and 2 deletions

View File

@@ -391,7 +391,7 @@
and
(
count by (%(prometheusHAGroupLabels)s) (
-changes(process_start_time_seconds{%(prometheusSelector)s}[30m]) > 1
+changes(process_start_time_seconds{%(prometheusSelector)s}[1h]) > 1
)
/
count by (%(prometheusHAGroupLabels)s) (
@@ -418,7 +418,7 @@
},
annotations: {
summary: 'More than half of the Prometheus instances within the same HA group are crashlooping.',
-description: '{{ $value | humanizePercentage }} of Prometheus instances within the %(prometheusHAGroupName)s HA group have had at least 5 total restarts or 2 unclean restarts in the last 30m.' % $._config,
+description: '{{ $value | humanizePercentage }} of Prometheus instances within the %(prometheusHAGroupName)s HA group have had at least 5 total restarts in the last 30m or 2 unclean restarts in the last 1h.' % $._config,
},
},
],