Increase time range for PrometheusHAGroupCrashlooping alert from 30m to 1h
Signed-off-by: Niko Smeds <nikosmeds@gmail.com>
This commit is contained in:
parent
5e06527190
commit
fdcd423dfe
|
@ -391,7 +391,7 @@
|
||||||
and
|
and
|
||||||
(
|
(
|
||||||
count by (%(prometheusHAGroupLabels)s) (
|
count by (%(prometheusHAGroupLabels)s) (
|
||||||
changes(process_start_time_seconds{%(prometheusSelector)s}[30m]) > 1
|
changes(process_start_time_seconds{%(prometheusSelector)s}[1h]) > 1
|
||||||
)
|
)
|
||||||
/
|
/
|
||||||
count by (%(prometheusHAGroupLabels)s) (
|
count by (%(prometheusHAGroupLabels)s) (
|
||||||
|
@ -403,7 +403,7 @@
|
||||||
or
|
or
|
||||||
(
|
(
|
||||||
count by (%(prometheusHAGroupLabels)s) (
|
count by (%(prometheusHAGroupLabels)s) (
|
||||||
changes(process_start_time_seconds{%(prometheusSelector)s}[30m]) > 4
|
changes(process_start_time_seconds{%(prometheusSelector)s}[1h]) > 4
|
||||||
)
|
)
|
||||||
/
|
/
|
||||||
count by (%(prometheusHAGroupLabels)s) (
|
count by (%(prometheusHAGroupLabels)s) (
|
||||||
|
@ -418,7 +418,7 @@
|
||||||
},
|
},
|
||||||
annotations: {
|
annotations: {
|
||||||
summary: 'More than half of the Prometheus instances within the same HA group are crashlooping.',
|
summary: 'More than half of the Prometheus instances within the same HA group are crashlooping.',
|
||||||
description: '{{ $value | humanizePercentage }} of Prometheus instances within the %(prometheusHAGroupName)s HA group have had at least 5 total restarts or 2 unclean restarts in the last 30m.' % $._config,
|
description: '{{ $value | humanizePercentage }} of Prometheus instances within the %(prometheusHAGroupName)s HA group have had at least 5 total restarts or 2 unclean restarts in the last 1h.' % $._config,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
|
Loading…
Reference in New Issue