Protect gauge-based alerts against failed scrapes
Signed-off-by: beorn7 <beorn@grafana.com>
This commit is contained in:
parent
52707535b8
commit
9a2177949d
|
@ -7,9 +7,11 @@
|
||||||
{
|
{
|
||||||
alert: 'PrometheusBadConfig',
|
alert: 'PrometheusBadConfig',
|
||||||
expr: |||
|
expr: |||
|
||||||
prometheus_config_last_reload_successful{%(prometheusSelector)s} == 0
|
# Without max_over_time, failed scrapes could create false negatives, see
|
||||||
|
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||||
|
max_over_time(prometheus_config_last_reload_successful{%(prometheusSelector)s}[5m]) == 0
|
||||||
||| % $._config,
|
||| % $._config,
|
||||||
'for': '15m',
|
'for': '10m',
|
||||||
labels: {
|
labels: {
|
||||||
severity: 'critical',
|
severity: 'critical',
|
||||||
},
|
},
|
||||||
|
@ -21,10 +23,12 @@
|
||||||
{
|
{
|
||||||
alert: 'PrometheusNotificationQueueRunningFull',
|
alert: 'PrometheusNotificationQueueRunningFull',
|
||||||
expr: |||
|
expr: |||
|
||||||
|
# Without min_over_time, failed scrapes could create false negatives, see
|
||||||
|
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||||
(
|
(
|
||||||
predict_linear(prometheus_notifications_queue_length{%(prometheusSelector)s}[5m], 60 * 30)
|
predict_linear(prometheus_notifications_queue_length{%(prometheusSelector)s}[5m], 60 * 30)
|
||||||
>
|
>
|
||||||
prometheus_notifications_queue_capacity{%(prometheusSelector)s}
|
min_over_time(prometheus_notifications_queue_capacity{%(prometheusSelector)s}[5m])
|
||||||
)
|
)
|
||||||
||| % $._config,
|
||| % $._config,
|
||||||
'for': '15m',
|
'for': '15m',
|
||||||
|
@ -79,7 +83,9 @@
|
||||||
{
|
{
|
||||||
alert: 'PrometheusNotConnectedToAlertmanagers',
|
alert: 'PrometheusNotConnectedToAlertmanagers',
|
||||||
expr: |||
|
expr: |||
|
||||||
prometheus_notifications_alertmanagers_discovered{%(prometheusSelector)s} < 1
|
# Without max_over_time, failed scrapes could create false negatives, see
|
||||||
|
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||||
|
max_over_time(prometheus_notifications_alertmanagers_discovered{%(prometheusSelector)s}[5m]) < 1
|
||||||
||| % $._config,
|
||| % $._config,
|
||||||
'for': '10m',
|
'for': '10m',
|
||||||
labels: {
|
labels: {
|
||||||
|
@ -201,10 +207,12 @@
|
||||||
{
|
{
|
||||||
alert: 'PrometheusRemoteWriteBehind',
|
alert: 'PrometheusRemoteWriteBehind',
|
||||||
expr: |||
|
expr: |||
|
||||||
|
# Without max_over_time, failed scrapes could create false negatives, see
|
||||||
|
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||||
(
|
(
|
||||||
prometheus_remote_storage_highest_timestamp_in_seconds{%(prometheusSelector)s}
|
max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{%(prometheusSelector)s}[5m])
|
||||||
- on(job, instance) group_right
|
- on(job, instance) group_right
|
||||||
prometheus_remote_storage_queue_highest_sent_timestamp_seconds{%(prometheusSelector)s}
|
max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{%(prometheusSelector)s}[5m])
|
||||||
)
|
)
|
||||||
> 120
|
> 120
|
||||||
||| % $._config,
|
||| % $._config,
|
||||||
|
|
Loading…
Reference in New Issue