alertmanager/test/acceptance/inhibit_test.go
stuart nelson 80f2eeb2ca
Fix resolved alerts still inhibiting (#1331)
* inhibit: update inhibition cache when alerts resolve

Signed-off-by: Simon Pasquier <spasquie@redhat.com>

* inhibit: remove unnecessary fmt.Sprintf

Signed-off-by: Simon Pasquier <spasquie@redhat.com>

* inhibit: add unit tests

Signed-off-by: Simon Pasquier <spasquie@redhat.com>

* inhibit: use NopLogger in tests

Signed-off-by: Simon Pasquier <spasquie@redhat.com>

* Update old alert with result of merge with new

On ingest, alerts with matching fingerprints are
merged if the new alert's start and end times
overlap with the old alert's.

The merge creates a new alert, which is then
updated in the internal alert store.

The original alert is not updated (because merge
creates a copy), so it is never marked as resolved
in the inhibitor's reference to it.

The code within the inhibitor relies on skipping
over resolved alerts, but because the old alert is
never updated it is never marked as resolved. Thus
it continues to inhibit other alerts until it is
cleaned up by the internal GC.

This commit updates the struct of the old alert
with the result of the merge with the new alert.

An alternative would be to always update the
inhibitor's internal cache of alerts regardless of
an alert's resolve status.

Signed-off-by: stuart nelson <stuartnelson3@gmail.com>

* Update inhibitor cache even if alert is resolved

This seems like a better choice than the previous
commit. I think it is more sane to have the
inhibitor update its own cache, rather than having
one of its pointers updated externally.

Signed-off-by: stuart nelson <stuartnelson3@gmail.com>
2018-04-18 16:26:04 +02:00

153 lines
4.3 KiB
Go

// Copyright 2015 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package test
import (
"fmt"
"testing"
"time"
. "github.com/prometheus/alertmanager/test"
)
// TestInhibiting is an integration test that checks that alerts can be
// inhibited and that an inhibited alert is notified again as soon as the
// inhibiting alert gets resolved.
//
// The inhibit rule configured below lets a JobDown alert suppress
// InstanceDown alerts that carry the same "job" and "zone" label values.
func TestInhibiting(t *testing.T) {
	t.Parallel()

	// The raw string is YAML, so the indentation inside it is significant:
	// it is what nests the routing options under "route", the webhook under
	// the receiver entry, and the matchers under the single inhibit rule.
	// The %s placeholder is filled with the webhook address further down.
	conf := `
route:
  receiver: "default"
  group_by: []
  group_wait: 1s
  group_interval: 1s
  repeat_interval: 1s

receivers:
- name: "default"
  webhook_configs:
  - url: 'http://%s'

inhibit_rules:
- source_match:
    alertname: JobDown
  target_match:
    alertname: InstanceDown
  equal:
    - job
    - zone
`

	at := NewAcceptanceTest(t, &AcceptanceOpts{
		Tolerance: 150 * time.Millisecond,
	})

	co := at.Collector("webhook")
	wh := NewWebhook(co)

	am := at.Alertmanager(fmt.Sprintf(conf, wh.Address()))

	// Initial alerts: one unrelated alert plus InstanceDown in two zones.
	// NOTE(review): the numeric arguments to At/Between/Active are offsets in
	// the acceptance framework's time unit (presumably seconds) — confirm
	// against the test package.
	am.Push(At(1), Alert("alertname", "test1", "job", "testjob", "zone", "aa"))
	am.Push(At(1), Alert("alertname", "InstanceDown", "job", "testjob", "zone", "aa"))
	am.Push(At(1), Alert("alertname", "InstanceDown", "job", "testjob", "zone", "ab"))

	// This JobDown in zone aa should inhibit InstanceDown in zone aa in the
	// second batch of notifications.
	am.Push(At(2.2), Alert("alertname", "JobDown", "job", "testjob", "zone", "aa"))

	// InstanceDown in zone aa should fire again in the third batch of
	// notifications once JobDown in zone aa gets resolved.
	am.Push(At(3.6), Alert("alertname", "JobDown", "job", "testjob", "zone", "aa").Active(2.2, 3.6))

	// First batch: nothing is inhibited yet, all three initial alerts are sent.
	co.Want(Between(2, 2.5),
		Alert("alertname", "test1", "job", "testjob", "zone", "aa").Active(1),
		Alert("alertname", "InstanceDown", "job", "testjob", "zone", "aa").Active(1),
		Alert("alertname", "InstanceDown", "job", "testjob", "zone", "ab").Active(1),
	)

	// Second batch: JobDown is firing, so InstanceDown in zone aa is absent
	// while InstanceDown in zone ab (different zone) is still notified.
	co.Want(Between(3, 3.5),
		Alert("alertname", "test1", "job", "testjob", "zone", "aa").Active(1),
		Alert("alertname", "InstanceDown", "job", "testjob", "zone", "ab").Active(1),
		Alert("alertname", "JobDown", "job", "testjob", "zone", "aa").Active(2.2),
	)

	// Third batch: JobDown is resolved, so InstanceDown in zone aa shows up
	// again alongside the resolved JobDown.
	co.Want(Between(4, 4.5),
		Alert("alertname", "test1", "job", "testjob", "zone", "aa").Active(1),
		Alert("alertname", "InstanceDown", "job", "testjob", "zone", "aa").Active(1),
		Alert("alertname", "InstanceDown", "job", "testjob", "zone", "ab").Active(1),
		Alert("alertname", "JobDown", "job", "testjob", "zone", "aa").Active(2.2, 3.6),
	)

	at.Run()
}
// TestAlwaysInhibiting is an integration test that checks that when the
// inhibited and the inhibiting alerts are resolved at the same time, the
// final notification contains both alerts.
//
// The inhibit rule configured below lets a JobDown alert suppress
// InstanceDown alerts that carry the same "job" and "zone" label values.
func TestAlwaysInhibiting(t *testing.T) {
	t.Parallel()

	// The raw string is YAML, so the indentation inside it is significant:
	// it is what nests the routing options under "route", the webhook under
	// the receiver entry, and the matchers under the single inhibit rule.
	// The %s placeholder is filled with the webhook address further down.
	conf := `
route:
  receiver: "default"
  group_by: []
  group_wait: 1s
  group_interval: 1s
  repeat_interval: 1s

receivers:
- name: "default"
  webhook_configs:
  - url: 'http://%s'

inhibit_rules:
- source_match:
    alertname: JobDown
  target_match:
    alertname: InstanceDown
  equal:
    - job
    - zone
`

	at := NewAcceptanceTest(t, &AcceptanceOpts{
		Tolerance: 150 * time.Millisecond,
	})

	co := at.Collector("webhook")
	wh := NewWebhook(co)

	am := at.Alertmanager(fmt.Sprintf(conf, wh.Address()))

	// Both the inhibited (InstanceDown) and the inhibiting (JobDown) alert
	// start firing together ...
	// NOTE(review): the numeric arguments to At/Between/Active are offsets in
	// the acceptance framework's time unit (presumably seconds) — confirm
	// against the test package.
	am.Push(At(1), Alert("alertname", "InstanceDown", "job", "testjob", "zone", "aa"))
	am.Push(At(1), Alert("alertname", "JobDown", "job", "testjob", "zone", "aa"))

	// ... and both are pushed again as resolved at the same instant.
	am.Push(At(2.6), Alert("alertname", "JobDown", "job", "testjob", "zone", "aa").Active(1, 2.6))
	am.Push(At(2.6), Alert("alertname", "InstanceDown", "job", "testjob", "zone", "aa").Active(1, 2.6))

	// While JobDown is firing only JobDown is expected; InstanceDown is
	// inhibited and must not appear.
	co.Want(Between(2, 2.5),
		Alert("alertname", "JobDown", "job", "testjob", "zone", "aa").Active(1),
	)

	// After both are resolved, the notification must contain both alerts.
	co.Want(Between(3, 3.5),
		Alert("alertname", "InstanceDown", "job", "testjob", "zone", "aa").Active(1, 2.6),
		Alert("alertname", "JobDown", "job", "testjob", "zone", "aa").Active(1, 2.6),
	)

	at.Run()
}