mirror of
https://github.com/prometheus/alertmanager
synced 2024-12-15 10:55:42 +00:00
80f2eeb2ca
* inhibit: update inhibition cache when alerts resolve Signed-off-by: Simon Pasquier <spasquie@redhat.com> * inhibit: remove unnecessary fmt.Sprintf Signed-off-by: Simon Pasquier <spasquie@redhat.com> * inhibit: add unit tests Signed-off-by: Simon Pasquier <spasquie@redhat.com> * inhibit: use NopLogger in tests Signed-off-by: Simon Pasquier <spasquie@redhat.com> * Update old alert with result of merge with new On ingest, alerts with matching fingerprints are merged if the new alert's start and end times overlap with the old alert's. The merge creates a new alert, which is then updated in the internal alert store. The original alert is not updated (because merge creates a copy), so it is never marked as resolved in the inhibitor's reference to it. The code within the inhibitor relies on skipping over resolved alerts, but because the old alert is never updated it is never marked as resolved. Thus it continues to inhibit other alerts until it is cleaned up by the internal GC. This commit updates the struct of the old alert with the result of the merge with the new alert. An alternative would be to always update the inhibitor's internal cache of alerts regardless of an alert's resolve status. Signed-off-by: stuart nelson <stuartnelson3@gmail.com> * Update inhibitor cache even if alert is resolved This seems like a better choice than the previous commit. I think it is more sane to have the inhibitor update its own cache, rather than having one of its pointers updated externally. Signed-off-by: stuart nelson <stuartnelson3@gmail.com>
386 lines
9.4 KiB
Go
386 lines
9.4 KiB
Go
// Copyright 2016 Prometheus Team
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package inhibit
|
|
|
|
import (
|
|
"reflect"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/go-kit/kit/log"
|
|
"github.com/kylelemons/godebug/pretty"
|
|
"github.com/prometheus/common/model"
|
|
|
|
"github.com/prometheus/alertmanager/config"
|
|
"github.com/prometheus/alertmanager/provider"
|
|
"github.com/prometheus/alertmanager/types"
|
|
)
|
|
|
|
// nopLogger is the logger handed to NewInhibitor by the tests in this file;
// it is a go-kit no-op logger, so the tests produce no log output.
var nopLogger = log.NewNopLogger()
|
|
|
|
func TestInhibitRuleHasEqual(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
now := time.Now()
|
|
cases := []struct {
|
|
initial map[model.Fingerprint]*types.Alert
|
|
equal model.LabelNames
|
|
input model.LabelSet
|
|
result bool
|
|
}{
|
|
{
|
|
// No source alerts at all.
|
|
initial: map[model.Fingerprint]*types.Alert{},
|
|
input: model.LabelSet{"a": "b"},
|
|
result: false,
|
|
},
|
|
{
|
|
// No equal labels, any source alerts satisfies the requirement.
|
|
initial: map[model.Fingerprint]*types.Alert{1: &types.Alert{}},
|
|
input: model.LabelSet{"a": "b"},
|
|
result: true,
|
|
},
|
|
{
|
|
// Matching but already resolved.
|
|
initial: map[model.Fingerprint]*types.Alert{
|
|
1: &types.Alert{
|
|
Alert: model.Alert{
|
|
Labels: model.LabelSet{"a": "b", "b": "f"},
|
|
StartsAt: now.Add(-time.Minute),
|
|
EndsAt: now.Add(-time.Second),
|
|
},
|
|
},
|
|
2: &types.Alert{
|
|
Alert: model.Alert{
|
|
Labels: model.LabelSet{"a": "b", "b": "c"},
|
|
StartsAt: now.Add(-time.Minute),
|
|
EndsAt: now.Add(-time.Second),
|
|
},
|
|
},
|
|
},
|
|
equal: model.LabelNames{"a", "b"},
|
|
input: model.LabelSet{"a": "b", "b": "c"},
|
|
result: false,
|
|
},
|
|
{
|
|
// Matching and unresolved.
|
|
initial: map[model.Fingerprint]*types.Alert{
|
|
1: &types.Alert{
|
|
Alert: model.Alert{
|
|
Labels: model.LabelSet{"a": "b", "c": "d"},
|
|
StartsAt: now.Add(-time.Minute),
|
|
EndsAt: now.Add(-time.Second),
|
|
},
|
|
},
|
|
2: &types.Alert{
|
|
Alert: model.Alert{
|
|
Labels: model.LabelSet{"a": "b", "c": "f"},
|
|
StartsAt: now.Add(-time.Minute),
|
|
EndsAt: now.Add(time.Hour),
|
|
},
|
|
},
|
|
},
|
|
equal: model.LabelNames{"a"},
|
|
input: model.LabelSet{"a": "b"},
|
|
result: true,
|
|
},
|
|
{
|
|
// Equal label does not match.
|
|
initial: map[model.Fingerprint]*types.Alert{
|
|
1: &types.Alert{
|
|
Alert: model.Alert{
|
|
Labels: model.LabelSet{"a": "c", "c": "d"},
|
|
StartsAt: now.Add(-time.Minute),
|
|
EndsAt: now.Add(-time.Second),
|
|
},
|
|
},
|
|
2: &types.Alert{
|
|
Alert: model.Alert{
|
|
Labels: model.LabelSet{"a": "c", "c": "f"},
|
|
StartsAt: now.Add(-time.Minute),
|
|
EndsAt: now.Add(-time.Second),
|
|
},
|
|
},
|
|
},
|
|
equal: model.LabelNames{"a"},
|
|
input: model.LabelSet{"a": "b"},
|
|
result: false,
|
|
},
|
|
}
|
|
|
|
for _, c := range cases {
|
|
r := &InhibitRule{
|
|
Equal: map[model.LabelName]struct{}{},
|
|
scache: map[model.Fingerprint]*types.Alert{},
|
|
}
|
|
for _, ln := range c.equal {
|
|
r.Equal[ln] = struct{}{}
|
|
}
|
|
for k, v := range c.initial {
|
|
r.scache[k] = v
|
|
}
|
|
|
|
if _, have := r.hasEqual(c.input); have != c.result {
|
|
t.Errorf("Unexpected result %t, expected %t", have, c.result)
|
|
}
|
|
if !reflect.DeepEqual(r.scache, c.initial) {
|
|
t.Errorf("Cache state unexpectedly changed")
|
|
t.Errorf(pretty.Compare(r.scache, c.initial))
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestInhibitRuleMatches(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
// Simple inhibut rule
|
|
cr := config.InhibitRule{
|
|
SourceMatch: map[string]string{"s": "1"},
|
|
TargetMatch: map[string]string{"t": "1"},
|
|
Equal: model.LabelNames{"e"},
|
|
}
|
|
m := types.NewMarker()
|
|
ih := NewInhibitor(nil, []*config.InhibitRule{&cr}, m, nopLogger)
|
|
ir := ih.rules[0]
|
|
now := time.Now()
|
|
// Active alert that matches the source filter
|
|
sourceAlert := types.Alert{
|
|
Alert: model.Alert{
|
|
Labels: model.LabelSet{"s": "1", "e": "1"},
|
|
StartsAt: now.Add(-time.Minute),
|
|
EndsAt: now.Add(time.Hour),
|
|
},
|
|
}
|
|
ir.scache = map[model.Fingerprint]*types.Alert{1: &sourceAlert}
|
|
|
|
cases := []struct {
|
|
target model.LabelSet
|
|
expected bool
|
|
}{
|
|
{
|
|
// Matches target filter, inhibited
|
|
target: model.LabelSet{"t": "1", "e": "1"},
|
|
expected: true,
|
|
},
|
|
{
|
|
// Matches target filter (plus noise), inhibited
|
|
target: model.LabelSet{"t": "1", "t2": "1", "e": "1"},
|
|
expected: true,
|
|
},
|
|
{
|
|
// Doesn't match target filter, not inhibited
|
|
target: model.LabelSet{"t": "0", "e": "1"},
|
|
expected: false,
|
|
},
|
|
{
|
|
// Matches both source and target filters, not inhibited
|
|
target: model.LabelSet{"s": "1", "t": "1", "e": "1"},
|
|
expected: false,
|
|
},
|
|
{
|
|
// Matches target filter, equal label doesn't match, not inhibited
|
|
target: model.LabelSet{"t": "1", "e": "0"},
|
|
expected: false,
|
|
},
|
|
}
|
|
|
|
for _, c := range cases {
|
|
if actual := ih.Mutes(c.target); actual != c.expected {
|
|
t.Errorf("Expected (*Inhibitor).Mutes(%v) to return %t but got %t", c.target, c.expected, actual)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestInhibitRuleGC(t *testing.T) {
|
|
// TODO(fabxc): add now() injection function to Resolved() to remove
|
|
// dependency on machine time in this test.
|
|
now := time.Now()
|
|
newAlert := func(start, end time.Duration) *types.Alert {
|
|
return &types.Alert{
|
|
Alert: model.Alert{
|
|
Labels: model.LabelSet{"a": "b"},
|
|
StartsAt: now.Add(start * time.Minute),
|
|
EndsAt: now.Add(end * time.Minute),
|
|
},
|
|
}
|
|
}
|
|
|
|
before := map[model.Fingerprint]*types.Alert{
|
|
0: newAlert(-10, -5),
|
|
1: newAlert(10, 20),
|
|
2: newAlert(-10, 10),
|
|
3: newAlert(-10, -1),
|
|
}
|
|
after := map[model.Fingerprint]*types.Alert{
|
|
1: newAlert(10, 20),
|
|
2: newAlert(-10, 10),
|
|
}
|
|
|
|
r := &InhibitRule{scache: before}
|
|
r.gc()
|
|
|
|
if !reflect.DeepEqual(r.scache, after) {
|
|
t.Errorf("Unexpected cache state after GC")
|
|
t.Errorf(pretty.Compare(r.scache, after))
|
|
}
|
|
}
|
|
|
|
type fakeAlerts struct {
|
|
alerts []*types.Alert
|
|
finished chan struct{}
|
|
}
|
|
|
|
func newFakeAlerts(alerts []*types.Alert) *fakeAlerts {
|
|
return &fakeAlerts{
|
|
alerts: alerts,
|
|
finished: make(chan struct{}),
|
|
}
|
|
}
|
|
|
|
// GetPending is a stub that always returns a nil iterator.
// NOTE(review): presumably present only to satisfy the provider interface
// expected by NewInhibitor; confirm against the provider package.
func (f *fakeAlerts) GetPending() provider.AlertIterator {
	return nil
}
|
|
// Get is a stub: it ignores the fingerprint and returns a nil alert together
// with a nil error.
func (f *fakeAlerts) Get(model.Fingerprint) (*types.Alert, error) {
	return nil, nil
}
|
|
// Put is a stub: it discards the supplied alerts and reports no error.
func (f *fakeAlerts) Put(...*types.Alert) error {
	return nil
}
|
|
func (f *fakeAlerts) Subscribe() provider.AlertIterator {
|
|
ch := make(chan *types.Alert)
|
|
done := make(chan struct{})
|
|
go func() {
|
|
for _, a := range f.alerts {
|
|
ch <- a
|
|
}
|
|
// Send another (meaningless) alert to make sure that the inhibitor has
|
|
// processed everything.
|
|
ch <- &types.Alert{
|
|
Alert: model.Alert{
|
|
Labels: model.LabelSet{},
|
|
StartsAt: time.Now(),
|
|
},
|
|
}
|
|
close(f.finished)
|
|
<-done
|
|
}()
|
|
return provider.NewAlertIterator(ch, done, nil)
|
|
}
|
|
|
|
func TestInhibit(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
now := time.Now()
|
|
inhibitRule := func() *config.InhibitRule {
|
|
return &config.InhibitRule{
|
|
SourceMatch: map[string]string{"s": "1"},
|
|
TargetMatch: map[string]string{"t": "1"},
|
|
Equal: model.LabelNames{"e"},
|
|
}
|
|
}
|
|
// alertOne is muted by alertTwo when it is active.
|
|
alertOne := func() *types.Alert {
|
|
return &types.Alert{
|
|
Alert: model.Alert{
|
|
Labels: model.LabelSet{"t": "1", "e": "f"},
|
|
StartsAt: now.Add(-time.Minute),
|
|
EndsAt: now.Add(time.Hour),
|
|
},
|
|
}
|
|
}
|
|
alertTwo := func(resolved bool) *types.Alert {
|
|
var end time.Time
|
|
if resolved {
|
|
end = now.Add(-time.Second)
|
|
} else {
|
|
end = now.Add(time.Hour)
|
|
}
|
|
return &types.Alert{
|
|
Alert: model.Alert{
|
|
Labels: model.LabelSet{"s": "1", "e": "f"},
|
|
StartsAt: now.Add(-time.Minute),
|
|
EndsAt: end,
|
|
},
|
|
}
|
|
}
|
|
|
|
type exp struct {
|
|
lbls model.LabelSet
|
|
muted bool
|
|
}
|
|
for i, tc := range []struct {
|
|
alerts []*types.Alert
|
|
expected []exp
|
|
}{
|
|
{
|
|
// alertOne shouldn't be muted since alertTwo hasn't fired.
|
|
alerts: []*types.Alert{alertOne()},
|
|
expected: []exp{
|
|
{
|
|
lbls: model.LabelSet{"t": "1", "e": "f"},
|
|
muted: false,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
// alertOne should be muted by alertTwo which is active.
|
|
alerts: []*types.Alert{alertOne(), alertTwo(false)},
|
|
expected: []exp{
|
|
{
|
|
lbls: model.LabelSet{"t": "1", "e": "f"},
|
|
muted: true,
|
|
},
|
|
{
|
|
lbls: model.LabelSet{"s": "1", "e": "f"},
|
|
muted: false,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
// alertOne shouldn't be muted since alertTwo is resolved.
|
|
alerts: []*types.Alert{alertOne(), alertTwo(false), alertTwo(true)},
|
|
expected: []exp{
|
|
{
|
|
lbls: model.LabelSet{"t": "1", "e": "f"},
|
|
muted: false,
|
|
},
|
|
{
|
|
lbls: model.LabelSet{"s": "1", "e": "f"},
|
|
muted: false,
|
|
},
|
|
},
|
|
},
|
|
} {
|
|
ap := newFakeAlerts(tc.alerts)
|
|
mk := types.NewMarker()
|
|
inhibitor := NewInhibitor(ap, []*config.InhibitRule{inhibitRule()}, mk, nopLogger)
|
|
|
|
go func() {
|
|
for ap.finished != nil {
|
|
select {
|
|
case <-ap.finished:
|
|
ap.finished = nil
|
|
default:
|
|
}
|
|
}
|
|
inhibitor.Stop()
|
|
}()
|
|
inhibitor.Run()
|
|
|
|
for _, expected := range tc.expected {
|
|
if inhibitor.Mutes(expected.lbls) != expected.muted {
|
|
mute := "unmuted"
|
|
if expected.muted {
|
|
mute = "muted"
|
|
}
|
|
t.Errorf("tc: %d, expected alert with labels %q to be %s", i, expected.lbls, mute)
|
|
}
|
|
}
|
|
}
|
|
}
|