From f7c8a4b28ab26d738547a06bfb47627e4916b767 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Wed, 26 May 2021 19:33:52 +0200 Subject: [PATCH 1/2] Add test to expose issue #2426 Signed-off-by: beorn7 --- silence/silence_test.go | 68 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/silence/silence_test.go b/silence/silence_test.go index 879529c1..9ec6c62f 100644 --- a/silence/silence_test.go +++ b/silence/silence_test.go @@ -25,6 +25,7 @@ import ( "github.com/matttproud/golang_protobuf_extensions/pbutil" pb "github.com/prometheus/alertmanager/silence/silencepb" "github.com/prometheus/alertmanager/types" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" ) @@ -904,6 +905,73 @@ func TestSilenceExpireWithZeroRetention(t *testing.T) { require.Equal(t, 3, count) } +func TestSilencer(t *testing.T) { + ss, err := New(Options{Retention: time.Hour}) + require.NoError(t, err) + + now := time.Now() + ss.now = func() time.Time { return now } + + m := types.NewMarker(prometheus.NewRegistry()) + s := NewSilencer(ss, m, nil) + + require.False(t, s.Mutes(model.LabelSet{"foo": "bar"}), "expected alert not silenced without any silences") + + _, err = ss.Set(&pb.Silence{ + Matchers: []*pb.Matcher{{Name: "foo", Pattern: "baz"}}, + StartsAt: now.Add(-time.Hour), + EndsAt: now.Add(5 * time.Minute), + }) + require.NoError(t, err) + + require.False(t, s.Mutes(model.LabelSet{"foo": "bar"}), "expected alert not silenced by non-matching silence") + + id, err := ss.Set(&pb.Silence{ + Matchers: []*pb.Matcher{{Name: "foo", Pattern: "bar"}}, + StartsAt: now.Add(-time.Hour), + EndsAt: now.Add(5 * time.Minute), + }) + require.NoError(t, err) + require.NotEmpty(t, id) + + require.True(t, s.Mutes(model.LabelSet{"foo": "bar"}), "expected alert silenced by matching silence") + + now = now.Add(time.Hour) // One hour passes, silence expires. + + require.False(t, s.Mutes(model.LabelSet{"foo": "bar"}), "expected alert not silenced by expired silence") + + // Update silence to start in the future. + _, err = ss.Set(&pb.Silence{ + Id: id, + Matchers: []*pb.Matcher{{Name: "foo", Pattern: "bar"}}, + StartsAt: now.Add(time.Hour), + EndsAt: now.Add(3 * time.Hour), + }) + require.NoError(t, err) + + require.False(t, s.Mutes(model.LabelSet{"foo": "bar"}), "expected alert not silenced by future silence") + + now = now.Add(2 * time.Hour) // Two hours pass, silence becomes active. + + // Exposes issue #2426. + require.True(t, s.Mutes(model.LabelSet{"foo": "bar"}), "expected alert silenced by activated silence") + + _, err = ss.Set(&pb.Silence{ + Matchers: []*pb.Matcher{{Name: "foo", Pattern: "b..", Type: pb.Matcher_REGEXP}}, + StartsAt: now.Add(time.Hour), + EndsAt: now.Add(3 * time.Hour), + }) + require.NoError(t, err) + + // Note that issue #2426 doesn't apply anymore because we added a new silence. + require.True(t, s.Mutes(model.LabelSet{"foo": "bar"}), "expected alert still silenced by activated silence") + + now = now.Add(2 * time.Hour) // Two hours pass, first silence expires, overlapping second silence becomes active. + + // Another variant of issue #2426 (overlapping silences). + require.True(t, s.Mutes(model.LabelSet{"foo": "bar"}), "expected alert silenced by activated second silence") +} + func TestValidateMatcher(t *testing.T) { cases := []struct { m *pb.Matcher From e84c2651965ace0d217a2d9bcb2e9a44b8be6058 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Fri, 21 May 2021 00:13:16 +0200 Subject: [PATCH 2/2] Include pending silences for future muting decisions Previously, if a pending silence existed for an alert, and it later became active without any silences getting added in the meantime, we would miss the existence of that newly active silence. Signed-off-by: beorn7 --- notify/notify_test.go | 2 +- provider/mem/mem_test.go | 3 +- silence/silence.go | 74 +++++++++++--------- silence/silence_test.go | 3 +- types/types.go | 144 +++++++++++++++++---------------------- 5 files changed, 109 insertions(+), 117 deletions(-) diff --git a/notify/notify_test.go b/notify/notify_test.go index 026e2fb4..d714a0e4 100644 --- a/notify/notify_test.go +++ b/notify/notify_test.go @@ -673,7 +673,7 @@ func TestMuteStageWithSilences(t *testing.T) { // Set the second alert as previously silenced with an old version // number. This is expected to get unsilenced by the stage. - marker.SetSilenced(inAlerts[1].Fingerprint(), 0, "123") + marker.SetSilenced(inAlerts[1].Fingerprint(), 0, []string{"123"}, nil) _, alerts, err := stage.Exec(context.Background(), log.NewNopLogger(), inAlerts...) if err != nil { diff --git a/provider/mem/mem_test.go b/provider/mem/mem_test.go index fa8a1ee8..ea8a9fdc 100644 --- a/provider/mem/mem_test.go +++ b/provider/mem/mem_test.go @@ -296,7 +296,8 @@ func TestAlertsGC(t *testing.T) { } for _, a := range insert { - marker.SetActive(a.Fingerprint()) + marker.SetSilenced(a.Fingerprint(), 0, nil, nil) + marker.SetInhibited(a.Fingerprint()) if !marker.Active(a.Fingerprint()) { t.Errorf("error setting status: %v", a) } diff --git a/silence/silence.go b/silence/silence.go index 0533c83d..e07b2d7f 100644 --- a/silence/silence.go +++ b/silence/silence.go @@ -113,17 +113,19 @@ func NewSilencer(s *Silences, m types.Marker, l log.Logger) *Silencer { // Mutes implements the Muter interface. func (s *Silencer) Mutes(lset model.LabelSet) bool { fp := lset.Fingerprint() - ids, markerVersion, _ := s.marker.Silenced(fp) + activeIDs, pendingIDs, markerVersion, _ := s.marker.Silenced(fp) var ( err error - sils []*pb.Silence + allSils []*pb.Silence newVersion = markerVersion ) if markerVersion == s.silences.Version() { + totalSilences := len(activeIDs) + len(pendingIDs) // No new silences added, just need to check which of the old - // silences are still relevant. - if len(ids) == 0 { + // silences are still relevant and which of the pending ones + // have become active. + if totalSilences == 0 { // Super fast path: No silences ever applied to this // alert, none have been added. We are done. return false @@ -134,47 +136,55 @@ func (s *Silencer) Mutes(lset model.LabelSet) bool { // markerVersion because the Query call might already return a // newer version, which is not the version our old list of // applicable silences is based on. - sils, _, err = s.silences.Query( - QIDs(ids...), - QState(types.SilenceStateActive), + allIDs := append(append(make([]string, 0, totalSilences), activeIDs...), pendingIDs...) + allSils, _, err = s.silences.Query( + QIDs(allIDs...), + QState(types.SilenceStateActive, types.SilenceStatePending), ) } else { // New silences have been added, do a full query. - sils, newVersion, err = s.silences.Query( - QState(types.SilenceStateActive), + allSils, newVersion, err = s.silences.Query( + QState(types.SilenceStateActive, types.SilenceStatePending), QMatches(lset), ) } if err != nil { level.Error(s.logger).Log("msg", "Querying silences failed, alerts might not get silenced correctly", "err", err) } - if len(sils) == 0 { - s.marker.SetSilenced(fp, newVersion) + if len(allSils) == 0 { + // Easy case, neither active nor pending silences anymore. + s.marker.SetSilenced(fp, newVersion, nil, nil) return false } - idsChanged := len(sils) != len(ids) - if !idsChanged { - // Length is the same, but is the content the same? - for i, s := range sils { - if ids[i] != s.Id { - idsChanged = true - break - } + // It is still possible that nothing has changed, but finding out is not + // much less effort than just recreating the IDs from the query + // result. So let's do it in any case. Note that we cannot reuse the + // current ID slices for concurrency reasons. + activeIDs, pendingIDs = nil, nil + now := s.silences.now() + for _, sil := range allSils { + switch getState(sil, now) { + case types.SilenceStatePending: + pendingIDs = append(pendingIDs, sil.Id) + case types.SilenceStateActive: + activeIDs = append(activeIDs, sil.Id) + default: + // Do nothing, silence has expired in the meantime. } } - if idsChanged { - // Need to recreate ids. - ids = make([]string, len(sils)) - for i, s := range sils { - ids[i] = s.Id - } - sort.Strings(ids) // For comparability. - } - if idsChanged || newVersion != markerVersion { - // Update marker only if something changed. - s.marker.SetSilenced(fp, newVersion, ids...) - } - return true + level.Debug(s.logger).Log( + "msg", "determined current silences state", + "now", now, + "total", len(allSils), + "active", len(activeIDs), + "pending", len(pendingIDs), + ) + sort.Strings(activeIDs) + sort.Strings(pendingIDs) + + s.marker.SetSilenced(fp, newVersion, activeIDs, pendingIDs) + + return len(activeIDs) > 0 } // Silences holds a silence state that can be modified, queried, and snapshot. diff --git a/silence/silence_test.go b/silence/silence_test.go index 9ec6c62f..b2215788 100644 --- a/silence/silence_test.go +++ b/silence/silence_test.go @@ -22,6 +22,7 @@ import ( "testing" "time" + "github.com/go-kit/kit/log" "github.com/matttproud/golang_protobuf_extensions/pbutil" pb "github.com/prometheus/alertmanager/silence/silencepb" "github.com/prometheus/alertmanager/types" @@ -913,7 +914,7 @@ func TestSilencer(t *testing.T) { ss.now = func() time.Time { return now } m := types.NewMarker(prometheus.NewRegistry()) - s := NewSilencer(ss, m, nil) + s := NewSilencer(ss, m, log.NewNopLogger()) require.False(t, s.Mutes(model.LabelSet{"foo": "bar"}), "expected alert not silenced without any silences") diff --git a/types/types.go b/types/types.go index 314b289f..a412453d 100644 --- a/types/types.go +++ b/types/types.go @@ -44,29 +44,29 @@ type AlertStatus struct { SilencedBy []string `json:"silencedBy"` InhibitedBy []string `json:"inhibitedBy"` + // For internal tracking, not exposed in the API. + pendingSilences []string silencesVersion int } // Marker helps to mark alerts as silenced and/or inhibited. // All methods are goroutine-safe. type Marker interface { - // SetActive sets the provided alert to AlertStateActive and deletes all - // SilencedBy and InhibitedBy entries. - SetActive(alert model.Fingerprint) // SetSilenced replaces the previous SilencedBy by the provided IDs of - // silences, including the version number of the silences state. The set - // of provided IDs is supposed to represent the complete set of relevant - // silences. If no ID is provided and InhibitedBy is already empty, this - // call is equivalent to SetActive. Otherwise, it sets - // AlertStateSuppressed. - SetSilenced(alert model.Fingerprint, version int, silenceIDs ...string) + // active and pending silences, including the version number of the + // silences state. The set of provided IDs is supposed to represent the + // complete set of relevant silences. If no active silence IDs are provided and + // InhibitedBy is already empty, it sets the provided alert to AlertStateActive. + // Otherwise, it sets the provided alert to AlertStateSuppressed. + SetSilenced(alert model.Fingerprint, version int, activeSilenceIDs []string, pendingSilenceIDs []string) // SetInhibited replaces the previous InhibitedBy by the provided IDs of // alerts. In contrast to SetSilenced, the set of provided IDs is not // expected to represent the complete set of inhibiting alerts. (In // practice, this method is only called with one or zero IDs. However, - // this expectation might change in the future.) If no ID is provided and - // SilencedBy is already empty, this call is equivalent to - // SetActive. Otherwise, it sets AlertStateSuppressed. + // this expectation might change in the future.) If no IDs are provided + // and InhibitedBy is already empty, it sets the provided alert to + // AlertStateActive. Otherwise, it sets the provided alert to + // AlertStateSuppressed. SetInhibited(alert model.Fingerprint, alertIDs ...string) // Count alerts of the given state(s). With no state provided, count all @@ -79,13 +79,13 @@ type Marker interface { Delete(model.Fingerprint) // Various methods to inquire if the given alert is in a certain - // AlertState. Silenced also returns all the silencing silences, while - // Inhibited may return only a subset of inhibiting alerts. Silenced - // also returns the version of the silences state the result is based - // on. + // AlertState. Silenced also returns all the active and pending + // silences, while Inhibited may return only a subset of inhibiting + // alerts. Silenced also returns the version of the silences state the + // result is based on. Unprocessed(model.Fingerprint) bool Active(model.Fingerprint) bool - Silenced(model.Fingerprint) ([]string, int, bool) + Silenced(model.Fingerprint) (activeIDs []string, pendingIDs []string, version int, silenced bool) Inhibited(model.Fingerprint) ([]string, bool) } @@ -148,58 +148,7 @@ func (m *memMarker) Count(states ...AlertState) int { } // SetSilenced implements Marker. -func (m *memMarker) SetSilenced(alert model.Fingerprint, version int, ids ...string) { - m.mtx.Lock() - - s, found := m.m[alert] - if !found { - s = &AlertStatus{} - m.m[alert] = s - } - s.silencesVersion = version - - // If there are any silence or alert IDs associated with the - // fingerprint, it is suppressed. Otherwise, set it to - // AlertStateUnprocessed. - if len(ids) == 0 && len(s.InhibitedBy) == 0 { - m.mtx.Unlock() - m.SetActive(alert) - return - } - - s.State = AlertStateSuppressed - s.SilencedBy = ids - - m.mtx.Unlock() -} - -// SetInhibited implements Marker. -func (m *memMarker) SetInhibited(alert model.Fingerprint, ids ...string) { - m.mtx.Lock() - - s, found := m.m[alert] - if !found { - s = &AlertStatus{} - m.m[alert] = s - } - - // If there are any silence or alert IDs associated with the - // fingerprint, it is suppressed. Otherwise, set it to - // AlertStateUnprocessed. - if len(ids) == 0 && len(s.SilencedBy) == 0 { - m.mtx.Unlock() - m.SetActive(alert) - return - } - - s.State = AlertStateSuppressed - s.InhibitedBy = ids - - m.mtx.Unlock() -} - -// SetActive implements Marker. -func (m *memMarker) SetActive(alert model.Fingerprint) { +func (m *memMarker) SetSilenced(alert model.Fingerprint, version int, activeIDs []string, pendingIDs []string) { m.mtx.Lock() defer m.mtx.Unlock() @@ -208,10 +157,42 @@ func (m *memMarker) SetActive(alert model.Fingerprint) { s = &AlertStatus{} m.m[alert] = s } + s.SilencedBy = activeIDs + s.pendingSilences = pendingIDs + s.silencesVersion = version - s.State = AlertStateActive - s.SilencedBy = []string{} - s.InhibitedBy = []string{} + // If there are any silence or alert IDs associated with the + // fingerprint, it is suppressed. Otherwise, set it to + // AlertStateActive. + if len(activeIDs) == 0 && len(s.InhibitedBy) == 0 { + s.State = AlertStateActive + return + } + + s.State = AlertStateSuppressed +} + +// SetInhibited implements Marker. +func (m *memMarker) SetInhibited(alert model.Fingerprint, ids ...string) { + m.mtx.Lock() + defer m.mtx.Unlock() + + s, found := m.m[alert] + if !found { + s = &AlertStatus{} + m.m[alert] = s + } + s.InhibitedBy = ids + + // If there are any silence or alert IDs associated with the + // fingerprint, it is suppressed. Otherwise, set it to + // AlertStateActive. + if len(ids) == 0 && len(s.SilencedBy) == 0 { + s.State = AlertStateActive + return + } + + s.State = AlertStateSuppressed } // Status implements Marker. @@ -219,15 +200,14 @@ func (m *memMarker) Status(alert model.Fingerprint) AlertStatus { m.mtx.RLock() defer m.mtx.RUnlock() - s, found := m.m[alert] - if !found { - s = &AlertStatus{ - State: AlertStateUnprocessed, - SilencedBy: []string{}, - InhibitedBy: []string{}, - } + if s, found := m.m[alert]; found { + return *s + } + return AlertStatus{ + State: AlertStateUnprocessed, + SilencedBy: []string{}, + InhibitedBy: []string{}, } - return *s } // Delete implements Marker. @@ -258,9 +238,9 @@ func (m *memMarker) Inhibited(alert model.Fingerprint) ([]string, bool) { // Silenced returns whether the alert for the given Fingerprint is in the // Silenced state, any associated silence IDs, and the silences state version // the result is based on. -func (m *memMarker) Silenced(alert model.Fingerprint) ([]string, int, bool) { +func (m *memMarker) Silenced(alert model.Fingerprint) (activeIDs []string, pendingIDs []string, version int, silenced bool) { s := m.Status(alert) - return s.SilencedBy, s.silencesVersion, + return s.SilencedBy, s.pendingSilences, s.silencesVersion, s.State == AlertStateSuppressed && len(s.SilencedBy) > 0 }