From 22db73fbf7ea8361e58387aee2bd2fb81df64ba0 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Fri, 22 Feb 2019 19:57:27 +0100 Subject: [PATCH 1/4] Modify the self-inhibition prevention semantics This has been discussed in #666 (issue of hell...). As concluded there, the cleanest semantics is most likely the following: "An alert that matches both target and source side cannot inhibit alerts for which the same is true." The two open questions were: 1. How difficult is the implementation? 2. Is it needed? This relatively simple commit proves that the answer to (1) is: Not very difficult. (This also includes a performance-improving simplification, which would have been possible without a change of semantics.) The answer to (2) is twofold: For one, the original use case in #666 wasn't solved by our interim solution. What we solved is the case where the self-inhibition is triggered by a wide target match, i.e. I have a specific alert that should inhibit a whole group of target alerts without inhibiting itself. What we did _not_ solve is the inverted case: Self-inhibition by a wide source match, i.e. an alert that should only fire if none of a whole group of source alert fires. I mean, we "fixed" it as in, the target alert will never be inhibited, but @lmb in #666 wanted the alert to be inhibited _sometimes_ (just not _always_). The other part is that I think that the asymmetry in our interim solution will at some point haunt us. Thus, I really would like to get this change in before we do a 1.0 release. In practice, I expect this to be only relevant in very rare cases. But those cases will be most difficult to reason with, and I claim that the solution in this commit is matching what humans intuitively expect. Signed-off-by: beorn7 --- inhibit/inhibit.go | 19 ++++++++--- inhibit/inhibit_test.go | 74 +++++++++++++++++++++++++++++------------ 2 files changed, 67 insertions(+), 26 deletions(-) diff --git a/inhibit/inhibit.go b/inhibit/inhibit.go index f5a5f593..bb2fa60e 100644 --- a/inhibit/inhibit.go +++ b/inhibit/inhibit.go @@ -126,8 +126,13 @@ func (ih *Inhibitor) Mutes(lset model.LabelSet) bool { fp := lset.Fingerprint() for _, r := range ih.rules { - // Only inhibit if target matchers match but source matchers don't. - if inhibitedByFP, eq := r.hasEqual(lset); !r.SourceMatchers.Match(lset) && r.TargetMatchers.Match(lset) && eq { + if !r.TargetMatchers.Match(lset) { + // If target side of rule doesn't match, we don't need to look any further. + continue + } + // If we are here, the target side matches. If the source side matches, too, we + // need to exclude inhibiting alerts for which the same is true. + if inhibitedByFP, eq := r.hasEqual(lset, r.SourceMatchers.Match(lset)); eq { ih.marker.SetInhibited(fp, inhibitedByFP.String()) return true } @@ -191,9 +196,10 @@ func NewInhibitRule(cr *config.InhibitRule) *InhibitRule { } } -// hasEqual checks whether the source cache contains alerts matching -// the equal labels for the given label set. -func (r *InhibitRule) hasEqual(lset model.LabelSet) (model.Fingerprint, bool) { +// hasEqual checks whether the source cache contains alerts matching the equal +// labels for the given label set. If excludeTwoSidedMatch is true, alerts that +// match both the source and the target side of the rule are disregarded. +func (r *InhibitRule) hasEqual(lset model.LabelSet, excludeTwoSidedMatch bool) (model.Fingerprint, bool) { Outer: for a := range r.scache.List() { // The cache might be stale and contain resolved alerts. @@ -205,6 +211,9 @@ Outer: continue Outer } } + if excludeTwoSidedMatch && r.TargetMatchers.Match(a.Labels) { + continue Outer + } return a.Fingerprint(), true } return model.Fingerprint(0), false diff --git a/inhibit/inhibit_test.go b/inhibit/inhibit_test.go index 7cb06a58..1b2d99f8 100644 --- a/inhibit/inhibit_test.go +++ b/inhibit/inhibit_test.go @@ -131,7 +131,7 @@ func TestInhibitRuleHasEqual(t *testing.T) { r.scache.Set(v) } - if _, have := r.hasEqual(c.input); have != c.result { + if _, have := r.hasEqual(c.input, false); have != c.result { t.Errorf("Unexpected result %t, expected %t", have, c.result) } } @@ -140,55 +140,87 @@ func TestInhibitRuleHasEqual(t *testing.T) { func TestInhibitRuleMatches(t *testing.T) { t.Parallel() - // Simple inhibut rule - cr := config.InhibitRule{ - SourceMatch: map[string]string{"s": "1"}, - TargetMatch: map[string]string{"t": "1"}, + rule1 := config.InhibitRule{ + SourceMatch: map[string]string{"s1": "1"}, + TargetMatch: map[string]string{"t1": "1"}, + Equal: model.LabelNames{"e"}, + } + rule2 := config.InhibitRule{ + SourceMatch: map[string]string{"s2": "1"}, + TargetMatch: map[string]string{"t2": "1"}, Equal: model.LabelNames{"e"}, } m := types.NewMarker(prometheus.NewRegistry()) - ih := NewInhibitor(nil, []*config.InhibitRule{&cr}, m, nopLogger) - ir := ih.rules[0] + ih := NewInhibitor(nil, []*config.InhibitRule{&rule1, &rule2}, m, nopLogger) now := time.Now() - // Active alert that matches the source filter - sourceAlert := &types.Alert{ + // Active alert that matches the source filter of rule1. + sourceAlert1 := &types.Alert{ Alert: model.Alert{ - Labels: model.LabelSet{"s": "1", "e": "1"}, + Labels: model.LabelSet{"s1": "1", "t1": "2", "e": "1"}, + StartsAt: now.Add(-time.Minute), + EndsAt: now.Add(time.Hour), + }, + } + // Active alert that matches the source filter _and_ the target filter of rule2. + sourceAlert2 := &types.Alert{ + Alert: model.Alert{ + Labels: model.LabelSet{"s2": "1", "t2": "1", "e": "1"}, StartsAt: now.Add(-time.Minute), EndsAt: now.Add(time.Hour), }, } - ir.scache = store.NewAlerts(5 * time.Minute) - ir.scache.Set(sourceAlert) + ih.rules[0].scache = store.NewAlerts(5 * time.Minute) + ih.rules[0].scache.Set(sourceAlert1) + ih.rules[1].scache = store.NewAlerts(5 * time.Minute) + ih.rules[1].scache.Set(sourceAlert2) cases := []struct { target model.LabelSet expected bool }{ { - // Matches target filter, inhibited - target: model.LabelSet{"t": "1", "e": "1"}, + // Matches target filter of rule1, inhibited. + target: model.LabelSet{"t1": "1", "e": "1"}, expected: true, }, { - // Matches target filter (plus noise), inhibited - target: model.LabelSet{"t": "1", "t2": "1", "e": "1"}, + // Matches target filter of rule2, inhibited. + target: model.LabelSet{"t2": "1", "e": "1"}, expected: true, }, { - // Doesn't match target filter, not inhibited - target: model.LabelSet{"t": "0", "e": "1"}, + // Matches target filter of rule1 (plus noise), inhibited. + target: model.LabelSet{"t1": "1", "t3": "1", "e": "1"}, + expected: true, + }, + { + // Matches target filter of rule1 plus rule2, inhibited. + target: model.LabelSet{"t1": "1", "t2": "1", "e": "1"}, + expected: true, + }, + { + // Doesn't match target filter, not inhibited. + target: model.LabelSet{"t1": "0", "e": "1"}, expected: false, }, { - // Matches both source and target filters, not inhibited - target: model.LabelSet{"s": "1", "t": "1", "e": "1"}, + // Matches both source and target filters of rule1, + // inhibited because sourceAlert1 matches only the + // source filter of rule1. + target: model.LabelSet{"s1": "1", "t1": "1", "e": "1"}, + expected: true, + }, + { + // Matches both source and target filters of rule2, + // inhibited because sourceAlert2 matches also both the + // source and target filterof rule1. + target: model.LabelSet{"s2": "1", "t2": "1", "e": "1"}, expected: false, }, { // Matches target filter, equal label doesn't match, not inhibited - target: model.LabelSet{"t": "1", "e": "0"}, + target: model.LabelSet{"t1": "1", "e": "0"}, expected: false, }, } From 33638b14123f70172c78563abe6053f71abbc6e7 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Mon, 25 Feb 2019 17:18:35 +0100 Subject: [PATCH 2/4] Fix doc comment Signed-off-by: beorn7 --- inhibit/inhibit_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inhibit/inhibit_test.go b/inhibit/inhibit_test.go index 1b2d99f8..5d55032f 100644 --- a/inhibit/inhibit_test.go +++ b/inhibit/inhibit_test.go @@ -213,8 +213,8 @@ func TestInhibitRuleMatches(t *testing.T) { }, { // Matches both source and target filters of rule2, - // inhibited because sourceAlert2 matches also both the - // source and target filterof rule1. + // not inhibited because sourceAlert2 matches also both the + // source and target filter of rule1. target: model.LabelSet{"s2": "1", "t2": "1", "e": "1"}, expected: false, }, From 12671bd261fe75fb77acb952a49c7647c19ffe2d Mon Sep 17 00:00:00 2001 From: beorn7 Date: Mon, 25 Feb 2019 17:11:43 +0100 Subject: [PATCH 3/4] Improve doc comments for Marker and friends This clarifies a bunch of things I have run into during code reading in preparation for some performance improvements around muting. It also moves doc comments from places where they don't show up in godoc to visible places. It also fixes golint warnings. Signed-off-by: beorn7 --- inhibit/inhibit.go | 13 ++++++---- types/types.go | 63 +++++++++++++++++++++++++++++++++++----------- 2 files changed, 56 insertions(+), 20 deletions(-) diff --git a/inhibit/inhibit.go b/inhibit/inhibit.go index bb2fa60e..e49eb42a 100644 --- a/inhibit/inhibit.go +++ b/inhibit/inhibit.go @@ -29,8 +29,9 @@ import ( "github.com/prometheus/alertmanager/types" ) -// An Inhibitor determines whether a given label set is muted -// based on the currently active alerts and a set of inhibition rules. +// An Inhibitor determines whether a given label set is muted based on the +// currently active alerts and a set of inhibition rules. It implements the +// Muter interface. type Inhibitor struct { alerts provider.Alerts rules []*InhibitRule @@ -121,7 +122,8 @@ func (ih *Inhibitor) Stop() { } } -// Mutes returns true iff the given label set is muted. +// Mutes returns true iff the given label set is muted. It implements the Muter +// interface. func (ih *Inhibitor) Mutes(lset model.LabelSet) bool { fp := lset.Fingerprint() @@ -197,8 +199,9 @@ func NewInhibitRule(cr *config.InhibitRule) *InhibitRule { } // hasEqual checks whether the source cache contains alerts matching the equal -// labels for the given label set. If excludeTwoSidedMatch is true, alerts that -// match both the source and the target side of the rule are disregarded. +// labels for the given label set. If so, the fingerprint of one of those alerts +// is returned. If excludeTwoSidedMatch is true, alerts that match both the +// source and the target side of the rule are disregarded. func (r *InhibitRule) hasEqual(lset model.LabelSet, excludeTwoSidedMatch bool) (model.Fingerprint, bool) { Outer: for a := range r.scache.List() { diff --git a/types/types.go b/types/types.go index 223f96ea..3c1e7a0d 100644 --- a/types/types.go +++ b/types/types.go @@ -22,15 +22,22 @@ import ( "github.com/prometheus/common/model" ) +// AlertState is used as part of AlertStatus. type AlertState string +// Possible values for AlertState. const ( AlertStateUnprocessed AlertState = "unprocessed" AlertStateActive AlertState = "active" AlertStateSuppressed AlertState = "suppressed" ) -// AlertStatus stores the state and values associated with an Alert. +// AlertStatus stores the state of an alert and, as applicable, the IDs of +// silences silencing the alert and of other alerts inhibiting the alert. Note +// that currently, SilencedBy is supposed to be the complete set of the relevant +// silences while InhibitedBy may contain only a subset of the inhibiting alerts +// – in practice exactly one ID. (This somewhat confusing semantics might change +// in the future.) type AlertStatus struct { State AlertState `json:"state"` SilencedBy []string `json:"silencedBy"` @@ -40,15 +47,36 @@ type AlertStatus struct { // Marker helps to mark alerts as silenced and/or inhibited. // All methods are goroutine-safe. type Marker interface { + // SetActive sets the provided alert to AlertStateActive and deletes all + // SilencedBy and InhibitedBy entries. SetActive(alert model.Fingerprint) - SetInhibited(alert model.Fingerprint, ids ...string) - SetSilenced(alert model.Fingerprint, ids ...string) + // SetSilenced replaces the previous SilencedBy by the provided IDs of + // silences. The set of provided IDs is supposed to represent the + // complete set of relevant silences. If no ID is provided and + // InhibitedBy is already empty, this call is equivalent + // SetActive. Otherwise, it sets AlertStateSuppressed. + SetSilenced(alert model.Fingerprint, silenceIDs ...string) + // SetInhibited replaces the previous InhibitedBy by the provided IDs of + // alerts. In contrast to SetSilenced, the set of provided IDs is not + // expected to represent the complete set of inhibiting alerts. (In + // practice, this method is only called with one or zero IDs. However, + // this expectation might change in the future.) If no ID is provided and + // SilencedBy is already empty, this call is equivalent to + // SetActive. Otherwise, it sets AlertStateSuppressed. + SetInhibited(alert model.Fingerprint, alertIDs ...string) + // Count alerts of the given state(s). With no state provided, count all + // alerts. Count(...AlertState) int + // Status of the given alert. Status(model.Fingerprint) AlertStatus + // Delete the given alert. Delete(model.Fingerprint) + // Various methods to inquire if the given alert is in a certain + // AlertState. Silenced also returns all the silencing silences, while + // Inhibited may return only a subset of inhibiting alerts. Unprocessed(model.Fingerprint) bool Active(model.Fingerprint) bool Silenced(model.Fingerprint) ([]string, bool) @@ -93,7 +121,7 @@ func (m *memMarker) registerMetrics(r prometheus.Registerer) { r.MustRegister(alertsSuppressed) } -// Count alerts of a given state. +// Count implements Marker. func (m *memMarker) Count(states ...AlertState) int { count := 0 @@ -114,7 +142,7 @@ func (m *memMarker) Count(states ...AlertState) int { return count } -// SetSilenced sets the AlertStatus to suppressed and stores the associated silence IDs. +// SetSilenced implements Marker. func (m *memMarker) SetSilenced(alert model.Fingerprint, ids ...string) { m.mtx.Lock() @@ -139,7 +167,7 @@ func (m *memMarker) SetSilenced(alert model.Fingerprint, ids ...string) { m.mtx.Unlock() } -// SetInhibited sets the AlertStatus to suppressed and stores the associated alert IDs. +// SetInhibited implements Marker. func (m *memMarker) SetInhibited(alert model.Fingerprint, ids ...string) { m.mtx.Lock() @@ -164,6 +192,7 @@ func (m *memMarker) SetInhibited(alert model.Fingerprint, ids ...string) { m.mtx.Unlock() } +// SetActive implements Marker. func (m *memMarker) SetActive(alert model.Fingerprint) { m.mtx.Lock() defer m.mtx.Unlock() @@ -182,7 +211,7 @@ func (m *memMarker) SetActive(alert model.Fingerprint) { s.InhibitedBy = []string{} } -// Status returns the AlertStatus for the given Fingerprint. +// Status implements Marker. func (m *memMarker) Status(alert model.Fingerprint) AlertStatus { m.mtx.RLock() defer m.mtx.RUnlock() @@ -198,7 +227,7 @@ func (m *memMarker) Status(alert model.Fingerprint) AlertStatus { return *s } -// Delete deletes the given Fingerprint from the internal cache. +// Delete implements Marker. func (m *memMarker) Delete(alert model.Fingerprint) { m.mtx.Lock() defer m.mtx.Unlock() @@ -206,20 +235,17 @@ func (m *memMarker) Delete(alert model.Fingerprint) { delete(m.m, alert) } -// Unprocessed returns whether the alert for the given Fingerprint is in the -// Unprocessed state. +// Unprocessed implements Marker. func (m *memMarker) Unprocessed(alert model.Fingerprint) bool { return m.Status(alert).State == AlertStateUnprocessed } -// Active returns whether the alert for the given Fingerprint is in the Active -// state. +// Active implements Marker. func (m *memMarker) Active(alert model.Fingerprint) bool { return m.Status(alert).State == AlertStateActive } -// Inhibited returns whether the alert for the given Fingerprint is in the -// Inhibited state and any associated alert IDs. +// Inhibited implements Marker. func (m *memMarker) Inhibited(alert model.Fingerprint) ([]string, bool) { s := m.Status(alert) return s.InhibitedBy, @@ -361,7 +387,9 @@ func (a *Alert) Merge(o *Alert) *Alert { return &res } -// A Muter determines whether a given label set is muted. +// A Muter determines whether a given label set is muted. Implementers that +// maintain an underlying Marker are expected to update it during a call of +// Mutes. type Muter interface { Mutes(model.LabelSet) bool } @@ -408,18 +436,23 @@ func (s *Silence) Expired() bool { return s.StartsAt.Equal(s.EndsAt) } +// SilenceStatus stores the state of a silence. type SilenceStatus struct { State SilenceState `json:"state"` } +// SilenceState is used as part of SilenceStatus. type SilenceState string +// Possible values for SilenceState. const ( SilenceStateExpired SilenceState = "expired" SilenceStateActive SilenceState = "active" SilenceStatePending SilenceState = "pending" ) +// CalcSilenceState returns the SilenceState that a silence with the given start +// and end time would have right now. func CalcSilenceState(start, end time.Time) SilenceState { current := time.Now() if current.Before(start) { From f7df3743da074c07150c5f96aeee6cab5294c4c3 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Tue, 26 Feb 2019 12:40:53 +0100 Subject: [PATCH 4/4] Another doc comment fix for inhibition tests Signed-off-by: beorn7 --- inhibit/inhibit_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inhibit/inhibit_test.go b/inhibit/inhibit_test.go index 5d55032f..825ff539 100644 --- a/inhibit/inhibit_test.go +++ b/inhibit/inhibit_test.go @@ -214,7 +214,7 @@ func TestInhibitRuleMatches(t *testing.T) { { // Matches both source and target filters of rule2, // not inhibited because sourceAlert2 matches also both the - // source and target filter of rule1. + // source and target filter of rule2. target: model.LabelSet{"s2": "1", "t2": "1", "e": "1"}, expected: false, },