Merge pull request #1764 from prometheus/beorn7/muting

Modify the self-inhibition prevention semantics
2025-02-19 20:27:07 +00:00 · 2019-02-26 14:01:22 +01:00 · 2019-02-26 14:01:22 +01:00 · 891f368c51
commit 891f368c51
parent c7de536129 0f634debfd
3 changed files with 121 additions and 44 deletions
--- a/inhibit/inhibit.go
+++ b/inhibit/inhibit.go
@ -29,8 +29,9 @@ import (
 	"github.com/prometheus/alertmanager/types"
 )

-// An Inhibitor determines whether a given label set is muted
-// based on the currently active alerts and a set of inhibition rules.
+// An Inhibitor determines whether a given label set is muted based on the
+// currently active alerts and a set of inhibition rules. It implements the
+// Muter interface.
 type Inhibitor struct {
 	alerts provider.Alerts
 	rules  []*InhibitRule
@ -121,13 +122,19 @@ func (ih *Inhibitor) Stop() {
 	}
 }

-// Mutes returns true iff the given label set is muted.
+// Mutes returns true iff the given label set is muted. It implements the Muter
+// interface.
 func (ih *Inhibitor) Mutes(lset model.LabelSet) bool {
 	fp := lset.Fingerprint()

 	for _, r := range ih.rules {
-		// Only inhibit if target matchers match but source matchers don't.
-		if inhibitedByFP, eq := r.hasEqual(lset); !r.SourceMatchers.Match(lset) && r.TargetMatchers.Match(lset) && eq {
+		if !r.TargetMatchers.Match(lset) {
+			// If target side of rule doesn't match, we don't need to look any further.
+			continue
+		}
+		// If we are here, the target side matches. If the source side matches, too, we
+		// need to exclude inhibiting alerts for which the same is true.
+		if inhibitedByFP, eq := r.hasEqual(lset, r.SourceMatchers.Match(lset)); eq {
 			ih.marker.SetInhibited(fp, inhibitedByFP.String())
 			return true
 		}
@ -191,9 +198,11 @@ func NewInhibitRule(cr *config.InhibitRule) *InhibitRule {
 	}
 }

-// hasEqual checks whether the source cache contains alerts matching
-// the equal labels for the given label set.
-func (r *InhibitRule) hasEqual(lset model.LabelSet) (model.Fingerprint, bool) {
+// hasEqual checks whether the source cache contains alerts matching the equal
+// labels for the given label set. If so, the fingerprint of one of those alerts
+// is returned. If excludeTwoSidedMatch is true, alerts that match both the
+// source and the target side of the rule are disregarded.
+func (r *InhibitRule) hasEqual(lset model.LabelSet, excludeTwoSidedMatch bool) (model.Fingerprint, bool) {
 Outer:
 	for a := range r.scache.List() {
 		// The cache might be stale and contain resolved alerts.
@ -205,6 +214,9 @@ Outer:
 				continue Outer
 			}
 		}
+		if excludeTwoSidedMatch && r.TargetMatchers.Match(a.Labels) {
+			continue Outer
+		}
 		return a.Fingerprint(), true
 	}
 	return model.Fingerprint(0), false
--- a/inhibit/inhibit_test.go
+++ b/inhibit/inhibit_test.go
@ -131,7 +131,7 @@ func TestInhibitRuleHasEqual(t *testing.T) {
 			r.scache.Set(v)
 		}

-		if _, have := r.hasEqual(c.input); have != c.result {
+		if _, have := r.hasEqual(c.input, false); have != c.result {
 			t.Errorf("Unexpected result %t, expected %t", have, c.result)
 		}
 	}
@ -140,55 +140,87 @@ func TestInhibitRuleHasEqual(t *testing.T) {
 func TestInhibitRuleMatches(t *testing.T) {
 	t.Parallel()

-	// Simple inhibut rule
-	cr := config.InhibitRule{
-		SourceMatch: map[string]string{"s": "1"},
-		TargetMatch: map[string]string{"t": "1"},
+	rule1 := config.InhibitRule{
+		SourceMatch: map[string]string{"s1": "1"},
+		TargetMatch: map[string]string{"t1": "1"},
+		Equal:       model.LabelNames{"e"},
+	}
+	rule2 := config.InhibitRule{
+		SourceMatch: map[string]string{"s2": "1"},
+		TargetMatch: map[string]string{"t2": "1"},
 		Equal:       model.LabelNames{"e"},
 	}
 	m := types.NewMarker(prometheus.NewRegistry())
-	ih := NewInhibitor(nil, []*config.InhibitRule{&cr}, m, nopLogger)
-	ir := ih.rules[0]
+	ih := NewInhibitor(nil, []*config.InhibitRule{&rule1, &rule2}, m, nopLogger)
 	now := time.Now()
-	// Active alert that matches the source filter
-	sourceAlert := &types.Alert{
+	// Active alert that matches the source filter of rule1.
+	sourceAlert1 := &types.Alert{
 		Alert: model.Alert{
-			Labels:   model.LabelSet{"s": "1", "e": "1"},
+			Labels:   model.LabelSet{"s1": "1", "t1": "2", "e": "1"},
+			StartsAt: now.Add(-time.Minute),
+			EndsAt:   now.Add(time.Hour),
+		},
+	}
+	// Active alert that matches the source filter _and_ the target filter of rule2.
+	sourceAlert2 := &types.Alert{
+		Alert: model.Alert{
+			Labels:   model.LabelSet{"s2": "1", "t2": "1", "e": "1"},
 			StartsAt: now.Add(-time.Minute),
 			EndsAt:   now.Add(time.Hour),
 		},
 	}

-	ir.scache = store.NewAlerts(5 * time.Minute)
-	ir.scache.Set(sourceAlert)
+	ih.rules[0].scache = store.NewAlerts(5 * time.Minute)
+	ih.rules[0].scache.Set(sourceAlert1)
+	ih.rules[1].scache = store.NewAlerts(5 * time.Minute)
+	ih.rules[1].scache.Set(sourceAlert2)

 	cases := []struct {
 		target   model.LabelSet
 		expected bool
 	}{
 		{
-			// Matches target filter, inhibited
-			target:   model.LabelSet{"t": "1", "e": "1"},
+			// Matches target filter of rule1, inhibited.
+			target:   model.LabelSet{"t1": "1", "e": "1"},
 			expected: true,
 		},
 		{
-			// Matches target filter (plus noise), inhibited
-			target:   model.LabelSet{"t": "1", "t2": "1", "e": "1"},
+			// Matches target filter of rule2, inhibited.
+			target:   model.LabelSet{"t2": "1", "e": "1"},
 			expected: true,
 		},
 		{
-			// Doesn't match target filter, not inhibited
-			target:   model.LabelSet{"t": "0", "e": "1"},
+			// Matches target filter of rule1 (plus noise), inhibited.
+			target:   model.LabelSet{"t1": "1", "t3": "1", "e": "1"},
+			expected: true,
+		},
+		{
+			// Matches target filter of rule1 plus rule2, inhibited.
+			target:   model.LabelSet{"t1": "1", "t2": "1", "e": "1"},
+			expected: true,
+		},
+		{
+			// Doesn't match target filter, not inhibited.
+			target:   model.LabelSet{"t1": "0", "e": "1"},
 			expected: false,
 		},
 		{
-			// Matches both source and target filters, not inhibited
-			target:   model.LabelSet{"s": "1", "t": "1", "e": "1"},
+			// Matches both source and target filters of rule1,
+			// inhibited because sourceAlert1 matches only the
+			// source filter of rule1.
+			target:   model.LabelSet{"s1": "1", "t1": "1", "e": "1"},
+			expected: true,
+		},
+		{
+			// Matches both source and target filters of rule2,
+			// not inhibited because sourceAlert2 matches also both the
+			// source and target filter of rule2.
+			target:   model.LabelSet{"s2": "1", "t2": "1", "e": "1"},
 			expected: false,
 		},
 		{
 			// Matches target filter, equal label doesn't match, not inhibited
-			target:   model.LabelSet{"t": "1", "e": "0"},
+			target:   model.LabelSet{"t1": "1", "e": "0"},
 			expected: false,
 		},
 	}
--- a/types/types.go
+++ b/types/types.go
@ -22,15 +22,22 @@ import (
 	"github.com/prometheus/common/model"
 )

+// AlertState is used as part of AlertStatus.
 type AlertState string

+// Possible values for AlertState.
 const (
 	AlertStateUnprocessed AlertState = "unprocessed"
 	AlertStateActive      AlertState = "active"
 	AlertStateSuppressed  AlertState = "suppressed"
 )

-// AlertStatus stores the state and values associated with an Alert.
+// AlertStatus stores the state of an alert and, as applicable, the IDs of
+// silences silencing the alert and of other alerts inhibiting the alert. Note
+// that currently, SilencedBy is supposed to be the complete set of the relevant
+// silences while InhibitedBy may contain only a subset of the inhibiting alerts
+// – in practice exactly one ID. (This somewhat confusing semantics might change
+// in the future.)
 type AlertStatus struct {
 	State       AlertState `json:"state"`
 	SilencedBy  []string   `json:"silencedBy"`
@ -40,15 +47,36 @@ type AlertStatus struct {
 // Marker helps to mark alerts as silenced and/or inhibited.
 // All methods are goroutine-safe.
 type Marker interface {
+	// SetActive sets the provided alert to AlertStateActive and deletes all
+	// SilencedBy and InhibitedBy entries.
 	SetActive(alert model.Fingerprint)
-	SetInhibited(alert model.Fingerprint, ids ...string)
-	SetSilenced(alert model.Fingerprint, ids ...string)
+	// SetSilenced replaces the previous SilencedBy by the provided IDs of
+	// silences. The set of provided IDs is supposed to represent the
+	// complete set of relevant silences. If no ID is provided and
+	// InhibitedBy is already empty, this call is equivalent to
+	// SetActive. Otherwise, it sets AlertStateSuppressed.
+	SetSilenced(alert model.Fingerprint, silenceIDs ...string)
+	// SetInhibited replaces the previous InhibitedBy by the provided IDs of
+	// alerts. In contrast to SetSilenced, the set of provided IDs is not
+	// expected to represent the complete set of inhibiting alerts. (In
+	// practice, this method is only called with one or zero IDs. However,
+	// this expectation might change in the future.) If no ID is provided and
+	// SilencedBy is already empty, this call is equivalent to
+	// SetActive. Otherwise, it sets AlertStateSuppressed.
+	SetInhibited(alert model.Fingerprint, alertIDs ...string)

+	// Count alerts of the given state(s). With no state provided, count all
+	// alerts.
 	Count(...AlertState) int

+	// Status of the given alert.
 	Status(model.Fingerprint) AlertStatus
+	// Delete the given alert.
 	Delete(model.Fingerprint)

+	// Various methods to inquire if the given alert is in a certain
+	// AlertState. Silenced also returns all the silencing silences, while
+	// Inhibited may return only a subset of inhibiting alerts.
 	Unprocessed(model.Fingerprint) bool
 	Active(model.Fingerprint) bool
 	Silenced(model.Fingerprint) ([]string, bool)
@ -93,7 +121,7 @@ func (m *memMarker) registerMetrics(r prometheus.Registerer) {
 	r.MustRegister(alertsSuppressed)
 }

-// Count alerts of a given state.
+// Count implements Marker.
 func (m *memMarker) Count(states ...AlertState) int {
 	count := 0

@ -114,7 +142,7 @@ func (m *memMarker) Count(states ...AlertState) int {
 	return count
 }

-// SetSilenced sets the AlertStatus to suppressed and stores the associated silence IDs.
+// SetSilenced implements Marker.
 func (m *memMarker) SetSilenced(alert model.Fingerprint, ids ...string) {
 	m.mtx.Lock()

@ -139,7 +167,7 @@ func (m *memMarker) SetSilenced(alert model.Fingerprint, ids ...string) {
 	m.mtx.Unlock()
 }

-// SetInhibited sets the AlertStatus to suppressed and stores the associated alert IDs.
+// SetInhibited implements Marker.
 func (m *memMarker) SetInhibited(alert model.Fingerprint, ids ...string) {
 	m.mtx.Lock()

@ -164,6 +192,7 @@ func (m *memMarker) SetInhibited(alert model.Fingerprint, ids ...string) {
 	m.mtx.Unlock()
 }

+// SetActive implements Marker.
 func (m *memMarker) SetActive(alert model.Fingerprint) {
 	m.mtx.Lock()
 	defer m.mtx.Unlock()
@ -182,7 +211,7 @@ func (m *memMarker) SetActive(alert model.Fingerprint) {
 	s.InhibitedBy = []string{}
 }

-// Status returns the AlertStatus for the given Fingerprint.
+// Status implements Marker.
 func (m *memMarker) Status(alert model.Fingerprint) AlertStatus {
 	m.mtx.RLock()
 	defer m.mtx.RUnlock()
@ -198,7 +227,7 @@ func (m *memMarker) Status(alert model.Fingerprint) AlertStatus {
 	return *s
 }

-// Delete deletes the given Fingerprint from the internal cache.
+// Delete implements Marker.
 func (m *memMarker) Delete(alert model.Fingerprint) {
 	m.mtx.Lock()
 	defer m.mtx.Unlock()
@ -206,20 +235,17 @@ func (m *memMarker) Delete(alert model.Fingerprint) {
 	delete(m.m, alert)
 }

-// Unprocessed returns whether the alert for the given Fingerprint is in the
-// Unprocessed state.
+// Unprocessed implements Marker.
 func (m *memMarker) Unprocessed(alert model.Fingerprint) bool {
 	return m.Status(alert).State == AlertStateUnprocessed
 }

-// Active returns whether the alert for the given Fingerprint is in the Active
-// state.
+// Active implements Marker.
 func (m *memMarker) Active(alert model.Fingerprint) bool {
 	return m.Status(alert).State == AlertStateActive
 }

-// Inhibited returns whether the alert for the given Fingerprint is in the
-// Inhibited state and any associated alert IDs.
+// Inhibited implements Marker.
 func (m *memMarker) Inhibited(alert model.Fingerprint) ([]string, bool) {
 	s := m.Status(alert)
 	return s.InhibitedBy,
@ -361,7 +387,9 @@ func (a *Alert) Merge(o *Alert) *Alert {
 	return &res
 }

-// A Muter determines whether a given label set is muted.
+// A Muter determines whether a given label set is muted. Implementers that
+// maintain an underlying Marker are expected to update it during a call of
+// Mutes.
 type Muter interface {
 	Mutes(model.LabelSet) bool
 }
@ -408,18 +436,23 @@ func (s *Silence) Expired() bool {
 	return s.StartsAt.Equal(s.EndsAt)
 }

+// SilenceStatus stores the state of a silence.
 type SilenceStatus struct {
 	State SilenceState `json:"state"`
 }

+// SilenceState is used as part of SilenceStatus.
 type SilenceState string

+// Possible values for SilenceState.
 const (
 	SilenceStateExpired SilenceState = "expired"
 	SilenceStateActive  SilenceState = "active"
 	SilenceStatePending SilenceState = "pending"
 )

+// CalcSilenceState returns the SilenceState that a silence with the given start
+// and end time would have right now.
 func CalcSilenceState(start, end time.Time) SilenceState {
 	current := time.Now()
 	if current.Before(start) {