Merge pull request #1764 from prometheus/beorn7/muting

Modify the self-inhibition prevention semantics
This commit is contained in:
Björn Rabenstein 2019-02-26 14:01:22 +01:00 committed by GitHub
commit 891f368c51
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 121 additions and 44 deletions

View File

@ -29,8 +29,9 @@ import (
"github.com/prometheus/alertmanager/types"
)
// An Inhibitor determines whether a given label set is muted
// based on the currently active alerts and a set of inhibition rules.
// An Inhibitor determines whether a given label set is muted based on the
// currently active alerts and a set of inhibition rules. It implements the
// Muter interface.
type Inhibitor struct {
alerts provider.Alerts
rules []*InhibitRule
@ -121,13 +122,19 @@ func (ih *Inhibitor) Stop() {
}
}
// Mutes returns true iff the given label set is muted.
// Mutes returns true iff the given label set is muted. It implements the Muter
// interface.
func (ih *Inhibitor) Mutes(lset model.LabelSet) bool {
fp := lset.Fingerprint()
for _, r := range ih.rules {
// Only inhibit if target matchers match but source matchers don't.
if inhibitedByFP, eq := r.hasEqual(lset); !r.SourceMatchers.Match(lset) && r.TargetMatchers.Match(lset) && eq {
if !r.TargetMatchers.Match(lset) {
// If target side of rule doesn't match, we don't need to look any further.
continue
}
// If we are here, the target side matches. If the source side matches, too, we
// need to exclude inhibiting alerts for which the same is true.
if inhibitedByFP, eq := r.hasEqual(lset, r.SourceMatchers.Match(lset)); eq {
ih.marker.SetInhibited(fp, inhibitedByFP.String())
return true
}
@ -191,9 +198,11 @@ func NewInhibitRule(cr *config.InhibitRule) *InhibitRule {
}
}
// hasEqual checks whether the source cache contains alerts matching
// the equal labels for the given label set.
func (r *InhibitRule) hasEqual(lset model.LabelSet) (model.Fingerprint, bool) {
// hasEqual checks whether the source cache contains alerts matching the equal
// labels for the given label set. If so, the fingerprint of one of those alerts
// is returned. If excludeTwoSidedMatch is true, alerts that match both the
// source and the target side of the rule are disregarded.
func (r *InhibitRule) hasEqual(lset model.LabelSet, excludeTwoSidedMatch bool) (model.Fingerprint, bool) {
Outer:
for a := range r.scache.List() {
// The cache might be stale and contain resolved alerts.
@ -205,6 +214,9 @@ Outer:
continue Outer
}
}
if excludeTwoSidedMatch && r.TargetMatchers.Match(a.Labels) {
continue Outer
}
return a.Fingerprint(), true
}
return model.Fingerprint(0), false

View File

@ -131,7 +131,7 @@ func TestInhibitRuleHasEqual(t *testing.T) {
r.scache.Set(v)
}
if _, have := r.hasEqual(c.input); have != c.result {
if _, have := r.hasEqual(c.input, false); have != c.result {
t.Errorf("Unexpected result %t, expected %t", have, c.result)
}
}
@ -140,55 +140,87 @@ func TestInhibitRuleHasEqual(t *testing.T) {
func TestInhibitRuleMatches(t *testing.T) {
t.Parallel()
// Simple inhibit rule
cr := config.InhibitRule{
SourceMatch: map[string]string{"s": "1"},
TargetMatch: map[string]string{"t": "1"},
rule1 := config.InhibitRule{
SourceMatch: map[string]string{"s1": "1"},
TargetMatch: map[string]string{"t1": "1"},
Equal: model.LabelNames{"e"},
}
rule2 := config.InhibitRule{
SourceMatch: map[string]string{"s2": "1"},
TargetMatch: map[string]string{"t2": "1"},
Equal: model.LabelNames{"e"},
}
m := types.NewMarker(prometheus.NewRegistry())
ih := NewInhibitor(nil, []*config.InhibitRule{&cr}, m, nopLogger)
ir := ih.rules[0]
ih := NewInhibitor(nil, []*config.InhibitRule{&rule1, &rule2}, m, nopLogger)
now := time.Now()
// Active alert that matches the source filter
sourceAlert := &types.Alert{
// Active alert that matches the source filter of rule1.
sourceAlert1 := &types.Alert{
Alert: model.Alert{
Labels: model.LabelSet{"s": "1", "e": "1"},
Labels: model.LabelSet{"s1": "1", "t1": "2", "e": "1"},
StartsAt: now.Add(-time.Minute),
EndsAt: now.Add(time.Hour),
},
}
// Active alert that matches the source filter _and_ the target filter of rule2.
sourceAlert2 := &types.Alert{
Alert: model.Alert{
Labels: model.LabelSet{"s2": "1", "t2": "1", "e": "1"},
StartsAt: now.Add(-time.Minute),
EndsAt: now.Add(time.Hour),
},
}
ir.scache = store.NewAlerts(5 * time.Minute)
ir.scache.Set(sourceAlert)
ih.rules[0].scache = store.NewAlerts(5 * time.Minute)
ih.rules[0].scache.Set(sourceAlert1)
ih.rules[1].scache = store.NewAlerts(5 * time.Minute)
ih.rules[1].scache.Set(sourceAlert2)
cases := []struct {
target model.LabelSet
expected bool
}{
{
// Matches target filter, inhibited
target: model.LabelSet{"t": "1", "e": "1"},
// Matches target filter of rule1, inhibited.
target: model.LabelSet{"t1": "1", "e": "1"},
expected: true,
},
{
// Matches target filter (plus noise), inhibited
target: model.LabelSet{"t": "1", "t2": "1", "e": "1"},
// Matches target filter of rule2, inhibited.
target: model.LabelSet{"t2": "1", "e": "1"},
expected: true,
},
{
// Doesn't match target filter, not inhibited
target: model.LabelSet{"t": "0", "e": "1"},
// Matches target filter of rule1 (plus noise), inhibited.
target: model.LabelSet{"t1": "1", "t3": "1", "e": "1"},
expected: true,
},
{
// Matches target filter of rule1 plus rule2, inhibited.
target: model.LabelSet{"t1": "1", "t2": "1", "e": "1"},
expected: true,
},
{
// Doesn't match target filter, not inhibited.
target: model.LabelSet{"t1": "0", "e": "1"},
expected: false,
},
{
// Matches both source and target filters, not inhibited
target: model.LabelSet{"s": "1", "t": "1", "e": "1"},
// Matches both source and target filters of rule1,
// inhibited because sourceAlert1 matches only the
// source filter of rule1.
target: model.LabelSet{"s1": "1", "t1": "1", "e": "1"},
expected: true,
},
{
// Matches both source and target filters of rule2,
// not inhibited because sourceAlert2 matches also both the
// source and target filter of rule2.
target: model.LabelSet{"s2": "1", "t2": "1", "e": "1"},
expected: false,
},
{
// Matches target filter, equal label doesn't match, not inhibited
target: model.LabelSet{"t": "1", "e": "0"},
target: model.LabelSet{"t1": "1", "e": "0"},
expected: false,
},
}

View File

@ -22,15 +22,22 @@ import (
"github.com/prometheus/common/model"
)
// AlertState is used as part of AlertStatus, where it is the type of the
// State field.
type AlertState string
// Possible values for AlertState.
const (
// AlertStateUnprocessed is the state that memMarker.Unprocessed checks for.
AlertStateUnprocessed AlertState = "unprocessed"
// AlertStateActive is the state set by Marker.SetActive.
AlertStateActive AlertState = "active"
// AlertStateSuppressed is the state set by Marker.SetSilenced and
// Marker.SetInhibited, except where such a call is equivalent to SetActive.
AlertStateSuppressed AlertState = "suppressed"
)
// AlertStatus stores the state and values associated with an Alert.
// AlertStatus stores the state of an alert and, as applicable, the IDs of
// silences silencing the alert and of other alerts inhibiting the alert. Note
// that currently, SilencedBy is supposed to be the complete set of the relevant
// silences while InhibitedBy may contain only a subset of the inhibiting alerts
// – in practice exactly one ID. (This somewhat confusing semantics might change
// in the future.)
type AlertStatus struct {
State AlertState `json:"state"`
SilencedBy []string `json:"silencedBy"`
@ -40,15 +47,36 @@ type AlertStatus struct {
// Marker helps to mark alerts as silenced and/or inhibited.
// All methods are goroutine-safe.
type Marker interface {
// SetActive sets the provided alert to AlertStateActive and deletes all
// SilencedBy and InhibitedBy entries.
SetActive(alert model.Fingerprint)
SetInhibited(alert model.Fingerprint, ids ...string)
SetSilenced(alert model.Fingerprint, ids ...string)
// SetSilenced replaces the previous SilencedBy by the provided IDs of
// silences. The set of provided IDs is supposed to represent the
// complete set of relevant silences. If no ID is provided and
// InhibitedBy is already empty, this call is equivalent to
// SetActive. Otherwise, it sets AlertStateSuppressed.
SetSilenced(alert model.Fingerprint, silenceIDs ...string)
// SetInhibited replaces the previous InhibitedBy by the provided IDs of
// alerts. In contrast to SetSilenced, the set of provided IDs is not
// expected to represent the complete set of inhibiting alerts. (In
// practice, this method is only called with one or zero IDs. However,
// this expectation might change in the future.) If no ID is provided and
// SilencedBy is already empty, this call is equivalent to
// SetActive. Otherwise, it sets AlertStateSuppressed.
SetInhibited(alert model.Fingerprint, alertIDs ...string)
// Count alerts of the given state(s). With no state provided, count all
// alerts.
Count(...AlertState) int
// Status of the given alert.
Status(model.Fingerprint) AlertStatus
// Delete the given alert.
Delete(model.Fingerprint)
// Various methods to inquire if the given alert is in a certain
// AlertState. Silenced also returns all the silencing silences, while
// Inhibited may return only a subset of inhibiting alerts.
Unprocessed(model.Fingerprint) bool
Active(model.Fingerprint) bool
Silenced(model.Fingerprint) ([]string, bool)
@ -93,7 +121,7 @@ func (m *memMarker) registerMetrics(r prometheus.Registerer) {
r.MustRegister(alertsSuppressed)
}
// Count alerts of a given state.
// Count implements Marker.
func (m *memMarker) Count(states ...AlertState) int {
count := 0
@ -114,7 +142,7 @@ func (m *memMarker) Count(states ...AlertState) int {
return count
}
// SetSilenced sets the AlertStatus to suppressed and stores the associated silence IDs.
// SetSilenced implements Marker.
func (m *memMarker) SetSilenced(alert model.Fingerprint, ids ...string) {
m.mtx.Lock()
@ -139,7 +167,7 @@ func (m *memMarker) SetSilenced(alert model.Fingerprint, ids ...string) {
m.mtx.Unlock()
}
// SetInhibited sets the AlertStatus to suppressed and stores the associated alert IDs.
// SetInhibited implements Marker.
func (m *memMarker) SetInhibited(alert model.Fingerprint, ids ...string) {
m.mtx.Lock()
@ -164,6 +192,7 @@ func (m *memMarker) SetInhibited(alert model.Fingerprint, ids ...string) {
m.mtx.Unlock()
}
// SetActive implements Marker.
func (m *memMarker) SetActive(alert model.Fingerprint) {
m.mtx.Lock()
defer m.mtx.Unlock()
@ -182,7 +211,7 @@ func (m *memMarker) SetActive(alert model.Fingerprint) {
s.InhibitedBy = []string{}
}
// Status returns the AlertStatus for the given Fingerprint.
// Status implements Marker.
func (m *memMarker) Status(alert model.Fingerprint) AlertStatus {
m.mtx.RLock()
defer m.mtx.RUnlock()
@ -198,7 +227,7 @@ func (m *memMarker) Status(alert model.Fingerprint) AlertStatus {
return *s
}
// Delete deletes the given Fingerprint from the internal cache.
// Delete implements Marker.
func (m *memMarker) Delete(alert model.Fingerprint) {
m.mtx.Lock()
defer m.mtx.Unlock()
@ -206,20 +235,17 @@ func (m *memMarker) Delete(alert model.Fingerprint) {
delete(m.m, alert)
}
// Unprocessed implements Marker. It looks up the status of the alert with the
// given Fingerprint and reports whether its state is AlertStateUnprocessed.
func (m *memMarker) Unprocessed(alert model.Fingerprint) bool {
	status := m.Status(alert)
	return status.State == AlertStateUnprocessed
}
// Active implements Marker. It looks up the status of the alert with the given
// Fingerprint and reports whether its state is AlertStateActive.
func (m *memMarker) Active(alert model.Fingerprint) bool {
	state := m.Status(alert).State
	return state == AlertStateActive
}
// Inhibited returns whether the alert for the given Fingerprint is in the
// Inhibited state and any associated alert IDs.
// Inhibited implements Marker.
func (m *memMarker) Inhibited(alert model.Fingerprint) ([]string, bool) {
s := m.Status(alert)
return s.InhibitedBy,
@ -361,7 +387,9 @@ func (a *Alert) Merge(o *Alert) *Alert {
return &res
}
// A Muter determines whether a given label set is muted. Implementers that
// maintain an underlying Marker are expected to update it during a call of
// Mutes.
type Muter interface {
// Mutes reports whether the given label set is muted.
Mutes(model.LabelSet) bool
}
@ -408,18 +436,23 @@ func (s *Silence) Expired() bool {
return s.StartsAt.Equal(s.EndsAt)
}
// SilenceStatus stores the state of a silence.
type SilenceStatus struct {
// State is the silence's SilenceState; it is encoded under the JSON key
// "state".
State SilenceState `json:"state"`
}
// SilenceState is used as part of SilenceStatus, where it is the type of the
// State field.
type SilenceState string
// Possible values for SilenceState. CalcSilenceState returns a SilenceState
// for a given start and end time.
const (
SilenceStateExpired SilenceState = "expired"
SilenceStateActive SilenceState = "active"
SilenceStatePending SilenceState = "pending"
)
// CalcSilenceState returns the SilenceState that a silence with the given start
// and end time would have right now.
func CalcSilenceState(start, end time.Time) SilenceState {
current := time.Now()
if current.Before(start) {