Merge pull request #1764 from prometheus/beorn7/muting
Modify the self-inhibition prevention semantics
This commit is contained in:
commit
891f368c51
|
@ -29,8 +29,9 @@ import (
|
|||
"github.com/prometheus/alertmanager/types"
|
||||
)
|
||||
|
||||
// An Inhibitor determines whether a given label set is muted
|
||||
// based on the currently active alerts and a set of inhibition rules.
|
||||
// An Inhibitor determines whether a given label set is muted based on the
|
||||
// currently active alerts and a set of inhibition rules. It implements the
|
||||
// Muter interface.
|
||||
type Inhibitor struct {
|
||||
alerts provider.Alerts
|
||||
rules []*InhibitRule
|
||||
|
@ -121,13 +122,19 @@ func (ih *Inhibitor) Stop() {
|
|||
}
|
||||
}
|
||||
|
||||
// Mutes returns true iff the given label set is muted.
|
||||
// Mutes returns true iff the given label set is muted. It implements the Muter
|
||||
// interface.
|
||||
func (ih *Inhibitor) Mutes(lset model.LabelSet) bool {
|
||||
fp := lset.Fingerprint()
|
||||
|
||||
for _, r := range ih.rules {
|
||||
// Only inhibit if target matchers match but source matchers don't.
|
||||
if inhibitedByFP, eq := r.hasEqual(lset); !r.SourceMatchers.Match(lset) && r.TargetMatchers.Match(lset) && eq {
|
||||
if !r.TargetMatchers.Match(lset) {
|
||||
// If target side of rule doesn't match, we don't need to look any further.
|
||||
continue
|
||||
}
|
||||
// If we are here, the target side matches. If the source side matches, too, we
|
||||
// need to exclude inhibiting alerts for which the same is true.
|
||||
if inhibitedByFP, eq := r.hasEqual(lset, r.SourceMatchers.Match(lset)); eq {
|
||||
ih.marker.SetInhibited(fp, inhibitedByFP.String())
|
||||
return true
|
||||
}
|
||||
|
@ -191,9 +198,11 @@ func NewInhibitRule(cr *config.InhibitRule) *InhibitRule {
|
|||
}
|
||||
}
|
||||
|
||||
// hasEqual checks whether the source cache contains alerts matching
|
||||
// the equal labels for the given label set.
|
||||
func (r *InhibitRule) hasEqual(lset model.LabelSet) (model.Fingerprint, bool) {
|
||||
// hasEqual checks whether the source cache contains alerts matching the equal
|
||||
// labels for the given label set. If so, the fingerprint of one of those alerts
|
||||
// is returned. If excludeTwoSidedMatch is true, alerts that match both the
|
||||
// source and the target side of the rule are disregarded.
|
||||
func (r *InhibitRule) hasEqual(lset model.LabelSet, excludeTwoSidedMatch bool) (model.Fingerprint, bool) {
|
||||
Outer:
|
||||
for a := range r.scache.List() {
|
||||
// The cache might be stale and contain resolved alerts.
|
||||
|
@ -205,6 +214,9 @@ Outer:
|
|||
continue Outer
|
||||
}
|
||||
}
|
||||
if excludeTwoSidedMatch && r.TargetMatchers.Match(a.Labels) {
|
||||
continue Outer
|
||||
}
|
||||
return a.Fingerprint(), true
|
||||
}
|
||||
return model.Fingerprint(0), false
|
||||
|
|
|
@ -131,7 +131,7 @@ func TestInhibitRuleHasEqual(t *testing.T) {
|
|||
r.scache.Set(v)
|
||||
}
|
||||
|
||||
if _, have := r.hasEqual(c.input); have != c.result {
|
||||
if _, have := r.hasEqual(c.input, false); have != c.result {
|
||||
t.Errorf("Unexpected result %t, expected %t", have, c.result)
|
||||
}
|
||||
}
|
||||
|
@ -140,55 +140,87 @@ func TestInhibitRuleHasEqual(t *testing.T) {
|
|||
func TestInhibitRuleMatches(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
// Simple inhibit rule
|
||||
cr := config.InhibitRule{
|
||||
SourceMatch: map[string]string{"s": "1"},
|
||||
TargetMatch: map[string]string{"t": "1"},
|
||||
rule1 := config.InhibitRule{
|
||||
SourceMatch: map[string]string{"s1": "1"},
|
||||
TargetMatch: map[string]string{"t1": "1"},
|
||||
Equal: model.LabelNames{"e"},
|
||||
}
|
||||
rule2 := config.InhibitRule{
|
||||
SourceMatch: map[string]string{"s2": "1"},
|
||||
TargetMatch: map[string]string{"t2": "1"},
|
||||
Equal: model.LabelNames{"e"},
|
||||
}
|
||||
m := types.NewMarker(prometheus.NewRegistry())
|
||||
ih := NewInhibitor(nil, []*config.InhibitRule{&cr}, m, nopLogger)
|
||||
ir := ih.rules[0]
|
||||
ih := NewInhibitor(nil, []*config.InhibitRule{&rule1, &rule2}, m, nopLogger)
|
||||
now := time.Now()
|
||||
// Active alert that matches the source filter
|
||||
sourceAlert := &types.Alert{
|
||||
// Active alert that matches the source filter of rule1.
|
||||
sourceAlert1 := &types.Alert{
|
||||
Alert: model.Alert{
|
||||
Labels: model.LabelSet{"s": "1", "e": "1"},
|
||||
Labels: model.LabelSet{"s1": "1", "t1": "2", "e": "1"},
|
||||
StartsAt: now.Add(-time.Minute),
|
||||
EndsAt: now.Add(time.Hour),
|
||||
},
|
||||
}
|
||||
// Active alert that matches the source filter _and_ the target filter of rule2.
|
||||
sourceAlert2 := &types.Alert{
|
||||
Alert: model.Alert{
|
||||
Labels: model.LabelSet{"s2": "1", "t2": "1", "e": "1"},
|
||||
StartsAt: now.Add(-time.Minute),
|
||||
EndsAt: now.Add(time.Hour),
|
||||
},
|
||||
}
|
||||
|
||||
ir.scache = store.NewAlerts(5 * time.Minute)
|
||||
ir.scache.Set(sourceAlert)
|
||||
ih.rules[0].scache = store.NewAlerts(5 * time.Minute)
|
||||
ih.rules[0].scache.Set(sourceAlert1)
|
||||
ih.rules[1].scache = store.NewAlerts(5 * time.Minute)
|
||||
ih.rules[1].scache.Set(sourceAlert2)
|
||||
|
||||
cases := []struct {
|
||||
target model.LabelSet
|
||||
expected bool
|
||||
}{
|
||||
{
|
||||
// Matches target filter, inhibited
|
||||
target: model.LabelSet{"t": "1", "e": "1"},
|
||||
// Matches target filter of rule1, inhibited.
|
||||
target: model.LabelSet{"t1": "1", "e": "1"},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
// Matches target filter (plus noise), inhibited
|
||||
target: model.LabelSet{"t": "1", "t2": "1", "e": "1"},
|
||||
// Matches target filter of rule2, inhibited.
|
||||
target: model.LabelSet{"t2": "1", "e": "1"},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
// Doesn't match target filter, not inhibited
|
||||
target: model.LabelSet{"t": "0", "e": "1"},
|
||||
// Matches target filter of rule1 (plus noise), inhibited.
|
||||
target: model.LabelSet{"t1": "1", "t3": "1", "e": "1"},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
// Matches target filter of rule1 plus rule2, inhibited.
|
||||
target: model.LabelSet{"t1": "1", "t2": "1", "e": "1"},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
// Doesn't match target filter, not inhibited.
|
||||
target: model.LabelSet{"t1": "0", "e": "1"},
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
// Matches both source and target filters, not inhibited
|
||||
target: model.LabelSet{"s": "1", "t": "1", "e": "1"},
|
||||
// Matches both source and target filters of rule1,
|
||||
// inhibited because sourceAlert1 matches only the
|
||||
// source filter of rule1.
|
||||
target: model.LabelSet{"s1": "1", "t1": "1", "e": "1"},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
// Matches both source and target filters of rule2,
|
||||
// not inhibited because sourceAlert2 also matches both the
|
||||
// source and target filter of rule2.
|
||||
target: model.LabelSet{"s2": "1", "t2": "1", "e": "1"},
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
// Matches target filter, equal label doesn't match, not inhibited
|
||||
target: model.LabelSet{"t": "1", "e": "0"},
|
||||
target: model.LabelSet{"t1": "1", "e": "0"},
|
||||
expected: false,
|
||||
},
|
||||
}
|
||||
|
|
|
@ -22,15 +22,22 @@ import (
|
|||
"github.com/prometheus/common/model"
|
||||
)
|
||||
|
||||
// AlertState is used as part of AlertStatus.
|
||||
type AlertState string
|
||||
|
||||
// Possible values for AlertState.
|
||||
const (
|
||||
AlertStateUnprocessed AlertState = "unprocessed"
|
||||
AlertStateActive AlertState = "active"
|
||||
AlertStateSuppressed AlertState = "suppressed"
|
||||
)
|
||||
|
||||
// AlertStatus stores the state and values associated with an Alert.
|
||||
// AlertStatus stores the state of an alert and, as applicable, the IDs of
|
||||
// silences silencing the alert and of other alerts inhibiting the alert. Note
|
||||
// that currently, SilencedBy is supposed to be the complete set of the relevant
|
||||
// silences while InhibitedBy may contain only a subset of the inhibiting alerts
|
||||
// – in practice exactly one ID. (This somewhat confusing semantics might change
|
||||
// in the future.)
|
||||
type AlertStatus struct {
|
||||
State AlertState `json:"state"`
|
||||
SilencedBy []string `json:"silencedBy"`
|
||||
|
@ -40,15 +47,36 @@ type AlertStatus struct {
|
|||
// Marker helps to mark alerts as silenced and/or inhibited.
|
||||
// All methods are goroutine-safe.
|
||||
type Marker interface {
|
||||
// SetActive sets the provided alert to AlertStateActive and deletes all
|
||||
// SilencedBy and InhibitedBy entries.
|
||||
SetActive(alert model.Fingerprint)
|
||||
SetInhibited(alert model.Fingerprint, ids ...string)
|
||||
SetSilenced(alert model.Fingerprint, ids ...string)
|
||||
// SetSilenced replaces the previous SilencedBy by the provided IDs of
|
||||
// silences. The set of provided IDs is supposed to represent the
|
||||
// complete set of relevant silences. If no ID is provided and
|
||||
// InhibitedBy is already empty, this call is equivalent to
|
||||
// SetActive. Otherwise, it sets AlertStateSuppressed.
|
||||
SetSilenced(alert model.Fingerprint, silenceIDs ...string)
|
||||
// SetInhibited replaces the previous InhibitedBy by the provided IDs of
|
||||
// alerts. In contrast to SetSilenced, the set of provided IDs is not
|
||||
// expected to represent the complete set of inhibiting alerts. (In
|
||||
// practice, this method is only called with one or zero IDs. However,
|
||||
// this expectation might change in the future.) If no ID is provided and
|
||||
// SilencedBy is already empty, this call is equivalent to
|
||||
// SetActive. Otherwise, it sets AlertStateSuppressed.
|
||||
SetInhibited(alert model.Fingerprint, alertIDs ...string)
|
||||
|
||||
// Count alerts of the given state(s). With no state provided, count all
|
||||
// alerts.
|
||||
Count(...AlertState) int
|
||||
|
||||
// Status of the given alert.
|
||||
Status(model.Fingerprint) AlertStatus
|
||||
// Delete the given alert.
|
||||
Delete(model.Fingerprint)
|
||||
|
||||
// Various methods to inquire if the given alert is in a certain
|
||||
// AlertState. Silenced also returns all the silencing silences, while
|
||||
// Inhibited may return only a subset of inhibiting alerts.
|
||||
Unprocessed(model.Fingerprint) bool
|
||||
Active(model.Fingerprint) bool
|
||||
Silenced(model.Fingerprint) ([]string, bool)
|
||||
|
@ -93,7 +121,7 @@ func (m *memMarker) registerMetrics(r prometheus.Registerer) {
|
|||
r.MustRegister(alertsSuppressed)
|
||||
}
|
||||
|
||||
// Count alerts of a given state.
|
||||
// Count implements Marker.
|
||||
func (m *memMarker) Count(states ...AlertState) int {
|
||||
count := 0
|
||||
|
||||
|
@ -114,7 +142,7 @@ func (m *memMarker) Count(states ...AlertState) int {
|
|||
return count
|
||||
}
|
||||
|
||||
// SetSilenced sets the AlertStatus to suppressed and stores the associated silence IDs.
|
||||
// SetSilenced implements Marker.
|
||||
func (m *memMarker) SetSilenced(alert model.Fingerprint, ids ...string) {
|
||||
m.mtx.Lock()
|
||||
|
||||
|
@ -139,7 +167,7 @@ func (m *memMarker) SetSilenced(alert model.Fingerprint, ids ...string) {
|
|||
m.mtx.Unlock()
|
||||
}
|
||||
|
||||
// SetInhibited sets the AlertStatus to suppressed and stores the associated alert IDs.
|
||||
// SetInhibited implements Marker.
|
||||
func (m *memMarker) SetInhibited(alert model.Fingerprint, ids ...string) {
|
||||
m.mtx.Lock()
|
||||
|
||||
|
@ -164,6 +192,7 @@ func (m *memMarker) SetInhibited(alert model.Fingerprint, ids ...string) {
|
|||
m.mtx.Unlock()
|
||||
}
|
||||
|
||||
// SetActive implements Marker.
|
||||
func (m *memMarker) SetActive(alert model.Fingerprint) {
|
||||
m.mtx.Lock()
|
||||
defer m.mtx.Unlock()
|
||||
|
@ -182,7 +211,7 @@ func (m *memMarker) SetActive(alert model.Fingerprint) {
|
|||
s.InhibitedBy = []string{}
|
||||
}
|
||||
|
||||
// Status returns the AlertStatus for the given Fingerprint.
|
||||
// Status implements Marker.
|
||||
func (m *memMarker) Status(alert model.Fingerprint) AlertStatus {
|
||||
m.mtx.RLock()
|
||||
defer m.mtx.RUnlock()
|
||||
|
@ -198,7 +227,7 @@ func (m *memMarker) Status(alert model.Fingerprint) AlertStatus {
|
|||
return *s
|
||||
}
|
||||
|
||||
// Delete deletes the given Fingerprint from the internal cache.
|
||||
// Delete implements Marker.
|
||||
func (m *memMarker) Delete(alert model.Fingerprint) {
|
||||
m.mtx.Lock()
|
||||
defer m.mtx.Unlock()
|
||||
|
@ -206,20 +235,17 @@ func (m *memMarker) Delete(alert model.Fingerprint) {
|
|||
delete(m.m, alert)
|
||||
}
|
||||
|
||||
// Unprocessed returns whether the alert for the given Fingerprint is in the
|
||||
// Unprocessed state.
|
||||
// Unprocessed implements Marker.
|
||||
func (m *memMarker) Unprocessed(alert model.Fingerprint) bool {
|
||||
return m.Status(alert).State == AlertStateUnprocessed
|
||||
}
|
||||
|
||||
// Active returns whether the alert for the given Fingerprint is in the Active
|
||||
// state.
|
||||
// Active implements Marker.
|
||||
func (m *memMarker) Active(alert model.Fingerprint) bool {
|
||||
return m.Status(alert).State == AlertStateActive
|
||||
}
|
||||
|
||||
// Inhibited returns whether the alert for the given Fingerprint is in the
|
||||
// Inhibited state and any associated alert IDs.
|
||||
// Inhibited implements Marker.
|
||||
func (m *memMarker) Inhibited(alert model.Fingerprint) ([]string, bool) {
|
||||
s := m.Status(alert)
|
||||
return s.InhibitedBy,
|
||||
|
@ -361,7 +387,9 @@ func (a *Alert) Merge(o *Alert) *Alert {
|
|||
return &res
|
||||
}
|
||||
|
||||
// A Muter determines whether a given label set is muted.
|
||||
// A Muter determines whether a given label set is muted. Implementers that
|
||||
// maintain an underlying Marker are expected to update it during a call of
|
||||
// Mutes.
|
||||
type Muter interface {
|
||||
Mutes(model.LabelSet) bool
|
||||
}
|
||||
|
@ -408,18 +436,23 @@ func (s *Silence) Expired() bool {
|
|||
return s.StartsAt.Equal(s.EndsAt)
|
||||
}
|
||||
|
||||
// SilenceStatus stores the state of a silence.
|
||||
type SilenceStatus struct {
|
||||
State SilenceState `json:"state"`
|
||||
}
|
||||
|
||||
// SilenceState is used as part of SilenceStatus.
|
||||
type SilenceState string
|
||||
|
||||
// Possible values for SilenceState.
|
||||
const (
|
||||
SilenceStateExpired SilenceState = "expired"
|
||||
SilenceStateActive SilenceState = "active"
|
||||
SilenceStatePending SilenceState = "pending"
|
||||
)
|
||||
|
||||
// CalcSilenceState returns the SilenceState that a silence with the given start
|
||||
// and end time would have right now.
|
||||
func CalcSilenceState(start, end time.Time) SilenceState {
|
||||
current := time.Now()
|
||||
if current.Before(start) {
|
||||
|
|
Loading…
Reference in New Issue