Merge pull request #2590 from prometheus/beorn7/silence
Include pending silences for future muting decisions
This commit is contained in:
commit
22bb972d1a
|
@ -673,7 +673,7 @@ func TestMuteStageWithSilences(t *testing.T) {
|
|||
|
||||
// Set the second alert as previously silenced with an old version
|
||||
// number. This is expected to get unsilenced by the stage.
|
||||
marker.SetSilenced(inAlerts[1].Fingerprint(), 0, "123")
|
||||
marker.SetSilenced(inAlerts[1].Fingerprint(), 0, []string{"123"}, nil)
|
||||
|
||||
_, alerts, err := stage.Exec(context.Background(), log.NewNopLogger(), inAlerts...)
|
||||
if err != nil {
|
||||
|
|
|
@ -296,7 +296,8 @@ func TestAlertsGC(t *testing.T) {
|
|||
}
|
||||
|
||||
for _, a := range insert {
|
||||
marker.SetActive(a.Fingerprint())
|
||||
marker.SetSilenced(a.Fingerprint(), 0, nil, nil)
|
||||
marker.SetInhibited(a.Fingerprint())
|
||||
if !marker.Active(a.Fingerprint()) {
|
||||
t.Errorf("error setting status: %v", a)
|
||||
}
|
||||
|
|
|
@ -113,17 +113,19 @@ func NewSilencer(s *Silences, m types.Marker, l log.Logger) *Silencer {
|
|||
// Mutes implements the Muter interface.
|
||||
func (s *Silencer) Mutes(lset model.LabelSet) bool {
|
||||
fp := lset.Fingerprint()
|
||||
ids, markerVersion, _ := s.marker.Silenced(fp)
|
||||
activeIDs, pendingIDs, markerVersion, _ := s.marker.Silenced(fp)
|
||||
|
||||
var (
|
||||
err error
|
||||
sils []*pb.Silence
|
||||
allSils []*pb.Silence
|
||||
newVersion = markerVersion
|
||||
)
|
||||
if markerVersion == s.silences.Version() {
|
||||
totalSilences := len(activeIDs) + len(pendingIDs)
|
||||
// No new silences added, just need to check which of the old
|
||||
// silences are still relevant.
|
||||
if len(ids) == 0 {
|
||||
// silences are still relevant and which of the pending ones
|
||||
// have become active.
|
||||
if totalSilences == 0 {
|
||||
// Super fast path: No silences ever applied to this
|
||||
// alert, none have been added. We are done.
|
||||
return false
|
||||
|
@ -134,47 +136,55 @@ func (s *Silencer) Mutes(lset model.LabelSet) bool {
|
|||
// markerVersion because the Query call might already return a
|
||||
// newer version, which is not the version our old list of
|
||||
// applicable silences is based on.
|
||||
sils, _, err = s.silences.Query(
|
||||
QIDs(ids...),
|
||||
QState(types.SilenceStateActive),
|
||||
allIDs := append(append(make([]string, 0, totalSilences), activeIDs...), pendingIDs...)
|
||||
allSils, _, err = s.silences.Query(
|
||||
QIDs(allIDs...),
|
||||
QState(types.SilenceStateActive, types.SilenceStatePending),
|
||||
)
|
||||
} else {
|
||||
// New silences have been added, do a full query.
|
||||
sils, newVersion, err = s.silences.Query(
|
||||
QState(types.SilenceStateActive),
|
||||
allSils, newVersion, err = s.silences.Query(
|
||||
QState(types.SilenceStateActive, types.SilenceStatePending),
|
||||
QMatches(lset),
|
||||
)
|
||||
}
|
||||
if err != nil {
|
||||
level.Error(s.logger).Log("msg", "Querying silences failed, alerts might not get silenced correctly", "err", err)
|
||||
}
|
||||
if len(sils) == 0 {
|
||||
s.marker.SetSilenced(fp, newVersion)
|
||||
if len(allSils) == 0 {
|
||||
// Easy case, neither active nor pending silences anymore.
|
||||
s.marker.SetSilenced(fp, newVersion, nil, nil)
|
||||
return false
|
||||
}
|
||||
idsChanged := len(sils) != len(ids)
|
||||
if !idsChanged {
|
||||
// Length is the same, but is the content the same?
|
||||
for i, s := range sils {
|
||||
if ids[i] != s.Id {
|
||||
idsChanged = true
|
||||
break
|
||||
}
|
||||
// It is still possible that nothing has changed, but finding out is not
|
||||
// much less effort than just recreating the IDs from the query
|
||||
// result. So let's do it in any case. Note that we cannot reuse the
|
||||
// current ID slices for concurrency reasons.
|
||||
activeIDs, pendingIDs = nil, nil
|
||||
now := s.silences.now()
|
||||
for _, sil := range allSils {
|
||||
switch getState(sil, now) {
|
||||
case types.SilenceStatePending:
|
||||
pendingIDs = append(pendingIDs, sil.Id)
|
||||
case types.SilenceStateActive:
|
||||
activeIDs = append(activeIDs, sil.Id)
|
||||
default:
|
||||
// Do nothing, silence has expired in the meantime.
|
||||
}
|
||||
}
|
||||
if idsChanged {
|
||||
// Need to recreate ids.
|
||||
ids = make([]string, len(sils))
|
||||
for i, s := range sils {
|
||||
ids[i] = s.Id
|
||||
}
|
||||
sort.Strings(ids) // For comparability.
|
||||
}
|
||||
if idsChanged || newVersion != markerVersion {
|
||||
// Update marker only if something changed.
|
||||
s.marker.SetSilenced(fp, newVersion, ids...)
|
||||
}
|
||||
return true
|
||||
level.Debug(s.logger).Log(
|
||||
"msg", "determined current silences state",
|
||||
"now", now,
|
||||
"total", len(allSils),
|
||||
"active", len(activeIDs),
|
||||
"pending", len(pendingIDs),
|
||||
)
|
||||
sort.Strings(activeIDs)
|
||||
sort.Strings(pendingIDs)
|
||||
|
||||
s.marker.SetSilenced(fp, newVersion, activeIDs, pendingIDs)
|
||||
|
||||
return len(activeIDs) > 0
|
||||
}
|
||||
|
||||
// Silences holds a silence state that can be modified, queried, and snapshot.
|
||||
|
|
|
@ -22,9 +22,11 @@ import (
|
|||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/go-kit/kit/log"
|
||||
"github.com/matttproud/golang_protobuf_extensions/pbutil"
|
||||
pb "github.com/prometheus/alertmanager/silence/silencepb"
|
||||
"github.com/prometheus/alertmanager/types"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/common/model"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
@ -904,6 +906,73 @@ func TestSilenceExpireWithZeroRetention(t *testing.T) {
|
|||
require.Equal(t, 3, count)
|
||||
}
|
||||
|
||||
func TestSilencer(t *testing.T) {
|
||||
ss, err := New(Options{Retention: time.Hour})
|
||||
require.NoError(t, err)
|
||||
|
||||
now := time.Now()
|
||||
ss.now = func() time.Time { return now }
|
||||
|
||||
m := types.NewMarker(prometheus.NewRegistry())
|
||||
s := NewSilencer(ss, m, log.NewNopLogger())
|
||||
|
||||
require.False(t, s.Mutes(model.LabelSet{"foo": "bar"}), "expected alert not silenced without any silences")
|
||||
|
||||
_, err = ss.Set(&pb.Silence{
|
||||
Matchers: []*pb.Matcher{{Name: "foo", Pattern: "baz"}},
|
||||
StartsAt: now.Add(-time.Hour),
|
||||
EndsAt: now.Add(5 * time.Minute),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
require.False(t, s.Mutes(model.LabelSet{"foo": "bar"}), "expected alert not silenced by non-matching silence")
|
||||
|
||||
id, err := ss.Set(&pb.Silence{
|
||||
Matchers: []*pb.Matcher{{Name: "foo", Pattern: "bar"}},
|
||||
StartsAt: now.Add(-time.Hour),
|
||||
EndsAt: now.Add(5 * time.Minute),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
require.NotEmpty(t, id)
|
||||
|
||||
require.True(t, s.Mutes(model.LabelSet{"foo": "bar"}), "expected alert silenced by matching silence")
|
||||
|
||||
now = now.Add(time.Hour) // One hour passes, silence expires.
|
||||
|
||||
require.False(t, s.Mutes(model.LabelSet{"foo": "bar"}), "expected alert not silenced by expired silence")
|
||||
|
||||
// Update silence to start in the future.
|
||||
_, err = ss.Set(&pb.Silence{
|
||||
Id: id,
|
||||
Matchers: []*pb.Matcher{{Name: "foo", Pattern: "bar"}},
|
||||
StartsAt: now.Add(time.Hour),
|
||||
EndsAt: now.Add(3 * time.Hour),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
require.False(t, s.Mutes(model.LabelSet{"foo": "bar"}), "expected alert not silenced by future silence")
|
||||
|
||||
now = now.Add(2 * time.Hour) // Two hours pass, silence becomes active.
|
||||
|
||||
// Exposes issue #2426.
|
||||
require.True(t, s.Mutes(model.LabelSet{"foo": "bar"}), "expected alert silenced by activated silence")
|
||||
|
||||
_, err = ss.Set(&pb.Silence{
|
||||
Matchers: []*pb.Matcher{{Name: "foo", Pattern: "b..", Type: pb.Matcher_REGEXP}},
|
||||
StartsAt: now.Add(time.Hour),
|
||||
EndsAt: now.Add(3 * time.Hour),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
// Note that issue #2426 doesn't apply anymore because we added a new silence.
|
||||
require.True(t, s.Mutes(model.LabelSet{"foo": "bar"}), "expected alert still silenced by activated silence")
|
||||
|
||||
now = now.Add(2 * time.Hour) // Two hours pass, first silence expires, overlapping second silence becomes active.
|
||||
|
||||
// Another variant of issue #2426 (overlapping silences).
|
||||
require.True(t, s.Mutes(model.LabelSet{"foo": "bar"}), "expected alert silenced by activated second silence")
|
||||
}
|
||||
|
||||
func TestValidateMatcher(t *testing.T) {
|
||||
cases := []struct {
|
||||
m *pb.Matcher
|
||||
|
|
144
types/types.go
144
types/types.go
|
@ -44,29 +44,29 @@ type AlertStatus struct {
|
|||
SilencedBy []string `json:"silencedBy"`
|
||||
InhibitedBy []string `json:"inhibitedBy"`
|
||||
|
||||
// For internal tracking, not exposed in the API.
|
||||
pendingSilences []string
|
||||
silencesVersion int
|
||||
}
|
||||
|
||||
// Marker helps to mark alerts as silenced and/or inhibited.
|
||||
// All methods are goroutine-safe.
|
||||
type Marker interface {
|
||||
// SetActive sets the provided alert to AlertStateActive and deletes all
|
||||
// SilencedBy and InhibitedBy entries.
|
||||
SetActive(alert model.Fingerprint)
|
||||
// SetSilenced replaces the previous SilencedBy by the provided IDs of
|
||||
// silences, including the version number of the silences state. The set
|
||||
// of provided IDs is supposed to represent the complete set of relevant
|
||||
// silences. If no ID is provided and InhibitedBy is already empty, this
|
||||
// call is equivalent to SetActive. Otherwise, it sets
|
||||
// AlertStateSuppressed.
|
||||
SetSilenced(alert model.Fingerprint, version int, silenceIDs ...string)
|
||||
// active and pending silences, including the version number of the
|
||||
// silences state. The set of provided IDs is supposed to represent the
|
||||
// complete set of relevant silences. If no active silence IDs are provided and
|
||||
// InhibitedBy is already empty, it sets the provided alert to AlertStateActive.
|
||||
// Otherwise, it sets the provided alert to AlertStateSuppressed.
|
||||
SetSilenced(alert model.Fingerprint, version int, activeSilenceIDs []string, pendingSilenceIDs []string)
|
||||
// SetInhibited replaces the previous InhibitedBy by the provided IDs of
|
||||
// alerts. In contrast to SetSilenced, the set of provided IDs is not
|
||||
// expected to represent the complete set of inhibiting alerts. (In
|
||||
// practice, this method is only called with one or zero IDs. However,
|
||||
// this expectation might change in the future.) If no ID is provided and
|
||||
// SilencedBy is already empty, this call is equivalent to
|
||||
// SetActive. Otherwise, it sets AlertStateSuppressed.
|
||||
// this expectation might change in the future.) If no IDs are provided
|
||||
// and SilencedBy is already empty, it sets the provided alert to
|
||||
// AlertStateActive. Otherwise, it sets the provided alert to
|
||||
// AlertStateSuppressed.
|
||||
SetInhibited(alert model.Fingerprint, alertIDs ...string)
|
||||
|
||||
// Count alerts of the given state(s). With no state provided, count all
|
||||
|
@ -79,13 +79,13 @@ type Marker interface {
|
|||
Delete(model.Fingerprint)
|
||||
|
||||
// Various methods to inquire if the given alert is in a certain
|
||||
// AlertState. Silenced also returns all the silencing silences, while
|
||||
// Inhibited may return only a subset of inhibiting alerts. Silenced
|
||||
// also returns the version of the silences state the result is based
|
||||
// on.
|
||||
// AlertState. Silenced also returns all the active and pending
|
||||
// silences, while Inhibited may return only a subset of inhibiting
|
||||
// alerts. Silenced also returns the version of the silences state the
|
||||
// result is based on.
|
||||
Unprocessed(model.Fingerprint) bool
|
||||
Active(model.Fingerprint) bool
|
||||
Silenced(model.Fingerprint) ([]string, int, bool)
|
||||
Silenced(model.Fingerprint) (activeIDs []string, pendingIDs []string, version int, silenced bool)
|
||||
Inhibited(model.Fingerprint) ([]string, bool)
|
||||
}
|
||||
|
||||
|
@ -148,58 +148,7 @@ func (m *memMarker) Count(states ...AlertState) int {
|
|||
}
|
||||
|
||||
// SetSilenced implements Marker.
|
||||
func (m *memMarker) SetSilenced(alert model.Fingerprint, version int, ids ...string) {
|
||||
m.mtx.Lock()
|
||||
|
||||
s, found := m.m[alert]
|
||||
if !found {
|
||||
s = &AlertStatus{}
|
||||
m.m[alert] = s
|
||||
}
|
||||
s.silencesVersion = version
|
||||
|
||||
// If there are any silence or alert IDs associated with the
|
||||
// fingerprint, it is suppressed. Otherwise, set it to
|
||||
// AlertStateUnprocessed.
|
||||
if len(ids) == 0 && len(s.InhibitedBy) == 0 {
|
||||
m.mtx.Unlock()
|
||||
m.SetActive(alert)
|
||||
return
|
||||
}
|
||||
|
||||
s.State = AlertStateSuppressed
|
||||
s.SilencedBy = ids
|
||||
|
||||
m.mtx.Unlock()
|
||||
}
|
||||
|
||||
// SetInhibited implements Marker.
|
||||
func (m *memMarker) SetInhibited(alert model.Fingerprint, ids ...string) {
|
||||
m.mtx.Lock()
|
||||
|
||||
s, found := m.m[alert]
|
||||
if !found {
|
||||
s = &AlertStatus{}
|
||||
m.m[alert] = s
|
||||
}
|
||||
|
||||
// If there are any silence or alert IDs associated with the
|
||||
// fingerprint, it is suppressed. Otherwise, set it to
|
||||
// AlertStateUnprocessed.
|
||||
if len(ids) == 0 && len(s.SilencedBy) == 0 {
|
||||
m.mtx.Unlock()
|
||||
m.SetActive(alert)
|
||||
return
|
||||
}
|
||||
|
||||
s.State = AlertStateSuppressed
|
||||
s.InhibitedBy = ids
|
||||
|
||||
m.mtx.Unlock()
|
||||
}
|
||||
|
||||
// SetActive implements Marker.
|
||||
func (m *memMarker) SetActive(alert model.Fingerprint) {
|
||||
func (m *memMarker) SetSilenced(alert model.Fingerprint, version int, activeIDs []string, pendingIDs []string) {
|
||||
m.mtx.Lock()
|
||||
defer m.mtx.Unlock()
|
||||
|
||||
|
@ -208,10 +157,42 @@ func (m *memMarker) SetActive(alert model.Fingerprint) {
|
|||
s = &AlertStatus{}
|
||||
m.m[alert] = s
|
||||
}
|
||||
s.SilencedBy = activeIDs
|
||||
s.pendingSilences = pendingIDs
|
||||
s.silencesVersion = version
|
||||
|
||||
s.State = AlertStateActive
|
||||
s.SilencedBy = []string{}
|
||||
s.InhibitedBy = []string{}
|
||||
// If there are any silence or alert IDs associated with the
|
||||
// fingerprint, it is suppressed. Otherwise, set it to
|
||||
// AlertStateActive.
|
||||
if len(activeIDs) == 0 && len(s.InhibitedBy) == 0 {
|
||||
s.State = AlertStateActive
|
||||
return
|
||||
}
|
||||
|
||||
s.State = AlertStateSuppressed
|
||||
}
|
||||
|
||||
// SetInhibited implements Marker.
|
||||
func (m *memMarker) SetInhibited(alert model.Fingerprint, ids ...string) {
|
||||
m.mtx.Lock()
|
||||
defer m.mtx.Unlock()
|
||||
|
||||
s, found := m.m[alert]
|
||||
if !found {
|
||||
s = &AlertStatus{}
|
||||
m.m[alert] = s
|
||||
}
|
||||
s.InhibitedBy = ids
|
||||
|
||||
// If there are any silence or alert IDs associated with the
|
||||
// fingerprint, it is suppressed. Otherwise, set it to
|
||||
// AlertStateActive.
|
||||
if len(ids) == 0 && len(s.SilencedBy) == 0 {
|
||||
s.State = AlertStateActive
|
||||
return
|
||||
}
|
||||
|
||||
s.State = AlertStateSuppressed
|
||||
}
|
||||
|
||||
// Status implements Marker.
|
||||
|
@ -219,15 +200,14 @@ func (m *memMarker) Status(alert model.Fingerprint) AlertStatus {
|
|||
m.mtx.RLock()
|
||||
defer m.mtx.RUnlock()
|
||||
|
||||
s, found := m.m[alert]
|
||||
if !found {
|
||||
s = &AlertStatus{
|
||||
State: AlertStateUnprocessed,
|
||||
SilencedBy: []string{},
|
||||
InhibitedBy: []string{},
|
||||
}
|
||||
if s, found := m.m[alert]; found {
|
||||
return *s
|
||||
}
|
||||
return AlertStatus{
|
||||
State: AlertStateUnprocessed,
|
||||
SilencedBy: []string{},
|
||||
InhibitedBy: []string{},
|
||||
}
|
||||
return *s
|
||||
}
|
||||
|
||||
// Delete implements Marker.
|
||||
|
@ -258,9 +238,9 @@ func (m *memMarker) Inhibited(alert model.Fingerprint) ([]string, bool) {
|
|||
// Silenced returns whether the alert for the given Fingerprint is in the
|
||||
// Silenced state, any associated active and pending silence IDs, and the silences state version
|
||||
// the result is based on.
|
||||
func (m *memMarker) Silenced(alert model.Fingerprint) ([]string, int, bool) {
|
||||
func (m *memMarker) Silenced(alert model.Fingerprint) (activeIDs []string, pendingIDs []string, version int, silenced bool) {
|
||||
s := m.Status(alert)
|
||||
return s.SilencedBy, s.silencesVersion,
|
||||
return s.SilencedBy, s.pendingSilences, s.silencesVersion,
|
||||
s.State == AlertStateSuppressed && len(s.SilencedBy) > 0
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue