Merge pull request #2590 from prometheus/beorn7/silence

Include pending silences for future muting decisions
This commit is contained in:
Björn Rabenstein 2021-05-31 18:14:18 +02:00 committed by GitHub
commit 22bb972d1a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 176 additions and 116 deletions

View File

@ -673,7 +673,7 @@ func TestMuteStageWithSilences(t *testing.T) {
// Set the second alert as previously silenced with an old version
// number. This is expected to get unsilenced by the stage.
marker.SetSilenced(inAlerts[1].Fingerprint(), 0, "123")
marker.SetSilenced(inAlerts[1].Fingerprint(), 0, []string{"123"}, nil)
_, alerts, err := stage.Exec(context.Background(), log.NewNopLogger(), inAlerts...)
if err != nil {

View File

@ -296,7 +296,8 @@ func TestAlertsGC(t *testing.T) {
}
for _, a := range insert {
marker.SetActive(a.Fingerprint())
marker.SetSilenced(a.Fingerprint(), 0, nil, nil)
marker.SetInhibited(a.Fingerprint())
if !marker.Active(a.Fingerprint()) {
t.Errorf("error setting status: %v", a)
}

View File

@ -113,17 +113,19 @@ func NewSilencer(s *Silences, m types.Marker, l log.Logger) *Silencer {
// Mutes implements the Muter interface.
func (s *Silencer) Mutes(lset model.LabelSet) bool {
fp := lset.Fingerprint()
ids, markerVersion, _ := s.marker.Silenced(fp)
activeIDs, pendingIDs, markerVersion, _ := s.marker.Silenced(fp)
var (
err error
sils []*pb.Silence
allSils []*pb.Silence
newVersion = markerVersion
)
if markerVersion == s.silences.Version() {
totalSilences := len(activeIDs) + len(pendingIDs)
// No new silences added, just need to check which of the old
// silences are still relevant.
if len(ids) == 0 {
// silences are still relevant and which of the pending ones
// have become active.
if totalSilences == 0 {
// Super fast path: No silences ever applied to this
// alert, none have been added. We are done.
return false
@ -134,47 +136,55 @@ func (s *Silencer) Mutes(lset model.LabelSet) bool {
// markerVersion because the Query call might already return a
// newer version, which is not the version our old list of
// applicable silences is based on.
sils, _, err = s.silences.Query(
QIDs(ids...),
QState(types.SilenceStateActive),
allIDs := append(append(make([]string, 0, totalSilences), activeIDs...), pendingIDs...)
allSils, _, err = s.silences.Query(
QIDs(allIDs...),
QState(types.SilenceStateActive, types.SilenceStatePending),
)
} else {
// New silences have been added, do a full query.
sils, newVersion, err = s.silences.Query(
QState(types.SilenceStateActive),
allSils, newVersion, err = s.silences.Query(
QState(types.SilenceStateActive, types.SilenceStatePending),
QMatches(lset),
)
}
if err != nil {
level.Error(s.logger).Log("msg", "Querying silences failed, alerts might not get silenced correctly", "err", err)
}
if len(sils) == 0 {
s.marker.SetSilenced(fp, newVersion)
if len(allSils) == 0 {
// Easy case, neither active nor pending silences anymore.
s.marker.SetSilenced(fp, newVersion, nil, nil)
return false
}
idsChanged := len(sils) != len(ids)
if !idsChanged {
// Length is the same, but is the content the same?
for i, s := range sils {
if ids[i] != s.Id {
idsChanged = true
break
}
// It is still possible that nothing has changed, but finding out is not
// much less effort than just recreating the IDs from the query
// result. So let's do it in any case. Note that we cannot reuse the
// current ID slices for concurrency reasons.
activeIDs, pendingIDs = nil, nil
now := s.silences.now()
for _, sil := range allSils {
switch getState(sil, now) {
case types.SilenceStatePending:
pendingIDs = append(pendingIDs, sil.Id)
case types.SilenceStateActive:
activeIDs = append(activeIDs, sil.Id)
default:
// Do nothing, silence has expired in the meantime.
}
}
if idsChanged {
// Need to recreate ids.
ids = make([]string, len(sils))
for i, s := range sils {
ids[i] = s.Id
}
sort.Strings(ids) // For comparability.
}
if idsChanged || newVersion != markerVersion {
// Update marker only if something changed.
s.marker.SetSilenced(fp, newVersion, ids...)
}
return true
level.Debug(s.logger).Log(
"msg", "determined current silences state",
"now", now,
"total", len(allSils),
"active", len(activeIDs),
"pending", len(pendingIDs),
)
sort.Strings(activeIDs)
sort.Strings(pendingIDs)
s.marker.SetSilenced(fp, newVersion, activeIDs, pendingIDs)
return len(activeIDs) > 0
}
// Silences holds a silence state that can be modified, queried, and snapshot.

View File

@ -22,9 +22,11 @@ import (
"testing"
"time"
"github.com/go-kit/kit/log"
"github.com/matttproud/golang_protobuf_extensions/pbutil"
pb "github.com/prometheus/alertmanager/silence/silencepb"
"github.com/prometheus/alertmanager/types"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
)
@ -904,6 +906,73 @@ func TestSilenceExpireWithZeroRetention(t *testing.T) {
require.Equal(t, 3, count)
}
func TestSilencer(t *testing.T) {
ss, err := New(Options{Retention: time.Hour})
require.NoError(t, err)
now := time.Now()
ss.now = func() time.Time { return now }
m := types.NewMarker(prometheus.NewRegistry())
s := NewSilencer(ss, m, log.NewNopLogger())
require.False(t, s.Mutes(model.LabelSet{"foo": "bar"}), "expected alert not silenced without any silences")
_, err = ss.Set(&pb.Silence{
Matchers: []*pb.Matcher{{Name: "foo", Pattern: "baz"}},
StartsAt: now.Add(-time.Hour),
EndsAt: now.Add(5 * time.Minute),
})
require.NoError(t, err)
require.False(t, s.Mutes(model.LabelSet{"foo": "bar"}), "expected alert not silenced by non-matching silence")
id, err := ss.Set(&pb.Silence{
Matchers: []*pb.Matcher{{Name: "foo", Pattern: "bar"}},
StartsAt: now.Add(-time.Hour),
EndsAt: now.Add(5 * time.Minute),
})
require.NoError(t, err)
require.NotEmpty(t, id)
require.True(t, s.Mutes(model.LabelSet{"foo": "bar"}), "expected alert silenced by matching silence")
now = now.Add(time.Hour) // One hour passes, silence expires.
require.False(t, s.Mutes(model.LabelSet{"foo": "bar"}), "expected alert not silenced by expired silence")
// Update silence to start in the future.
_, err = ss.Set(&pb.Silence{
Id: id,
Matchers: []*pb.Matcher{{Name: "foo", Pattern: "bar"}},
StartsAt: now.Add(time.Hour),
EndsAt: now.Add(3 * time.Hour),
})
require.NoError(t, err)
require.False(t, s.Mutes(model.LabelSet{"foo": "bar"}), "expected alert not silenced by future silence")
now = now.Add(2 * time.Hour) // Two hours pass, silence becomes active.
// Exposes issue #2426.
require.True(t, s.Mutes(model.LabelSet{"foo": "bar"}), "expected alert silenced by activated silence")
_, err = ss.Set(&pb.Silence{
Matchers: []*pb.Matcher{{Name: "foo", Pattern: "b..", Type: pb.Matcher_REGEXP}},
StartsAt: now.Add(time.Hour),
EndsAt: now.Add(3 * time.Hour),
})
require.NoError(t, err)
// Note that issue #2426 doesn't apply anymore because we added a new silence.
require.True(t, s.Mutes(model.LabelSet{"foo": "bar"}), "expected alert still silenced by activated silence")
now = now.Add(2 * time.Hour) // Two hours pass, first silence expires, overlapping second silence becomes active.
// Another variant of issue #2426 (overlapping silences).
require.True(t, s.Mutes(model.LabelSet{"foo": "bar"}), "expected alert silenced by activated second silence")
}
func TestValidateMatcher(t *testing.T) {
cases := []struct {
m *pb.Matcher

View File

@ -44,29 +44,29 @@ type AlertStatus struct {
SilencedBy []string `json:"silencedBy"`
InhibitedBy []string `json:"inhibitedBy"`
// For internal tracking, not exposed in the API.
pendingSilences []string
silencesVersion int
}
// Marker helps to mark alerts as silenced and/or inhibited.
// All methods are goroutine-safe.
type Marker interface {
// SetActive sets the provided alert to AlertStateActive and deletes all
// SilencedBy and InhibitedBy entries.
SetActive(alert model.Fingerprint)
// SetSilenced replaces the previous SilencedBy by the provided IDs of
// silences, including the version number of the silences state. The set
// of provided IDs is supposed to represent the complete set of relevant
// silences. If no ID is provided and InhibitedBy is already empty, this
// call is equivalent to SetActive. Otherwise, it sets
// AlertStateSuppressed.
SetSilenced(alert model.Fingerprint, version int, silenceIDs ...string)
// active and pending silences, including the version number of the
// silences state. The set of provided IDs is supposed to represent the
// complete set of relevant silences. If no active silence IDs are provided and
// InhibitedBy is already empty, it sets the provided alert to AlertStateActive.
// Otherwise, it sets the provided alert to AlertStateSuppressed.
SetSilenced(alert model.Fingerprint, version int, activeSilenceIDs []string, pendingSilenceIDs []string)
// SetInhibited replaces the previous InhibitedBy by the provided IDs of
// alerts. In contrast to SetSilenced, the set of provided IDs is not
// expected to represent the complete set of inhibiting alerts. (In
// practice, this method is only called with one or zero IDs. However,
// this expectation might change in the future.) If no ID is provided and
// SilencedBy is already empty, this call is equivalent to
// SetActive. Otherwise, it sets AlertStateSuppressed.
// this expectation might change in the future.) If no IDs are provided
// and InhibitedBy is already empty, it sets the provided alert to
// AlertStateActive. Otherwise, it sets the provided alert to
// AlertStateSuppressed.
SetInhibited(alert model.Fingerprint, alertIDs ...string)
// Count alerts of the given state(s). With no state provided, count all
@ -79,13 +79,13 @@ type Marker interface {
Delete(model.Fingerprint)
// Various methods to inquire if the given alert is in a certain
// AlertState. Silenced also returns all the silencing silences, while
// Inhibited may return only a subset of inhibiting alerts. Silenced
// also returns the version of the silences state the result is based
// on.
// AlertState. Silenced also returns all the active and pending
// silences, while Inhibited may return only a subset of inhibiting
// alerts. Silenced also returns the version of the silences state the
// result is based on.
Unprocessed(model.Fingerprint) bool
Active(model.Fingerprint) bool
Silenced(model.Fingerprint) ([]string, int, bool)
Silenced(model.Fingerprint) (activeIDs []string, pendingIDs []string, version int, silenced bool)
Inhibited(model.Fingerprint) ([]string, bool)
}
@ -148,58 +148,7 @@ func (m *memMarker) Count(states ...AlertState) int {
}
// SetSilenced implements Marker.
func (m *memMarker) SetSilenced(alert model.Fingerprint, version int, ids ...string) {
m.mtx.Lock()
s, found := m.m[alert]
if !found {
s = &AlertStatus{}
m.m[alert] = s
}
s.silencesVersion = version
// If there are any silence or alert IDs associated with the
// fingerprint, it is suppressed. Otherwise, set it to
// AlertStateUnprocessed.
if len(ids) == 0 && len(s.InhibitedBy) == 0 {
m.mtx.Unlock()
m.SetActive(alert)
return
}
s.State = AlertStateSuppressed
s.SilencedBy = ids
m.mtx.Unlock()
}
// SetInhibited implements Marker.
func (m *memMarker) SetInhibited(alert model.Fingerprint, ids ...string) {
m.mtx.Lock()
s, found := m.m[alert]
if !found {
s = &AlertStatus{}
m.m[alert] = s
}
// If there are any silence or alert IDs associated with the
// fingerprint, it is suppressed. Otherwise, set it to
// AlertStateUnprocessed.
if len(ids) == 0 && len(s.SilencedBy) == 0 {
m.mtx.Unlock()
m.SetActive(alert)
return
}
s.State = AlertStateSuppressed
s.InhibitedBy = ids
m.mtx.Unlock()
}
// SetActive implements Marker.
func (m *memMarker) SetActive(alert model.Fingerprint) {
func (m *memMarker) SetSilenced(alert model.Fingerprint, version int, activeIDs []string, pendingIDs []string) {
m.mtx.Lock()
defer m.mtx.Unlock()
@ -208,10 +157,42 @@ func (m *memMarker) SetActive(alert model.Fingerprint) {
s = &AlertStatus{}
m.m[alert] = s
}
s.SilencedBy = activeIDs
s.pendingSilences = pendingIDs
s.silencesVersion = version
s.State = AlertStateActive
s.SilencedBy = []string{}
s.InhibitedBy = []string{}
// If there are any silence or alert IDs associated with the
// fingerprint, it is suppressed. Otherwise, set it to
// AlertStateActive.
if len(activeIDs) == 0 && len(s.InhibitedBy) == 0 {
s.State = AlertStateActive
return
}
s.State = AlertStateSuppressed
}
// SetInhibited implements Marker.
func (m *memMarker) SetInhibited(alert model.Fingerprint, ids ...string) {
m.mtx.Lock()
defer m.mtx.Unlock()
s, found := m.m[alert]
if !found {
s = &AlertStatus{}
m.m[alert] = s
}
s.InhibitedBy = ids
// If there are any silence or alert IDs associated with the
// fingerprint, it is suppressed. Otherwise, set it to
// AlertStateActive.
if len(ids) == 0 && len(s.SilencedBy) == 0 {
s.State = AlertStateActive
return
}
s.State = AlertStateSuppressed
}
// Status implements Marker.
@ -219,15 +200,14 @@ func (m *memMarker) Status(alert model.Fingerprint) AlertStatus {
m.mtx.RLock()
defer m.mtx.RUnlock()
s, found := m.m[alert]
if !found {
s = &AlertStatus{
State: AlertStateUnprocessed,
SilencedBy: []string{},
InhibitedBy: []string{},
}
if s, found := m.m[alert]; found {
return *s
}
return AlertStatus{
State: AlertStateUnprocessed,
SilencedBy: []string{},
InhibitedBy: []string{},
}
return *s
}
// Delete implements Marker.
@ -258,9 +238,9 @@ func (m *memMarker) Inhibited(alert model.Fingerprint) ([]string, bool) {
// Silenced returns whether the alert for the given Fingerprint is in the
// Silenced state, any associated silence IDs, and the silences state version
// the result is based on.
func (m *memMarker) Silenced(alert model.Fingerprint) ([]string, int, bool) {
func (m *memMarker) Silenced(alert model.Fingerprint) (activeIDs []string, pendingIDs []string, version int, silenced bool) {
s := m.Status(alert)
return s.SilencedBy, s.silencesVersion,
return s.SilencedBy, s.pendingSilences, s.silencesVersion,
s.State == AlertStateSuppressed && len(s.SilencedBy) > 0
}