mirror of
https://github.com/prometheus/alertmanager
synced 2025-04-24 12:07:25 +00:00
[gossip] Don't merge expired gossip messages (#1631)
* [silences] Don't merge expired silences

  If they're expired, they should be cleaned up on the next GC cycle, but merging them in means that they'll probably be gossiped continually between the cluster members.

  Signed-off-by: stuart nelson <stuartnelson3@gmail.com>

* Add analogous behavior and test for nflog

  The code for nflog was also constantly re-adding nflogs to the internal memory store, the same as the silence code was.

  Signed-off-by: stuart nelson <stuartnelson3@gmail.com>

* Add retention to TestQuery

  With the default 0 retention, the alerts would not be merged.

  Signed-off-by: Stuart Nelson <stuartnelson3@gmail.com>
This commit is contained in:
parent
573389a9bb
commit
2026e4a01f
@ -226,7 +226,10 @@ func (s state) clone() state {
|
|||||||
|
|
||||||
// merge returns true or false whether the MeshEntry was merged or
|
// merge returns true or false whether the MeshEntry was merged or
|
||||||
// not. This information is used to decide to gossip the message further.
|
// not. This information is used to decide to gossip the message further.
|
||||||
func (s state) merge(e *pb.MeshEntry) bool {
|
func (s state) merge(e *pb.MeshEntry, now time.Time) bool {
|
||||||
|
if e.ExpiresAt.Before(now) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
k := stateKey(string(e.Entry.GroupKey), e.Entry.Receiver)
|
k := stateKey(string(e.Entry.GroupKey), e.Entry.Receiver)
|
||||||
|
|
||||||
prev, ok := s[k]
|
prev, ok := s[k]
|
||||||
@ -411,7 +414,7 @@ func (l *Log) Log(r *pb.Receiver, gkey string, firingAlerts, resolvedAlerts []ui
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
l.st.merge(e)
|
l.st.merge(e, l.now())
|
||||||
l.broadcast(b)
|
l.broadcast(b)
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
@ -522,9 +525,10 @@ func (l *Log) Merge(b []byte) error {
|
|||||||
}
|
}
|
||||||
l.mtx.Lock()
|
l.mtx.Lock()
|
||||||
defer l.mtx.Unlock()
|
defer l.mtx.Unlock()
|
||||||
|
now := l.now()
|
||||||
|
|
||||||
for _, e := range st {
|
for _, e := range st {
|
||||||
if merged := l.st.merge(e); merged && !cluster.OversizedMessage(b) {
|
if merged := l.st.merge(e, now); merged && !cluster.OversizedMessage(b) {
|
||||||
// If this is the first we've seen the message and it's
|
// If this is the first we've seen the message and it's
|
||||||
// not oversized, gossip it to other nodes. We don't
|
// not oversized, gossip it to other nodes. We don't
|
||||||
// propagate oversized messages because they're sent to
|
// propagate oversized messages because they're sent to
|
||||||
|
@ -156,7 +156,7 @@ func TestStateMerge(t *testing.T) {
|
|||||||
|
|
||||||
// We only care about key names and timestamps for the
|
// We only care about key names and timestamps for the
|
||||||
// merging logic.
|
// merging logic.
|
||||||
newEntry := func(ts time.Time, name string) *pb.MeshEntry {
|
newEntry := func(name string, ts, exp time.Time) *pb.MeshEntry {
|
||||||
return &pb.MeshEntry{
|
return &pb.MeshEntry{
|
||||||
Entry: &pb.Entry{
|
Entry: &pb.Entry{
|
||||||
Timestamp: ts,
|
Timestamp: ts,
|
||||||
@ -167,28 +167,33 @@ func TestStateMerge(t *testing.T) {
|
|||||||
Integration: "integr",
|
Integration: "integr",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
ExpiresAt: exp,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
exp := now.Add(time.Minute)
|
||||||
|
|
||||||
cases := []struct {
|
cases := []struct {
|
||||||
a, b state
|
a, b state
|
||||||
final state
|
final state
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
a: state{
|
a: state{
|
||||||
"key:a1/integr/1": newEntry(now, "a1"),
|
"key:a1/integr/1": newEntry("a1", now, exp),
|
||||||
"key:a2/integr/1": newEntry(now, "a2"),
|
"key:a2/integr/1": newEntry("a2", now, exp),
|
||||||
"key:a3/integr/1": newEntry(now, "a3"),
|
"key:a3/integr/1": newEntry("a3", now, exp),
|
||||||
},
|
},
|
||||||
b: state{
|
b: state{
|
||||||
"key:b1/integr/1": newEntry(now, "b1"), // new key, should be added
|
"key:b1/integr/1": newEntry("b1", now, exp), // new key, should be added
|
||||||
"key:a2/integr/1": newEntry(now.Add(-time.Minute), "a2"), // older timestamp, should be dropped
|
"key:b2/integr/1": newEntry("b2", now.Add(-time.Minute), now.Add(-time.Millisecond)), // new key, expired, should not be added
|
||||||
"key:a3/integr/1": newEntry(now.Add(time.Minute), "a3"), // newer timestamp, should overwrite
|
"key:a2/integr/1": newEntry("a2", now.Add(-time.Minute), exp), // older timestamp, should be dropped
|
||||||
|
"key:a3/integr/1": newEntry("a3", now.Add(time.Minute), exp), // newer timestamp, should overwrite
|
||||||
},
|
},
|
||||||
final: state{
|
final: state{
|
||||||
"key:a1/integr/1": newEntry(now, "a1"),
|
"key:a1/integr/1": newEntry("a1", now, exp),
|
||||||
"key:a2/integr/1": newEntry(now, "a2"),
|
"key:a2/integr/1": newEntry("a2", now, exp),
|
||||||
"key:a3/integr/1": newEntry(now.Add(time.Minute), "a3"),
|
"key:a3/integr/1": newEntry("a3", now.Add(time.Minute), exp),
|
||||||
"key:b1/integr/1": newEntry(now, "b1"),
|
"key:b1/integr/1": newEntry("b1", now, exp),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@ -198,7 +203,7 @@ func TestStateMerge(t *testing.T) {
|
|||||||
|
|
||||||
res := c.a.clone()
|
res := c.a.clone()
|
||||||
for _, e := range cb {
|
for _, e := range cb {
|
||||||
res.merge(e)
|
res.merge(e, now)
|
||||||
}
|
}
|
||||||
require.Equal(t, c.final, res, "Merge result should match expectation")
|
require.Equal(t, c.final, res, "Merge result should match expectation")
|
||||||
require.Equal(t, c.b, cb, "Merged state should remain unmodified")
|
require.Equal(t, c.b, cb, "Merged state should remain unmodified")
|
||||||
@ -264,7 +269,7 @@ func TestStateDataCoding(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestQuery(t *testing.T) {
|
func TestQuery(t *testing.T) {
|
||||||
nl, err := New()
|
nl, err := New(WithRetention(time.Second))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
require.NoError(t, err, "constructing nflog failed")
|
require.NoError(t, err, "constructing nflog failed")
|
||||||
}
|
}
|
||||||
|
@ -403,7 +403,7 @@ func (s *Silences) setSilence(sil *pb.Silence) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
s.st.merge(msil)
|
s.st.merge(msil, s.now())
|
||||||
s.broadcast(b)
|
s.broadcast(b)
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
@ -717,8 +717,10 @@ func (s *Silences) Merge(b []byte) error {
|
|||||||
s.mtx.Lock()
|
s.mtx.Lock()
|
||||||
defer s.mtx.Unlock()
|
defer s.mtx.Unlock()
|
||||||
|
|
||||||
|
now := s.now()
|
||||||
|
|
||||||
for _, e := range st {
|
for _, e := range st {
|
||||||
if merged := s.st.merge(e); merged && !cluster.OversizedMessage(b) {
|
if merged := s.st.merge(e, now); merged && !cluster.OversizedMessage(b) {
|
||||||
// If this is the first we've seen the message and it's
|
// If this is the first we've seen the message and it's
|
||||||
// not oversized, gossip it to other nodes. We don't
|
// not oversized, gossip it to other nodes. We don't
|
||||||
// propagate oversized messages because they're sent to
|
// propagate oversized messages because they're sent to
|
||||||
@ -739,7 +741,10 @@ func (s *Silences) SetBroadcast(f func([]byte)) {
|
|||||||
|
|
||||||
type state map[string]*pb.MeshSilence
|
type state map[string]*pb.MeshSilence
|
||||||
|
|
||||||
func (s state) merge(e *pb.MeshSilence) bool {
|
func (s state) merge(e *pb.MeshSilence, now time.Time) bool {
|
||||||
|
if e.ExpiresAt.Before(now) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
// Comments list was moved to a single comment. Apply upgrade
|
// Comments list was moved to a single comment. Apply upgrade
|
||||||
// on silences received from peers.
|
// on silences received from peers.
|
||||||
if len(e.Silence.Comments) > 0 {
|
if len(e.Silence.Comments) > 0 {
|
||||||
|
@ -988,39 +988,43 @@ func TestStateMerge(t *testing.T) {
|
|||||||
|
|
||||||
// We only care about key names and timestamps for the
|
// We only care about key names and timestamps for the
|
||||||
// merging logic.
|
// merging logic.
|
||||||
newSilence := func(id string, ts time.Time) *pb.MeshSilence {
|
newSilence := func(id string, ts, exp time.Time) *pb.MeshSilence {
|
||||||
return &pb.MeshSilence{
|
return &pb.MeshSilence{
|
||||||
Silence: &pb.Silence{Id: id, UpdatedAt: ts},
|
Silence: &pb.Silence{Id: id, UpdatedAt: ts},
|
||||||
|
ExpiresAt: exp,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
exp := now.Add(time.Minute)
|
||||||
|
|
||||||
cases := []struct {
|
cases := []struct {
|
||||||
a, b state
|
a, b state
|
||||||
final state
|
final state
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
a: state{
|
a: state{
|
||||||
"a1": newSilence("a1", now),
|
"a1": newSilence("a1", now, exp),
|
||||||
"a2": newSilence("a2", now),
|
"a2": newSilence("a2", now, exp),
|
||||||
"a3": newSilence("a3", now),
|
"a3": newSilence("a3", now, exp),
|
||||||
},
|
},
|
||||||
b: state{
|
b: state{
|
||||||
"b1": newSilence("b1", now), // new key, should be added
|
"b1": newSilence("b1", now, exp), // new key, should be added
|
||||||
"a2": newSilence("a2", now.Add(-time.Minute)), // older timestamp, should be dropped
|
"a2": newSilence("a2", now.Add(-time.Minute), exp), // older timestamp, should be dropped
|
||||||
"a3": newSilence("a3", now.Add(time.Minute)), // newer timestamp, should overwrite
|
"a3": newSilence("a3", now.Add(time.Minute), exp), // newer timestamp, should overwrite
|
||||||
|
"a4": newSilence("a4", now.Add(-time.Minute), now.Add(-time.Millisecond)), // new key, expired, should not be added
|
||||||
},
|
},
|
||||||
final: state{
|
final: state{
|
||||||
"a1": newSilence("a1", now),
|
"a1": newSilence("a1", now, exp),
|
||||||
"a2": newSilence("a2", now),
|
"a2": newSilence("a2", now, exp),
|
||||||
"a3": newSilence("a3", now.Add(time.Minute)),
|
"a3": newSilence("a3", now.Add(time.Minute), exp),
|
||||||
"b1": newSilence("b1", now),
|
"b1": newSilence("b1", now, exp),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
for _, e := range c.b {
|
for _, e := range c.b {
|
||||||
c.a.merge(e)
|
c.a.merge(e, now)
|
||||||
}
|
}
|
||||||
|
|
||||||
require.Equal(t, c.final, c.a, "Merge result should match expectation")
|
require.Equal(t, c.final, c.a, "Merge result should match expectation")
|
||||||
|
Loading…
Reference in New Issue
Block a user