From 9449bd1fa974cb1f148b0bfce9a1e25f91afecf6 Mon Sep 17 00:00:00 2001 From: Jose Donizetti Date: Thu, 28 Sep 2017 05:25:35 -0300 Subject: [PATCH] Ignore expired silences OnGossip (#999) This will fix the bug of resync deleted silences due to the state of other peers. --- silence/silence.go | 17 +++++++++-- silence/silence_test.go | 63 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 77 insertions(+), 3 deletions(-) diff --git a/silence/silence.go b/silence/silence.go index 3c72eacb..1a869264 100644 --- a/silence/silence.go +++ b/silence/silence.go @@ -291,6 +291,7 @@ func (s *Silences) GC() (int, error) { n++ } } + return n, nil } @@ -833,8 +834,8 @@ func (gd *gossipData) Merge(other mesh.GossipData) mesh.GossipData { return gd } -// mergeDelta behaves like Merge but returns a gossipData only -// containing things that have changed. +// mergeDelta behaves like Merge but ignores expired silences, and +// returns a gossipData only containing things that have changed. func (gd *gossipData) mergeDelta(od *gossipData) *gossipData { delta := newGossipData() @@ -845,6 +846,18 @@ func (gd *gossipData) mergeDelta(od *gossipData) *gossipData { defer gd.mtx.Unlock() for id, s := range od.data { + // If a gossiped silence is expired, skip it. + // For a given silence duration exceeding a few minutes, + // active silences will have already been gossiped. + // Once the active silence is gossiped, its expiration + // should happen more or less simultaneously on the different + // alertmanager nodes. Preventing the gossiping of expired + // silences allows them to be GC'd, and doesn't affect + // consistency across the mesh. + if !s.ExpiresAt.After(utcNow()) { + continue + } + // Comments list was moved to a single comment. Apply upgrade // on silences received from peers. if len(s.Silence.Comments) > 0 { diff --git a/silence/silence_test.go b/silence/silence_test.go index eed40604..6fae48a5 100644 --- a/silence/silence_test.go +++ b/silence/silence_test.go @@ -1015,6 +1015,7 @@ func TestGossipDataMerge(t *testing.T) { Silence: &pb.Silence{UpdatedAt: ts}, } } + cases := []struct { a, b *gossipData final, delta *gossipData @@ -1059,8 +1060,68 @@ func TestGossipDataMerge(t *testing.T) { require.Equal(t, c.final, res, "Merge result should match expectation") require.Equal(t, c.final, ca, "Merge should apply changes to original state") require.Equal(t, c.b, cb, "Merged state should remain unmodified") + } +} - ca, cb = c.a.clone(), c.b.clone() +func TestGossipDataMergeDelta(t *testing.T) { + now := utcNow() + + // We only care about key names and timestamps for the + // merging logic. + newSilence := func(ts time.Time) *pb.MeshSilence { + return &pb.MeshSilence{ + Silence: &pb.Silence{UpdatedAt: ts}, + ExpiresAt: ts.Add(time.Hour), + } + } + + newExpiredSilence := func(ts time.Time) *pb.MeshSilence { + return &pb.MeshSilence{ + Silence: &pb.Silence{UpdatedAt: ts}, + ExpiresAt: ts, + } + } + + cases := []struct { + a, b *gossipData + final, delta *gossipData + }{ + { + a: &gossipData{ + data: silenceMap{ + "a1": newSilence(now), + "a2": newSilence(now), + "a3": newSilence(now), + }, + }, + b: &gossipData{ + data: silenceMap{ + "b1": newSilence(now), // new key, should be added + "a2": newSilence(now.Add(-time.Minute)), // older timestamp, should be dropped + "a3": newSilence(now.Add(time.Minute)), // newer timestamp, should overwrite + "a4": newExpiredSilence(now.Add(-time.Minute)), // expired, should be dropped + "a5": newExpiredSilence(now.Add(-time.Hour)), // expired, should be dropped + }, + }, + final: &gossipData{ + data: silenceMap{ + "a1": newSilence(now), + "a2": newSilence(now), + "a3": newSilence(now.Add(time.Minute)), + "b1": newSilence(now), + }, + }, + delta: &gossipData{ + data: silenceMap{ + "b1": newSilence(now), + "a3": newSilence(now.Add(time.Minute)), + }, + }, + }, + } + + for _, c := range cases { + ca, cb := c.a.clone(), c.b.clone() delta := ca.mergeDelta(cb)