bugfix: fix leaking of Silences matcherCache entries (#3930)

* fix leaking of matcher cache entries

Signed-off-by: Ethan Hunter <ehunter@hudson-trading.com>

* improve clock logic in TestSilenceGCOverTime

Signed-off-by: Ethan Hunter <ehunter@hudson-trading.com>

* remove TestSilencesGc

Signed-off-by: Ethan Hunter <ehunter@hudson-trading.com>

* make table driven test more idiomatic

Signed-off-by: Ethan Hunter <ehunter@hudson-trading.com>

* replace test with one suggested by grobinson-grafana

Signed-off-by: Ethan Hunter <ehunter@hudson-trading.com>

* replace require.Len with require.Empty where needed

Signed-off-by: Ethan Hunter <ehunter@hudson-trading.com>

---------

Signed-off-by: Ethan Hunter <ehunter@hudson-trading.com>
Co-authored-by: gotjosh <josue.abreu@gmail.com>
This commit is contained in:
Ethan Hunter 2024-08-21 10:10:15 -06:00 committed by GitHub
parent 76527e9ada
commit 3e6356b4c9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 130 additions and 24 deletions

View File

@ -49,13 +49,13 @@ var ErrNotFound = errors.New("silence not found")
// ErrInvalidState is returned if the state isn't valid.
var ErrInvalidState = errors.New("invalid state")
type matcherCache map[*pb.Silence]labels.Matchers
type matcherCache map[string]labels.Matchers
// Get retrieves the matchers for a given silence. If it is a missed cache
// access, it compiles and adds the matchers of the requested silence to the
// cache.
func (c matcherCache) Get(s *pb.Silence) (labels.Matchers, error) {
if m, ok := c[s]; ok {
if m, ok := c[s.Id]; ok {
return m, nil
}
return c.add(s)
@ -88,7 +88,7 @@ func (c matcherCache) add(s *pb.Silence) (labels.Matchers, error) {
ms[i] = matcher
}
c[s] = ms
c[s.Id] = ms
return ms, nil
}
@ -478,7 +478,7 @@ func (s *Silences) GC() (int, error) {
}
if !sil.ExpiresAt.After(now) {
delete(s.st, id)
delete(s.mc, sil.Silence)
delete(s.mc, sil.Silence.Id)
n++
}
}

View File

@ -84,29 +84,135 @@ func TestOptionsValidate(t *testing.T) {
}
}
func TestSilencesGC(t *testing.T) {
s, err := New(Options{})
require.NoError(t, err)
func TestSilenceGCOverTime(t *testing.T) {
t.Run("GC does not remove active silences", func(t *testing.T) {
s, err := New(Options{})
require.NoError(t, err)
s.clock = clock.NewMock()
now := s.nowUTC()
s.st = state{
"1": &pb.MeshSilence{Silence: &pb.Silence{Id: "1"}, ExpiresAt: now},
"2": &pb.MeshSilence{Silence: &pb.Silence{Id: "2"}, ExpiresAt: now.Add(-time.Second)},
"3": &pb.MeshSilence{Silence: &pb.Silence{Id: "3"}, ExpiresAt: now.Add(time.Second)},
}
want := state{
"3": &pb.MeshSilence{Silence: &pb.Silence{Id: "3"}, ExpiresAt: now.Add(time.Second)},
}
n, err := s.GC()
require.NoError(t, err)
require.Equal(t, 2, n)
require.Equal(t, want, s.st)
})
s.clock = clock.NewMock()
now := s.nowUTC()
t.Run("GC does not leak cache entries", func(t *testing.T) {
s, err := New(Options{})
require.NoError(t, err)
clock := clock.NewMock()
s.clock = clock
sil1 := &pb.Silence{
Matchers: []*pb.Matcher{{
Type: pb.Matcher_EQUAL,
Name: "foo",
Pattern: "bar",
}},
StartsAt: clock.Now(),
EndsAt: clock.Now().Add(time.Minute),
}
require.NoError(t, s.Set(sil1))
// Need to query the silence to populate the matcher cache.
s.Query(QMatches(model.LabelSet{"foo": "bar"}))
require.Len(t, s.st, 1)
require.Len(t, s.mc, 1)
// Move time forward and both silence and cache entry should be garbage
// collected.
clock.Add(time.Minute)
n, err := s.GC()
require.NoError(t, err)
require.Equal(t, 1, n)
require.Empty(t, s.st)
require.Empty(t, s.mc)
})
newSilence := func(exp time.Time) *pb.MeshSilence {
return &pb.MeshSilence{ExpiresAt: exp}
}
s.st = state{
"1": newSilence(now),
"2": newSilence(now.Add(-time.Second)),
"3": newSilence(now.Add(time.Second)),
}
want := state{
"3": newSilence(now.Add(time.Second)),
}
t.Run("replacing a silences does not leak cache entries", func(t *testing.T) {
s, err := New(Options{})
require.NoError(t, err)
clock := clock.NewMock()
s.clock = clock
sil1 := &pb.Silence{
Matchers: []*pb.Matcher{{
Type: pb.Matcher_EQUAL,
Name: "foo",
Pattern: "bar",
}},
StartsAt: clock.Now(),
EndsAt: clock.Now().Add(time.Minute),
}
require.NoError(t, s.Set(sil1))
// Need to query the silence to populate the matcher cache.
s.Query(QMatches(model.LabelSet{"foo": "bar"}))
require.Len(t, s.st, 1)
require.Len(t, s.mc, 1)
// must clone sil1 before replacing it.
sil2 := cloneSilence(sil1)
sil2.Matchers = []*pb.Matcher{{
Type: pb.Matcher_EQUAL,
Name: "bar",
Pattern: "baz",
}}
require.NoError(t, s.Set(sil2))
// Need to query the silence to populate the matcher cache.
s.Query(QMatches(model.LabelSet{"bar": "baz"}))
require.Len(t, s.st, 2)
require.Len(t, s.mc, 2)
// Move time forward and both silence and cache entry should be garbage
// collected.
clock.Add(time.Minute)
n, err := s.GC()
require.NoError(t, err)
require.Equal(t, 2, n)
require.Empty(t, s.st)
require.Empty(t, s.mc)
})
n, err := s.GC()
require.NoError(t, err)
require.Equal(t, 2, n)
require.Equal(t, want, s.st)
// This test checks for a memory leak that occurred in the matcher cache when
// updating an existing silence.
t.Run("updating a silences does not leak cache entries", func(t *testing.T) {
s, err := New(Options{})
require.NoError(t, err)
clock := clock.NewMock()
s.clock = clock
sil1 := &pb.Silence{
Id: "1",
Matchers: []*pb.Matcher{{
Type: pb.Matcher_EQUAL,
Name: "foo",
Pattern: "bar",
}},
StartsAt: clock.Now(),
EndsAt: clock.Now().Add(time.Minute),
}
s.st["1"] = &pb.MeshSilence{Silence: sil1, ExpiresAt: clock.Now().Add(time.Minute)}
// Need to query the silence to populate the matcher cache.
s.Query(QMatches(model.LabelSet{"foo": "bar"}))
require.Len(t, s.mc, 1)
// must clone sil1 before updating it.
sil2 := cloneSilence(sil1)
require.NoError(t, s.Set(sil2))
// The memory leak occurred because updating a silence would add a new
// entry in the matcher cache even though no new silence was created.
// This check asserts that this no longer happens.
s.Query(QMatches(model.LabelSet{"foo": "bar"}))
require.Len(t, s.st, 1)
require.Len(t, s.mc, 1)
// Move time forward and both silence and cache entry should be garbage
// collected.
clock.Add(time.Minute)
n, err := s.GC()
require.NoError(t, err)
require.Equal(t, 1, n)
require.Empty(t, s.st)
require.Empty(t, s.mc)
})
}
func TestSilencesSnapshot(t *testing.T) {