Fix MaxSilenceSizeBytes limit causes incomplete updates of existing silences (#3897)

This commit fixes a bug where the MaxSilenceSizeBytes limit can
cause an incomplete update of existing silences, where the old
silence can be expired but the new silence cannot be created
because it would exceed the maximum size limit.

Signed-off-by: George Robinson <george.robinson@grafana.com>
This commit is contained in:
George Robinson 2024-06-25 13:23:09 +01:00 committed by GitHub
parent 58dc6f8d33
commit 94ac36b3e0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 62 additions and 28 deletions

View File

@ -550,6 +550,16 @@ func cloneSilence(sil *pb.Silence) *pb.Silence {
return &s return &s
} }
func (s *Silences) checkSizeLimits(msil *pb.MeshSilence) error {
if s.limits.MaxSilenceSizeBytes != nil {
n := msil.Size()
if m := s.limits.MaxSilenceSizeBytes(); m > 0 && n > m {
return fmt.Errorf("silence exceeded maximum size: %d bytes (limit: %d bytes)", n, m)
}
}
return nil
}
func (s *Silences) getSilence(id string) (*pb.Silence, bool) { func (s *Silences) getSilence(id string) (*pb.Silence, bool) {
msil, ok := s.st[id] msil, ok := s.st[id]
if !ok { if !ok {
@ -565,30 +575,15 @@ func (s *Silences) toMeshSilence(sil *pb.Silence) *pb.MeshSilence {
} }
} }
func (s *Silences) setSilence(sil *pb.Silence, now time.Time) error { func (s *Silences) setSilence(msil *pb.MeshSilence, now time.Time) error {
sil.UpdatedAt = now
msil := s.toMeshSilence(sil)
b, err := marshalMeshSilence(msil) b, err := marshalMeshSilence(msil)
if err != nil { if err != nil {
return err return err
} }
// Check the limit unless the silence has been expired. This is to avoid
// situations where silences cannot be expired after the limit has been
// reduced.
if s.limits.MaxSilenceSizeBytes != nil {
n := msil.Size()
if m := s.limits.MaxSilenceSizeBytes(); m > 0 && n > m && sil.EndsAt.After(now) {
return fmt.Errorf("silence exceeded maximum size: %d bytes (limit: %d bytes)", n, m)
}
}
if s.st.merge(msil, now) { if s.st.merge(msil, now) {
s.version++ s.version++
} }
s.broadcast(b) s.broadcast(b)
return nil return nil
} }
@ -613,7 +608,12 @@ func (s *Silences) Set(sil *pb.Silence) error {
} }
if ok && canUpdate(prev, sil, now) { if ok && canUpdate(prev, sil, now) {
return s.setSilence(sil, now) sil.UpdatedAt = now
msil := s.toMeshSilence(sil)
if err := s.checkSizeLimits(msil); err != nil {
return err
}
return s.setSilence(msil, now)
} }
// If we got here it's either a new silence or a replacing one (which would // If we got here it's either a new silence or a replacing one (which would
@ -625,14 +625,6 @@ func (s *Silences) Set(sil *pb.Silence) error {
} }
} }
if ok && getState(prev, s.nowUTC()) != types.SilenceStateExpired {
// We cannot update the silence, expire the old one to leave a history of
// the silence before modification.
if err := s.expire(prev.Id); err != nil {
return fmt.Errorf("expire previous silence: %w", err)
}
}
uid, err := uuid.NewV4() uid, err := uuid.NewV4()
if err != nil { if err != nil {
return fmt.Errorf("generate uuid: %w", err) return fmt.Errorf("generate uuid: %w", err)
@ -642,8 +634,22 @@ func (s *Silences) Set(sil *pb.Silence) error {
if sil.StartsAt.Before(now) { if sil.StartsAt.Before(now) {
sil.StartsAt = now sil.StartsAt = now
} }
sil.UpdatedAt = now
return s.setSilence(sil, now) msil := s.toMeshSilence(sil)
if err := s.checkSizeLimits(msil); err != nil {
return err
}
if ok && getState(prev, s.nowUTC()) != types.SilenceStateExpired {
// We cannot update the silence, expire the old one to leave a history of
// the silence before modification.
if err := s.expire(prev.Id); err != nil {
return fmt.Errorf("expire previous silence: %w", err)
}
}
return s.setSilence(msil, now)
} }
// canUpdate returns true if silence a can be updated to b without // canUpdate returns true if silence a can be updated to b without
@ -701,7 +707,8 @@ func (s *Silences) expire(id string) error {
sil.StartsAt = now sil.StartsAt = now
sil.EndsAt = now sil.EndsAt = now
} }
return s.setSilence(sil, now) sil.UpdatedAt = now
return s.setSilence(s.toMeshSilence(sil), now)
} }
// QueryParam expresses parameters along which silences are queried. // QueryParam expresses parameters along which silences are queried.

View File

@ -295,7 +295,7 @@ func TestSilencesSetSilence(t *testing.T) {
func() { func() {
s.mtx.Lock() s.mtx.Lock()
defer s.mtx.Unlock() defer s.mtx.Unlock()
require.NoError(t, s.setSilence(sil, nowpb)) require.NoError(t, s.setSilence(s.toMeshSilence(sil), nowpb))
}() }()
// Ensure broadcast was called. // Ensure broadcast was called.
@ -575,6 +575,33 @@ func TestSilenceLimits(t *testing.T) {
sil6, err = s.QueryOne(QIDs(sil6.Id)) sil6, err = s.QueryOne(QIDs(sil6.Id))
require.NoError(t, err) require.NoError(t, err)
require.Equal(t, types.SilenceStateActive, getState(sil6, s.nowUTC())) require.Equal(t, types.SilenceStateActive, getState(sil6, s.nowUTC()))
// Should not be able to update with a comment that exceeds maximum size.
// Need to increase the maximum number of silences to test this.
s.limits.MaxSilences = func() int { return 2 }
sil8 := cloneSilence(sil6)
sil8.Comment = strings.Repeat("m", 2<<11)
require.EqualError(t, s.Set(sil8), fmt.Sprintf("silence exceeded maximum size: %d bytes (limit: 4096 bytes)", s.toMeshSilence(sil8).Size()))
// sil6 should not be expired because the update failed.
sil6, err = s.QueryOne(QIDs(sil6.Id))
require.NoError(t, err)
require.Equal(t, types.SilenceStateActive, getState(sil6, s.nowUTC()))
// Should not be able to replace with a silence that exceeds maximum size.
// This is different from the previous assertion as unlike when adding or
// updating a comment, changing the matchers for a silence should expire
// the existing silence, unless the silence that is replacing it exceeds
// limits, in which case the operation should fail and the existing silence
// should still be active.
sil9 := cloneSilence(sil8)
sil9.Matchers = []*pb.Matcher{{Name: "n", Pattern: "o"}}
require.EqualError(t, s.Set(sil9), fmt.Sprintf("silence exceeded maximum size: %d bytes (limit: 4096 bytes)", s.toMeshSilence(sil9).Size()))
// sil6 should not be expired because the update failed.
sil6, err = s.QueryOne(QIDs(sil6.Id))
require.NoError(t, err)
require.Equal(t, types.SilenceStateActive, getState(sil6, s.nowUTC()))
} }
func TestSilenceNoLimits(t *testing.T) { func TestSilenceNoLimits(t *testing.T) {