From e86d82ad2d95fbdd2f3c3b6b052cb82038fba95f Mon Sep 17 00:00:00 2001 From: Krasi Georgiev Date: Wed, 1 Nov 2017 11:58:00 +0000 Subject: [PATCH] Fix regression of alert rules state loss on config reload. (#3382) * incorrect map name for the group prevented copying state from existing alert rules on config reload * applyConfig test * few nits * nits 2 --- config/testdata/first.rules | 10 +++++++++ rules/manager.go | 13 +++++++---- rules/manager_test.go | 45 +++++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 4 deletions(-) create mode 100644 config/testdata/first.rules diff --git a/config/testdata/first.rules b/config/testdata/first.rules new file mode 100644 index 000000000..96a4c78f7 --- /dev/null +++ b/config/testdata/first.rules @@ -0,0 +1,10 @@ +groups: +- name: my-group-name + rules: + - alert: InstanceDown + expr: up == 0 + for: 1m + labels: + severity: critical + annotations: + description: "stuff's happening with {{ $labels.service }}" diff --git a/rules/manager.go b/rules/manager.go index 02846670e..865e80efe 100644 --- a/rules/manager.go +++ b/rules/manager.go @@ -494,8 +494,9 @@ func (m *Manager) ApplyConfig(conf *config.Config) error { // If there is an old group with the same identifier, stop it and wait for // it to finish the current iteration. Then copy it into the new group. - oldg, ok := m.groups[newg.name] - delete(m.groups, newg.name) + gn := groupKey(newg.name, newg.file) + oldg, ok := m.groups[gn] + delete(m.groups, gn) go func(newg *Group) { if ok { @@ -567,14 +568,18 @@ func (m *Manager) loadGroups(interval time.Duration, filenames ...string) (map[s )) } - // Group names need not be unique across filenames. - groups[rg.Name+";"+fn] = NewGroup(rg.Name, fn, itv, rules, m.opts) + groups[groupKey(rg.Name, fn)] = NewGroup(rg.Name, fn, itv, rules, m.opts) } } return groups, nil } +// Group names need not be unique across filenames. +func groupKey(name, file string) string { + return name + ";" + file +} + // RuleGroups returns the list of manager's rule groups. func (m *Manager) RuleGroups() []*Group { m.mtx.RLock() diff --git a/rules/manager_test.go b/rules/manager_test.go index 57fb2d397..6eb348f49 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -25,6 +25,7 @@ import ( "github.com/go-kit/kit/log" "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/pkg/timestamp" "github.com/prometheus/prometheus/pkg/value" @@ -286,3 +287,47 @@ func TestCopyState(t *testing.T) { t.Fatalf("Active alerts not as expected. Wanted: %+v Got: %+v", oldGroup.rules[0], oldGroup.rules[3]) } } + +func TestApplyConfig(t *testing.T) { + expected := map[string]labels.Labels{ + "test": labels.Labels{ + labels.Label{ + Name: "name", + Value: "value", + }, + }, + } + conf, err := config.LoadFile("../config/testdata/conf.good.yml") + if err != nil { + t.Fatalf(err.Error()) + } + ruleManager := NewManager(&ManagerOptions{ + Appendable: nil, + Notifier: nil, + QueryEngine: nil, + Context: context.Background(), + Logger: log.NewNopLogger(), + }) + ruleManager.Run() + + if err := ruleManager.ApplyConfig(conf); err != nil { + t.Fatalf(err.Error()) + } + for _, g := range ruleManager.groups { + g.seriesInPreviousEval = []map[string]labels.Labels{ + expected, + } + } + + if err := ruleManager.ApplyConfig(conf); err != nil { + t.Fatalf(err.Error()) + } + for _, g := range ruleManager.groups { + for _, actual := range g.seriesInPreviousEval { + + if !reflect.DeepEqual(expected, actual) { + t.Fatalf("Rule groups state lost after config reload. Expected: %+v Got: %+v", expected, actual) + } + } + } +}