Fix regression of alert rules state loss on config reload. (#3382)
* an incorrect map key for the group prevented state from being copied from existing alert rules on config reload * add ApplyConfig test * a few nits * nits 2
This commit is contained in:
parent
3382f39046
commit
e86d82ad2d
|
@ -0,0 +1,10 @@
|
||||||
|
groups:
|
||||||
|
- name: my-group-name
|
||||||
|
rules:
|
||||||
|
- alert: InstanceDown
|
||||||
|
expr: up == 0
|
||||||
|
for: 1m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
annotations:
|
||||||
|
description: "stuff's happening with {{ $labels.service }}"
|
|
@ -494,8 +494,9 @@ func (m *Manager) ApplyConfig(conf *config.Config) error {
|
||||||
|
|
||||||
// If there is an old group with the same identifier, stop it and wait for
|
// If there is an old group with the same identifier, stop it and wait for
|
||||||
// it to finish the current iteration. Then copy it into the new group.
|
// it to finish the current iteration. Then copy it into the new group.
|
||||||
oldg, ok := m.groups[newg.name]
|
gn := groupKey(newg.name, newg.file)
|
||||||
delete(m.groups, newg.name)
|
oldg, ok := m.groups[gn]
|
||||||
|
delete(m.groups, gn)
|
||||||
|
|
||||||
go func(newg *Group) {
|
go func(newg *Group) {
|
||||||
if ok {
|
if ok {
|
||||||
|
@ -567,14 +568,18 @@ func (m *Manager) loadGroups(interval time.Duration, filenames ...string) (map[s
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Group names need not be unique across filenames.
|
groups[groupKey(rg.Name, fn)] = NewGroup(rg.Name, fn, itv, rules, m.opts)
|
||||||
groups[rg.Name+";"+fn] = NewGroup(rg.Name, fn, itv, rules, m.opts)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return groups, nil
|
return groups, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// groupKey builds the key under which a rule group is stored in the group
// map. Group names need not be unique across filenames, so the key combines
// the group name with the file it was loaded from.
func groupKey(name, file string) string {
	const sep = ";"
	return name + sep + file
}
|
||||||
|
|
||||||
// RuleGroups returns the list of manager's rule groups.
|
// RuleGroups returns the list of manager's rule groups.
|
||||||
func (m *Manager) RuleGroups() []*Group {
|
func (m *Manager) RuleGroups() []*Group {
|
||||||
m.mtx.RLock()
|
m.mtx.RLock()
|
||||||
|
|
|
@ -25,6 +25,7 @@ import (
|
||||||
"github.com/go-kit/kit/log"
|
"github.com/go-kit/kit/log"
|
||||||
"github.com/prometheus/common/model"
|
"github.com/prometheus/common/model"
|
||||||
|
|
||||||
|
"github.com/prometheus/prometheus/config"
|
||||||
"github.com/prometheus/prometheus/pkg/labels"
|
"github.com/prometheus/prometheus/pkg/labels"
|
||||||
"github.com/prometheus/prometheus/pkg/timestamp"
|
"github.com/prometheus/prometheus/pkg/timestamp"
|
||||||
"github.com/prometheus/prometheus/pkg/value"
|
"github.com/prometheus/prometheus/pkg/value"
|
||||||
|
@ -286,3 +287,47 @@ func TestCopyState(t *testing.T) {
|
||||||
t.Fatalf("Active alerts not as expected. Wanted: %+v Got: %+v", oldGroup.rules[0], oldGroup.rules[3])
|
t.Fatalf("Active alerts not as expected. Wanted: %+v Got: %+v", oldGroup.rules[0], oldGroup.rules[3])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestApplyConfig(t *testing.T) {
|
||||||
|
expected := map[string]labels.Labels{
|
||||||
|
"test": labels.Labels{
|
||||||
|
labels.Label{
|
||||||
|
Name: "name",
|
||||||
|
Value: "value",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
conf, err := config.LoadFile("../config/testdata/conf.good.yml")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf(err.Error())
|
||||||
|
}
|
||||||
|
ruleManager := NewManager(&ManagerOptions{
|
||||||
|
Appendable: nil,
|
||||||
|
Notifier: nil,
|
||||||
|
QueryEngine: nil,
|
||||||
|
Context: context.Background(),
|
||||||
|
Logger: log.NewNopLogger(),
|
||||||
|
})
|
||||||
|
ruleManager.Run()
|
||||||
|
|
||||||
|
if err := ruleManager.ApplyConfig(conf); err != nil {
|
||||||
|
t.Fatalf(err.Error())
|
||||||
|
}
|
||||||
|
for _, g := range ruleManager.groups {
|
||||||
|
g.seriesInPreviousEval = []map[string]labels.Labels{
|
||||||
|
expected,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := ruleManager.ApplyConfig(conf); err != nil {
|
||||||
|
t.Fatalf(err.Error())
|
||||||
|
}
|
||||||
|
for _, g := range ruleManager.groups {
|
||||||
|
for _, actual := range g.seriesInPreviousEval {
|
||||||
|
|
||||||
|
if !reflect.DeepEqual(expected, actual) {
|
||||||
|
t.Fatalf("Rule groups state lost after config reload. Expected: %+v Got: %+v", expected, actual)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue