Delete prometheus_rule_group metrics when groups are removed (#6693)
* Delete prometheus_rule_group metrics when groups are removed Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu>
This commit is contained in:
parent
cf42888e4d
commit
56ebd5afde
|
@ -0,0 +1,5 @@
|
|||
groups:
|
||||
- name: test_2
|
||||
rules:
|
||||
- record: test_2
|
||||
expr: vector(2)
|
|
@ -874,7 +874,7 @@ func (m *Manager) Update(interval time.Duration, files []string, externalLabels
|
|||
// check if new group equals with the old group, if yes then skip it.
|
||||
// If not equals, stop it and wait for it to finish the current iteration.
|
||||
// Then copy it into the new group.
|
||||
gn := groupKey(newg.name, newg.file)
|
||||
gn := groupKey(newg.file, newg.name)
|
||||
oldg, ok := m.groups[gn]
|
||||
delete(m.groups, gn)
|
||||
|
||||
|
@ -901,8 +901,13 @@ func (m *Manager) Update(interval time.Duration, files []string, externalLabels
|
|||
}
|
||||
|
||||
// Stop remaining old groups.
|
||||
for _, oldg := range m.groups {
|
||||
for n, oldg := range m.groups {
|
||||
oldg.stop()
|
||||
if m := oldg.metrics; m != nil {
|
||||
m.groupLastEvalTime.DeleteLabelValues(n)
|
||||
m.groupLastDuration.DeleteLabelValues(n)
|
||||
m.groupRules.DeleteLabelValues(n)
|
||||
}
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
|
@ -958,7 +963,7 @@ func (m *Manager) LoadGroups(
|
|||
))
|
||||
}
|
||||
|
||||
groups[groupKey(rg.Name, fn)] = NewGroup(rg.Name, fn, itv, rules, shouldRestore, m.opts)
|
||||
groups[groupKey(fn, rg.Name)] = NewGroup(rg.Name, fn, itv, rules, shouldRestore, m.opts)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -966,8 +971,8 @@ func (m *Manager) LoadGroups(
|
|||
}
|
||||
|
||||
// Group names need not be unique across filenames.
|
||||
func groupKey(name, file string) string {
|
||||
return name + ";" + file
|
||||
// groupKey returns the key under which a rule group is stored: the file
// path and the group name joined by ";", with the file first.
func groupKey(file, name string) string {
|
||||
return file + ";" + name
|
||||
}
|
||||
|
||||
// RuleGroups returns the list of manager's rule groups.
|
||||
|
|
|
@ -24,6 +24,7 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/go-kit/kit/log"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/common/model"
|
||||
yaml "gopkg.in/yaml.v2"
|
||||
|
||||
|
@ -874,3 +875,80 @@ func TestNotify(t *testing.T) {
|
|||
group.Eval(ctx, time.Unix(6, 0))
|
||||
testutil.Equals(t, 1, len(lastNotified))
|
||||
}
|
||||
|
||||
// TestMetricsUpdate calls Update on a running rule manager with a sequence of
// rule file sets and, after each call, checks how many series are exposed
// under the prometheus_rule_group_* metric names listed in metricNames.
func TestMetricsUpdate(t *testing.T) {
|
||||
files := []string{"fixtures/rules.yaml", "fixtures/rules2.yaml"}
|
||||
// Only metric families with one of these names are counted.
metricNames := []string{
|
||||
"prometheus_rule_group_interval_seconds",
|
||||
"prometheus_rule_group_last_duration_seconds",
|
||||
"prometheus_rule_group_last_evaluation_timestamp_seconds",
|
||||
"prometheus_rule_group_rules",
|
||||
}
|
||||
|
||||
storage := teststorage.New(t)
|
||||
// A dedicated registry is handed to the manager below and gathered from in countMetrics.
registry := prometheus.NewRegistry()
|
||||
defer storage.Close()
|
||||
opts := promql.EngineOpts{
|
||||
Logger: nil,
|
||||
Reg: nil,
|
||||
MaxConcurrent: 10,
|
||||
MaxSamples: 10,
|
||||
Timeout: 10 * time.Second,
|
||||
}
|
||||
engine := promql.NewEngine(opts)
|
||||
ruleManager := NewManager(&ManagerOptions{
|
||||
Appendable: storage,
|
||||
TSDB: storage,
|
||||
QueryFunc: EngineQueryFunc(engine, storage),
|
||||
Context: context.Background(),
|
||||
Logger: log.NewNopLogger(),
|
||||
Registerer: registry,
|
||||
})
|
||||
ruleManager.Run()
|
||||
defer ruleManager.Stop()
|
||||
|
||||
// countMetrics gathers from the registry and returns the total number of
// series across all families whose name appears in metricNames.
countMetrics := func() int {
|
||||
ms, err := registry.Gather()
|
||||
testutil.Ok(t, err)
|
||||
var metrics int
|
||||
for _, m := range ms {
|
||||
s := m.GetName()
|
||||
for _, n := range metricNames {
|
||||
if s == n {
|
||||
metrics += len(m.Metric)
|
||||
break
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
return metrics
|
||||
}
|
||||
|
||||
// Each case is the file set passed to Update and the series count expected
// afterwards: both files, the first file only, no files, the second file only.
// NOTE(review): the expected counts presumably assume one group per fixture
// file and one series per metric name per group — confirm against the fixtures.
cases := []struct {
|
||||
files []string
|
||||
metrics int
|
||||
}{
|
||||
{
|
||||
files: files,
|
||||
metrics: 8,
|
||||
},
|
||||
{
|
||||
files: files[:1],
|
||||
metrics: 4,
|
||||
},
|
||||
{
|
||||
files: files[:0],
|
||||
metrics: 0,
|
||||
},
|
||||
{
|
||||
files: files[1:],
|
||||
metrics: 4,
|
||||
},
|
||||
}
|
||||
|
||||
// The cases run in order against the same manager, so each Update replaces
// the file set loaded by the previous case.
for i, c := range cases {
|
||||
err := ruleManager.Update(time.Second, c.files, nil)
|
||||
testutil.Ok(t, err)
|
||||
// NOTE(review): the sleep presumably gives the groups time to evaluate at
// the 1s interval passed to Update before the series are counted — confirm.
time.Sleep(2 * time.Second)
|
||||
testutil.Equals(t, c.metrics, countMetrics(), "test %d: invalid count of metrics", i)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue