From 56ebd5afde459dbbc53c2024279dbb36e4f66113 Mon Sep 17 00:00:00 2001 From: Julien Pivotto Date: Mon, 27 Jan 2020 13:41:32 +0100 Subject: [PATCH] Delete prometheus_rule_group metrics when groups are removed (#6693) * Delete prometheus_rule_group metrics when groups are removed Signed-off-by: Julien Pivotto --- rules/fixtures/rules2.yaml | 5 +++ rules/manager.go | 15 +++++--- rules/manager_test.go | 78 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 5 deletions(-) create mode 100644 rules/fixtures/rules2.yaml diff --git a/rules/fixtures/rules2.yaml b/rules/fixtures/rules2.yaml new file mode 100644 index 000000000..e405138f8 --- /dev/null +++ b/rules/fixtures/rules2.yaml @@ -0,0 +1,5 @@ +groups: + - name: test_2 + rules: + - record: test_2 + expr: vector(2) diff --git a/rules/manager.go b/rules/manager.go index cbb89df79..7fd107acc 100644 --- a/rules/manager.go +++ b/rules/manager.go @@ -874,7 +874,7 @@ func (m *Manager) Update(interval time.Duration, files []string, externalLabels // check if new group equals with the old group, if yes then skip it. // If not equals, stop it and wait for it to finish the current iteration. // Then copy it into the new group. - gn := groupKey(newg.name, newg.file) + gn := groupKey(newg.file, newg.name) oldg, ok := m.groups[gn] delete(m.groups, gn) @@ -901,8 +901,13 @@ func (m *Manager) Update(interval time.Duration, files []string, externalLabels } // Stop remaining old groups. - for _, oldg := range m.groups { + for n, oldg := range m.groups { oldg.stop() + if m := oldg.metrics; m != nil { + m.groupLastEvalTime.DeleteLabelValues(n) + m.groupLastDuration.DeleteLabelValues(n) + m.groupRules.DeleteLabelValues(n) + } } wg.Wait() @@ -958,7 +963,7 @@ func (m *Manager) LoadGroups( )) } - groups[groupKey(rg.Name, fn)] = NewGroup(rg.Name, fn, itv, rules, shouldRestore, m.opts) + groups[groupKey(fn, rg.Name)] = NewGroup(rg.Name, fn, itv, rules, shouldRestore, m.opts) } } @@ -966,8 +971,8 @@ func (m *Manager) LoadGroups( } // Group names need not be unique across filenames. -func groupKey(name, file string) string { - return name + ";" + file +func groupKey(file, name string) string { + return file + ";" + name } // RuleGroups returns the list of manager's rule groups. diff --git a/rules/manager_test.go b/rules/manager_test.go index f1f21ffa2..204c4f51a 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -24,6 +24,7 @@ import ( "time" "github.com/go-kit/kit/log" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" yaml "gopkg.in/yaml.v2" @@ -874,3 +875,80 @@ func TestNotify(t *testing.T) { group.Eval(ctx, time.Unix(6, 0)) testutil.Equals(t, 1, len(lastNotified)) } + +func TestMetricsUpdate(t *testing.T) { + files := []string{"fixtures/rules.yaml", "fixtures/rules2.yaml"} + metricNames := []string{ + "prometheus_rule_group_interval_seconds", + "prometheus_rule_group_last_duration_seconds", + "prometheus_rule_group_last_evaluation_timestamp_seconds", + "prometheus_rule_group_rules", + } + + storage := teststorage.New(t) + registry := prometheus.NewRegistry() + defer storage.Close() + opts := promql.EngineOpts{ + Logger: nil, + Reg: nil, + MaxConcurrent: 10, + MaxSamples: 10, + Timeout: 10 * time.Second, + } + engine := promql.NewEngine(opts) + ruleManager := NewManager(&ManagerOptions{ + Appendable: storage, + TSDB: storage, + QueryFunc: EngineQueryFunc(engine, storage), + Context: context.Background(), + Logger: log.NewNopLogger(), + Registerer: registry, + }) + ruleManager.Run() + defer ruleManager.Stop() + + countMetrics := func() int { + ms, err := registry.Gather() + testutil.Ok(t, err) + var metrics int + for _, m := range ms { + s := m.GetName() + for _, n := range metricNames { + if s == n { + metrics += len(m.Metric) + break + } + } + } + return metrics + } + + cases := []struct { + files []string + metrics int + }{ + { + files: files, + metrics: 8, + }, + { + files: files[:1], + metrics: 4, + }, + { + files: files[:0], + metrics: 0, + }, + { + files: files[1:], + metrics: 4, + }, + } + + for i, c := range cases { + err := ruleManager.Update(time.Second, c.files, nil) + testutil.Ok(t, err) + time.Sleep(2 * time.Second) + testutil.Equals(t, c.metrics, countMetrics(), "test %d: invalid count of metrics", i) + } +}