From 19c190b406c992278aaade63be92ecc7bb6a4921 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Mierzwa?= Date: Fri, 25 Sep 2020 16:48:38 +0100 Subject: [PATCH] Add a rule_group_samples metric (#7977) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This new metric allows tracking how many samples each rule group generated. Signed-off-by: Łukasz Mierzwa --- rules/manager.go | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/rules/manager.go b/rules/manager.go index 476f62851..129d47638 100644 --- a/rules/manager.go +++ b/rules/manager.go @@ -63,6 +63,7 @@ type Metrics struct { groupLastEvalTime *prometheus.GaugeVec groupLastDuration *prometheus.GaugeVec groupRules *prometheus.GaugeVec + groupSamples *prometheus.GaugeVec } // NewGroupMetrics creates a new instance of Metrics and registers it with the provided registerer, @@ -146,6 +147,14 @@ func NewGroupMetrics(reg prometheus.Registerer) *Metrics { }, []string{"rule_group"}, ), + groupSamples: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: namespace, + Name: "rule_group_last_evaluation_samples", + Help: "The number of samples returned during the last rule group evaluation.", + }, + []string{"rule_group"}, + ), } if reg != nil { @@ -160,6 +169,7 @@ func NewGroupMetrics(reg prometheus.Registerer) *Metrics { m.groupLastEvalTime, m.groupLastDuration, m.groupRules, + m.groupSamples, ) } @@ -276,6 +286,7 @@ func NewGroup(o GroupOptions) *Group { metrics.groupLastEvalTime.WithLabelValues(key) metrics.groupLastDuration.WithLabelValues(key) metrics.groupRules.WithLabelValues(key).Set(float64(len(o.Rules))) + metrics.groupSamples.WithLabelValues(key) metrics.groupInterval.WithLabelValues(key).Set(o.Interval.Seconds()) return &Group{ @@ -557,6 +568,7 @@ func (g *Group) CopyState(from *Group) { // Eval runs a single evaluation cycle in which all rules are evaluated sequentially. 
func (g *Group) Eval(ctx context.Context, ts time.Time) { + var samplesTotal float64 for i, rule := range g.rules { select { case <-g.done: @@ -590,6 +602,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { g.metrics.evalFailures.WithLabelValues(groupKey(g.File(), g.Name())).Inc() return } + samplesTotal += float64(len(vector)) if ar, ok := rule.(*AlertingRule); ok { ar.sendAlerts(ctx, ts, g.opts.ResendDelay, g.interval, g.opts.NotifyFunc) @@ -647,6 +660,9 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { } }(i, rule) } + if g.metrics != nil { + g.metrics.groupSamples.WithLabelValues(groupKey(g.File(), g.Name())).Set(samplesTotal) + } g.cleanupStaleSeries(ctx, ts) } @@ -978,6 +994,7 @@ func (m *Manager) Update(interval time.Duration, files []string, externalLabels m.groupLastEvalTime.DeleteLabelValues(n) m.groupLastDuration.DeleteLabelValues(n) m.groupRules.DeleteLabelValues(n) + m.groupSamples.DeleteLabelValues((n)) } wg.Done() }(n, oldg)