Add a rule_group_samples metric (#7977)

This new metric allows tracking how many samples each rule group generated during its last evaluation.
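
As a rough illustration (the rule file path, group name, and sample count below are made up), the new gauge is exposed on Prometheus's own /metrics endpoint under the "prometheus" namespace along these lines:

    # HELP prometheus_rule_group_last_evaluation_samples The number of samples returned during the last rule group evaluation.
    # TYPE prometheus_rule_group_last_evaluation_samples gauge
    prometheus_rule_group_last_evaluation_samples{rule_group="/etc/prometheus/rules.yml;example"} 57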

Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
Łukasz Mierzwa, 2020-09-25 16:48:38 +01:00, committed by GitHub
parent 072b9649a3
commit 19c190b406

@@ -63,6 +63,7 @@ type Metrics struct {
 	groupLastEvalTime *prometheus.GaugeVec
 	groupLastDuration *prometheus.GaugeVec
 	groupRules        *prometheus.GaugeVec
+	groupSamples      *prometheus.GaugeVec
 }
 // NewGroupMetrics creates a new instance of Metrics and registers it with the provided registerer,
@@ -146,6 +147,14 @@ func NewGroupMetrics(reg prometheus.Registerer) *Metrics {
 			},
 			[]string{"rule_group"},
 		),
+		groupSamples: prometheus.NewGaugeVec(
+			prometheus.GaugeOpts{
+				Namespace: namespace,
+				Name:      "rule_group_last_evaluation_samples",
+				Help:      "The number of samples returned during the last rule group evaluation.",
+			},
+			[]string{"rule_group"},
+		),
 	}
 	if reg != nil {
@@ -160,6 +169,7 @@ func NewGroupMetrics(reg prometheus.Registerer) *Metrics {
 			m.groupLastEvalTime,
 			m.groupLastDuration,
 			m.groupRules,
+			m.groupSamples,
 		)
 	}
@@ -276,6 +286,7 @@ func NewGroup(o GroupOptions) *Group {
 	metrics.groupLastEvalTime.WithLabelValues(key)
 	metrics.groupLastDuration.WithLabelValues(key)
 	metrics.groupRules.WithLabelValues(key).Set(float64(len(o.Rules)))
+	metrics.groupSamples.WithLabelValues(key)
 	metrics.groupInterval.WithLabelValues(key).Set(o.Interval.Seconds())
 	return &Group{
@@ -557,6 +568,7 @@ func (g *Group) CopyState(from *Group) {
 // Eval runs a single evaluation cycle in which all rules are evaluated sequentially.
 func (g *Group) Eval(ctx context.Context, ts time.Time) {
+	var samplesTotal float64
 	for i, rule := range g.rules {
 		select {
 		case <-g.done:
@@ -590,6 +602,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
 				g.metrics.evalFailures.WithLabelValues(groupKey(g.File(), g.Name())).Inc()
 				return
 			}
+			samplesTotal += float64(len(vector))
 			if ar, ok := rule.(*AlertingRule); ok {
 				ar.sendAlerts(ctx, ts, g.opts.ResendDelay, g.interval, g.opts.NotifyFunc)
@@ -647,6 +660,9 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
 			}
 		}(i, rule)
 	}
+	if g.metrics != nil {
+		g.metrics.groupSamples.WithLabelValues(groupKey(g.File(), g.Name())).Set(samplesTotal)
+	}
 	g.cleanupStaleSeries(ctx, ts)
 }
@@ -978,6 +994,7 @@ func (m *Manager) Update(interval time.Duration, files []string, externalLabels
 				m.groupLastEvalTime.DeleteLabelValues(n)
 				m.groupLastDuration.DeleteLabelValues(n)
 				m.groupRules.DeleteLabelValues(n)
+				m.groupSamples.DeleteLabelValues(n)
 			}
 			wg.Done()
 		}(n, oldg)