Add a rule_group_last_evaluation_samples metric (#7977)

This new metric tracks how many samples each rule group generated during its last evaluation.

Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
This commit is contained in:
Łukasz Mierzwa 2020-09-25 16:48:38 +01:00 committed by GitHub
parent 072b9649a3
commit 19c190b406
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -63,6 +63,7 @@ type Metrics struct {
groupLastEvalTime *prometheus.GaugeVec
groupLastDuration *prometheus.GaugeVec
groupRules *prometheus.GaugeVec
groupSamples *prometheus.GaugeVec
}
// NewGroupMetrics creates a new instance of Metrics and registers it with the provided registerer,
@ -146,6 +147,14 @@ func NewGroupMetrics(reg prometheus.Registerer) *Metrics {
},
[]string{"rule_group"},
),
groupSamples: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: namespace,
Name: "rule_group_last_evaluation_samples",
Help: "The number of samples returned during the last rule group evaluation.",
},
[]string{"rule_group"},
),
}
if reg != nil {
@ -160,6 +169,7 @@ func NewGroupMetrics(reg prometheus.Registerer) *Metrics {
m.groupLastEvalTime,
m.groupLastDuration,
m.groupRules,
m.groupSamples,
)
}
@ -276,6 +286,7 @@ func NewGroup(o GroupOptions) *Group {
metrics.groupLastEvalTime.WithLabelValues(key)
metrics.groupLastDuration.WithLabelValues(key)
metrics.groupRules.WithLabelValues(key).Set(float64(len(o.Rules)))
metrics.groupSamples.WithLabelValues(key)
metrics.groupInterval.WithLabelValues(key).Set(o.Interval.Seconds())
return &Group{
@ -557,6 +568,7 @@ func (g *Group) CopyState(from *Group) {
// Eval runs a single evaluation cycle in which all rules are evaluated sequentially.
func (g *Group) Eval(ctx context.Context, ts time.Time) {
var samplesTotal float64
for i, rule := range g.rules {
select {
case <-g.done:
@ -590,6 +602,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
g.metrics.evalFailures.WithLabelValues(groupKey(g.File(), g.Name())).Inc()
return
}
samplesTotal += float64(len(vector))
if ar, ok := rule.(*AlertingRule); ok {
ar.sendAlerts(ctx, ts, g.opts.ResendDelay, g.interval, g.opts.NotifyFunc)
@ -647,6 +660,9 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
}
}(i, rule)
}
if g.metrics != nil {
g.metrics.groupSamples.WithLabelValues(groupKey(g.File(), g.Name())).Set(samplesTotal)
}
g.cleanupStaleSeries(ctx, ts)
}
@ -978,6 +994,7 @@ func (m *Manager) Update(interval time.Duration, files []string, externalLabels
m.groupLastEvalTime.DeleteLabelValues(n)
m.groupLastDuration.DeleteLabelValues(n)
m.groupRules.DeleteLabelValues(n)
m.groupSamples.DeleteLabelValues((n))
}
wg.Done()
}(n, oldg)