From 2efdf660b16be165c09338d528013f59e88a5cf5 Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Thu, 29 Apr 2021 14:28:48 +0200 Subject: [PATCH] Increase evaluation failures on Commit() (#8770) I think we should increment the metric here, we're setting the rule health anyways. This means even if the "evaluation" succeeded, none of the samples made it to storage. This is a simplified solution to: https://github.com/prometheus/prometheus/pull/8410/ Signed-off-by: Goutham Veeramachaneni --- rules/manager.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rules/manager.go b/rules/manager.go index 3f4ce8d8b..110b64d55 100644 --- a/rules/manager.go +++ b/rules/manager.go @@ -595,13 +595,13 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { if err != nil { rule.SetHealth(HealthBad) rule.SetLastError(err) + g.metrics.evalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc() // Canceled queries are intentional termination of queries. This normally // happens on shutdown and thus we skip logging of any errors here. if _, ok := err.(promql.ErrQueryCanceled); !ok { level.Warn(g.logger).Log("msg", "Evaluating rule failed", "rule", rule, "err", err) } - g.metrics.evalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc() return } samplesTotal += float64(len(vector)) @@ -620,6 +620,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { if err := app.Commit(); err != nil { rule.SetHealth(HealthBad) rule.SetLastError(err) + g.metrics.evalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc() level.Warn(g.logger).Log("msg", "Rule sample appending failed", "err", err) return