Merge pull request #13034 from charleskorn/rule-evaluation-logs
Include trace ID in log lines emitted during rule evaluation
This commit is contained in:
commit
12dd0cb53b
|
@ -430,6 +430,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func(i int, rule Rule) {
|
func(i int, rule Rule) {
|
||||||
|
logger := log.WithPrefix(g.logger, "name", rule.Name(), "index", i)
|
||||||
ctx, sp := otel.Tracer("").Start(ctx, "rule")
|
ctx, sp := otel.Tracer("").Start(ctx, "rule")
|
||||||
sp.SetAttributes(attribute.String("name", rule.Name()))
|
sp.SetAttributes(attribute.String("name", rule.Name()))
|
||||||
defer func(t time.Time) {
|
defer func(t time.Time) {
|
||||||
|
@ -441,6 +442,10 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
|
||||||
rule.SetEvaluationTimestamp(t)
|
rule.SetEvaluationTimestamp(t)
|
||||||
}(time.Now())
|
}(time.Now())
|
||||||
|
|
||||||
|
if sp.SpanContext().IsSampled() && sp.SpanContext().HasTraceID() {
|
||||||
|
logger = log.WithPrefix(logger, "traceID", sp.SpanContext().TraceID())
|
||||||
|
}
|
||||||
|
|
||||||
g.metrics.EvalTotal.WithLabelValues(GroupKey(g.File(), g.Name())).Inc()
|
g.metrics.EvalTotal.WithLabelValues(GroupKey(g.File(), g.Name())).Inc()
|
||||||
|
|
||||||
vector, err := rule.Eval(ctx, ts, g.opts.QueryFunc, g.opts.ExternalURL, g.Limit())
|
vector, err := rule.Eval(ctx, ts, g.opts.QueryFunc, g.opts.ExternalURL, g.Limit())
|
||||||
|
@ -454,7 +459,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
|
||||||
// happens on shutdown and thus we skip logging of any errors here.
|
// happens on shutdown and thus we skip logging of any errors here.
|
||||||
var eqc promql.ErrQueryCanceled
|
var eqc promql.ErrQueryCanceled
|
||||||
if !errors.As(err, &eqc) {
|
if !errors.As(err, &eqc) {
|
||||||
level.Warn(g.logger).Log("name", rule.Name(), "index", i, "msg", "Evaluating rule failed", "rule", rule, "err", err)
|
level.Warn(logger).Log("msg", "Evaluating rule failed", "rule", rule, "err", err)
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -480,7 +485,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
|
||||||
sp.SetStatus(codes.Error, err.Error())
|
sp.SetStatus(codes.Error, err.Error())
|
||||||
g.metrics.EvalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc()
|
g.metrics.EvalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc()
|
||||||
|
|
||||||
level.Warn(g.logger).Log("name", rule.Name(), "index", i, "msg", "Rule sample appending failed", "err", err)
|
level.Warn(logger).Log("msg", "Rule sample appending failed", "err", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
g.seriesInPreviousEval[i] = seriesReturned
|
g.seriesInPreviousEval[i] = seriesReturned
|
||||||
|
@ -504,15 +509,15 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
|
||||||
switch {
|
switch {
|
||||||
case errors.Is(unwrappedErr, storage.ErrOutOfOrderSample):
|
case errors.Is(unwrappedErr, storage.ErrOutOfOrderSample):
|
||||||
numOutOfOrder++
|
numOutOfOrder++
|
||||||
level.Debug(g.logger).Log("name", rule.Name(), "index", i, "msg", "Rule evaluation result discarded", "err", err, "sample", s)
|
level.Debug(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s)
|
||||||
case errors.Is(unwrappedErr, storage.ErrTooOldSample):
|
case errors.Is(unwrappedErr, storage.ErrTooOldSample):
|
||||||
numTooOld++
|
numTooOld++
|
||||||
level.Debug(g.logger).Log("name", rule.Name(), "index", i, "msg", "Rule evaluation result discarded", "err", err, "sample", s)
|
level.Debug(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s)
|
||||||
case errors.Is(unwrappedErr, storage.ErrDuplicateSampleForTimestamp):
|
case errors.Is(unwrappedErr, storage.ErrDuplicateSampleForTimestamp):
|
||||||
numDuplicates++
|
numDuplicates++
|
||||||
level.Debug(g.logger).Log("name", rule.Name(), "index", i, "msg", "Rule evaluation result discarded", "err", err, "sample", s)
|
level.Debug(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s)
|
||||||
default:
|
default:
|
||||||
level.Warn(g.logger).Log("name", rule.Name(), "index", i, "msg", "Rule evaluation result discarded", "err", err, "sample", s)
|
level.Warn(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
buf := [1024]byte{}
|
buf := [1024]byte{}
|
||||||
|
@ -520,13 +525,13 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if numOutOfOrder > 0 {
|
if numOutOfOrder > 0 {
|
||||||
level.Warn(g.logger).Log("name", rule.Name(), "index", i, "msg", "Error on ingesting out-of-order result from rule evaluation", "numDropped", numOutOfOrder)
|
level.Warn(logger).Log("msg", "Error on ingesting out-of-order result from rule evaluation", "numDropped", numOutOfOrder)
|
||||||
}
|
}
|
||||||
if numTooOld > 0 {
|
if numTooOld > 0 {
|
||||||
level.Warn(g.logger).Log("name", rule.Name(), "index", i, "msg", "Error on ingesting too old result from rule evaluation", "numDropped", numTooOld)
|
level.Warn(logger).Log("msg", "Error on ingesting too old result from rule evaluation", "numDropped", numTooOld)
|
||||||
}
|
}
|
||||||
if numDuplicates > 0 {
|
if numDuplicates > 0 {
|
||||||
level.Warn(g.logger).Log("name", rule.Name(), "index", i, "msg", "Error on ingesting results from rule evaluation with different value but same timestamp", "numDropped", numDuplicates)
|
level.Warn(logger).Log("msg", "Error on ingesting results from rule evaluation with different value but same timestamp", "numDropped", numDuplicates)
|
||||||
}
|
}
|
||||||
|
|
||||||
for metric, lset := range g.seriesInPreviousEval[i] {
|
for metric, lset := range g.seriesInPreviousEval[i] {
|
||||||
|
@ -545,7 +550,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
|
||||||
// Do not count these in logging, as this is expected if series
|
// Do not count these in logging, as this is expected if series
|
||||||
// is exposed from a different rule.
|
// is exposed from a different rule.
|
||||||
default:
|
default:
|
||||||
level.Warn(g.logger).Log("name", rule.Name(), "index", i, "msg", "Adding stale sample failed", "sample", lset.String(), "err", err)
|
level.Warn(logger).Log("msg", "Adding stale sample failed", "sample", lset.String(), "err", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue