Merge pull request from prometheus/beorn7/histogram

promql: Fix counter reset detection in subqueries with histograms
This commit is contained in:
Björn Rabenstein 2025-02-11 20:41:19 +01:00 committed by GitHub
commit 906f6a33b6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 38 additions and 0 deletions
promql
engine.go
promqltest/testdata

View File

@ -1548,6 +1548,28 @@ func (ev *evaluator) evalSubquery(ctx context.Context, subq *parser.SubqueryExpr
VectorSelector: vs,
}
for _, s := range mat {
// Set any "NotCounterReset" and "CounterReset" hints in native
// histograms to "UnknownCounterReset" because we might
// otherwise miss a counter reset happening in samples not
// returned by the subquery, or we might over-detect counter
// resets if the sample with a counter reset is returned
// multiple times by a high-res subquery. This intentionally
// does not attempt to be clever (like detecting if we are
// really missing underlying samples or returning underlying
// samples multiple times) because subqueries on counters are
// inherently problematic WRT counter reset handling, so we
// cannot really solve the problem for good. We only want to
// avoid problems that happen due to the explicitly set counter
// reset hints and go back to the behavior we already know from
// float samples.
for i, hp := range s.Histograms {
switch hp.H.CounterResetHint {
case histogram.NotCounterReset, histogram.CounterReset:
h := *hp.H // Shallow copy is sufficient, we only change CounterResetHint.
h.CounterResetHint = histogram.UnknownCounterReset
s.Histograms[i].H = &h
}
}
vs.Series = append(vs.Series, NewStorageSeries(s))
}
return ms, mat.TotalSamples(), ws

View File

@ -134,3 +134,19 @@ eval instant at 20s min_over_time(metric_total[15s:10s])
eval instant at 20m min_over_time(rate(metric_total[5m])[20m:1m])
{} 0.12119047619047618
clear
# This native histogram series has a counter reset at 5m.
# TODO(beorn7): Write this more nicely once https://github.com/prometheus/prometheus/issues/15984 is fixed.
load 1m
native_histogram {{sum:100 count:100}} {{sum:103 count:103}} {{sum:106 count:106}} {{sum:109 count:109}} {{sum:112 count:112}} {{sum:3 count:3 counter_reset_hint:reset}} {{sum:6 count:6}}+{{sum:3 count:3}}x5
# This sub-query has to detect the counter reset even if it does
# not include the exact sample where the counter reset has happened.
eval instant at 10m increase(native_histogram[10m:3m])
{} {{count:25 sum:25}}
# This sub-query must not detect the counter reset multiple times
# even though the sample where the counter reset happened is returned
# by the sub-query multiple times.
eval instant at 10m increase(native_histogram[10m:15s])
{} {{count:30.769230769230766 sum:30.769230769230766}}