[ENHANCEMENT] PromQL: use Kahan summation for sum()

This can give a more precise result, by keeping a separate running
compensation value to accumulate small errors.

See https://en.wikipedia.org/wiki/Kahan_summation_algorithm

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
This commit is contained in:
Bryan Boreham 2024-05-09 14:29:34 +01:00
parent 3fd24d1cd7
commit ea82b49c33
2 changed files with 21 additions and 4 deletions

View File

@ -2730,7 +2730,7 @@ type groupedAggregation struct {
hasHistogram bool // Has at least 1 histogram sample aggregated.
floatValue float64
histogramValue *histogram.FloatHistogram
floatMean float64
floatMean float64 // Mean, or "compensating value" for Kahan summation.
groupCount int
heap vectorByValueHeap
}
@ -2758,11 +2758,13 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix
*group = groupedAggregation{
seen: true,
floatValue: f,
floatMean: f,
groupCount: 1,
}
switch op {
case parser.SUM, parser.AVG:
case parser.AVG:
group.floatMean = f
fallthrough
case parser.SUM:
if h == nil {
group.hasFloat = true
} else {
@ -2770,6 +2772,7 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix
group.hasHistogram = true
}
case parser.STDVAR, parser.STDDEV:
group.floatMean = f
group.floatValue = 0
case parser.QUANTILE:
group.heap = make(vectorByValueHeap, 1)
@ -2792,7 +2795,7 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix
// point in copying the histogram in that case.
} else {
group.hasFloat = true
group.floatValue += f
group.floatValue, group.floatMean = kahanSumInc(f, group.floatValue, group.floatMean)
}
case parser.AVG:
@ -2903,6 +2906,8 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix
}
if aggr.hasHistogram {
aggr.histogramValue.Compact(0)
} else {
aggr.floatValue += aggr.floatMean // Add Kahan summation compensating term.
}
default:
// For other aggregations, we already have the right value.

View File

@ -503,6 +503,18 @@ eval instant at 1m avg(data{test="-big"})
eval instant at 1m avg(data{test="bigzero"})
{} 0
# Test summing extreme values.
clear
load 10s
data{test="ten",point="a"} 2
data{test="ten",point="b"} 8
data{test="ten",point="c"} 1e+100
data{test="ten",point="d"} -1e100
eval instant at 1m sum(data{test="ten"})
{} 10
clear
# Test that aggregations are deterministic.