diff --git a/promql/functions.go b/promql/functions.go index 575f8302d..ca987545d 100644 --- a/promql/functions.go +++ b/promql/functions.go @@ -573,9 +573,28 @@ func funcAvgOverTime(vals []parser.Value, args parser.Expressions, enh *EvalNode return vec, nil } return aggrOverTime(vals, enh, func(s Series) float64 { - var mean, count, c float64 + var ( + sum, mean, count, kahanC float64 + incrementalMean bool + ) for _, f := range s.Floats { count++ + if !incrementalMean { + newSum, newC := kahanSumInc(f.F, sum, kahanC) + // Perform regular mean calculation as long as + // the sum doesn't overflow and (in any case) + // for the first iteration (even if we start + // with ±Inf) to not run into division-by-zero + // problems below. + if count == 1 || !math.IsInf(newSum, 0) { + sum, kahanC = newSum, newC + continue + } + // Handle overflow by reverting to incremental calculation of the mean value. + incrementalMean = true + mean = sum / (count - 1) + kahanC /= count - 1 + } if math.IsInf(mean, 0) { if math.IsInf(f.F, 0) && (mean > 0) == (f.F > 0) { // The `mean` and `f.F` values are `Inf` of the same sign. They @@ -593,14 +612,13 @@ func funcAvgOverTime(vals []parser.Value, args parser.Expressions, enh *EvalNode continue } } - correctedMean := mean + c - mean, c = kahanSumInc(f.F/count-correctedMean/count, mean, c) + correctedMean := mean + kahanC + mean, kahanC = kahanSumInc(f.F/count-correctedMean/count, mean, kahanC) } - - if math.IsInf(mean, 0) { - return mean + if incrementalMean { + return mean + kahanC } - return mean + c + return (sum + kahanC) / count }), nil } diff --git a/promql/promqltest/testdata/functions.test b/promql/promqltest/testdata/functions.test index 718e001c3..290beb5b9 100644 --- a/promql/promqltest/testdata/functions.test +++ b/promql/promqltest/testdata/functions.test @@ -737,7 +737,6 @@ eval instant at 1m avg_over_time(metric6c[1m]) eval instant at 1m sum_over_time(metric6c[1m])/count_over_time(metric6c[1m]) {} NaN - eval instant at 1m avg_over_time(metric7[1m]) {} NaN @@ -772,6 +771,9 @@ load 10s eval instant at 1m sum_over_time(metric[1m]) {} 2 +eval instant at 1m avg_over_time(metric[1m]) + {} 0.5 + # Tests for stddev_over_time and stdvar_over_time. clear load 10s