From 0a9deb9597e08da85e55fc4b1ccb38cc6f11412d Mon Sep 17 00:00:00 2001 From: darshanime Date: Mon, 25 Oct 2021 16:32:40 +0530 Subject: [PATCH 1/4] use kahan summation for numerical stability Signed-off-by: darshanime --- promql/functions.go | 54 ++++++++++++++++++++++++++++++++-------- promql/functions_test.go | 7 ++++++ 2 files changed, 50 insertions(+), 11 deletions(-) diff --git a/promql/functions.go b/promql/functions.go index 50594503d..9982b92c7 100644 --- a/promql/functions.go +++ b/promql/functions.go @@ -439,11 +439,14 @@ func funcMinOverTime(vals []parser.Value, args parser.Expressions, enh *EvalNode // === sum_over_time(Matrix parser.ValueTypeMatrix) Vector === func funcSumOverTime(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) Vector { return aggrOverTime(vals, enh, func(values []Point) float64 { - var sum float64 + var sum, c float64 for _, v := range values { - sum += v.V + sum, c = kahanSummationIter(v.V, sum, c) } - return sum + if math.IsInf(sum, 0) { + return sum + } + return sum + c }) } @@ -675,23 +678,52 @@ func funcTimestamp(vals []parser.Value, args parser.Expressions, enh *EvalNodeHe return enh.Out } +func kahanSummation(samples []float64) float64 { + sum, c := 0.0, 0.0 + + for _, v := range samples { + sum, c = kahanSummationIter(v, sum, c) + } + return sum + c +} + +func kahanSummationIter(v, sum, c float64) (float64, float64) { + t := sum + v + // using Neumaier improvement, swap if next term larger than sum + if math.Abs(sum) >= math.Abs(v) { + c += (sum - t) + v + } else { + c += (v - t) + sum + } + sum = t + return sum, c +} + // linearRegression performs a least-square linear regression analysis on the // provided SamplePairs. It returns the slope, and the intercept value at the // provided time. func linearRegression(samples []Point, interceptTime int64) (slope, intercept float64) { var ( - n float64 - sumX, sumY float64 - sumXY, sumX2 float64 + n float64 + sumX, cX float64 + sumY, cY float64 + sumXY, cXY float64 + sumX2, cX2 float64 ) for _, sample := range samples { - x := float64(sample.T-interceptTime) / 1e3 n += 1.0 - sumY += sample.V - sumX += x - sumXY += x * sample.V - sumX2 += x * x + x := float64(sample.T-interceptTime) / 1e3 + sumX, cX = kahanSummationIter(x, sumX, cX) + sumY, cY = kahanSummationIter(sample.V, sumY, cY) + sumXY, cXY = kahanSummationIter(x*sample.V, sumXY, cXY) + sumX2, cX2 = kahanSummationIter(x*x, sumX2, cX2) } + + sumX = sumX + cX + sumY = sumY + cY + sumXY = sumXY + cXY + sumX2 = sumX2 + cX2 + covXY := sumXY - sumX*sumY/n varX := sumX2 - sumX*sumX/n diff --git a/promql/functions_test.go b/promql/functions_test.go index 5707cbed3..b0a8b5db6 100644 --- a/promql/functions_test.go +++ b/promql/functions_test.go @@ -15,6 +15,7 @@ package promql import ( "context" + "math" "testing" "time" @@ -71,3 +72,9 @@ func TestFunctionList(t *testing.T) { require.True(t, ok, "function %s exists in parser package, but not in promql package", i) } } + +func TestKahanSummation(t *testing.T) { + vals := []float64{1.0, math.Pow(10, 100), 1.0, -1 * math.Pow(10, 100)} + expected := 2.0 + require.Equal(t, expected, kahanSummation(vals)) +} From a905354da35b371605c89a74bd7a8e8d566e5970 Mon Sep 17 00:00:00 2001 From: darshanime Date: Mon, 25 Oct 2021 18:02:25 +0530 Subject: [PATCH 2/4] use kahan for avg_over_time Signed-off-by: darshanime --- promql/functions.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/promql/functions.go b/promql/functions.go index 9982b92c7..e43cc85ab 100644 --- a/promql/functions.go +++ b/promql/functions.go @@ -367,7 +367,7 @@ func aggrOverTime(vals []parser.Value, enh *EvalNodeHelper, aggrFn func([]Point) // === avg_over_time(Matrix parser.ValueTypeMatrix) Vector === func funcAvgOverTime(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) Vector { return aggrOverTime(vals, enh, func(values []Point) float64 { - var mean, count float64 + var mean, count, c float64 for _, v := range values { count++ if math.IsInf(mean, 0) { @@ -387,9 +387,13 @@ func funcAvgOverTime(vals []parser.Value, args parser.Expressions, enh *EvalNode continue } } - mean += v.V/count - mean/count + mean, c = kahanSummationIter(v.V/count-mean/count, mean, c) } - return mean + + if math.IsInf(mean, 0) { + return mean + } + return mean + c }) } From 694b872deea7f5f385933714618610b8491d6f4e Mon Sep 17 00:00:00 2001 From: darshanime Date: Sat, 30 Oct 2021 19:08:23 +0530 Subject: [PATCH 3/4] address stylistic nits Signed-off-by: darshanime --- promql/functions.go | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/promql/functions.go b/promql/functions.go index e43cc85ab..e737992e4 100644 --- a/promql/functions.go +++ b/promql/functions.go @@ -387,7 +387,7 @@ func funcAvgOverTime(vals []parser.Value, args parser.Expressions, enh *EvalNode continue } } - mean, c = kahanSummationIter(v.V/count-mean/count, mean, c) + mean, c = kahanSumInc(v.V/count-mean/count, mean, c) } if math.IsInf(mean, 0) { @@ -445,7 +445,7 @@ func funcSumOverTime(vals []parser.Value, args parser.Expressions, enh *EvalNode return aggrOverTime(vals, enh, func(values []Point) float64 { var sum, c float64 for _, v := range values { - sum, c = kahanSummationIter(v.V, sum, c) + sum, c = kahanSumInc(v.V, sum, c) } if math.IsInf(sum, 0) { return sum @@ -682,25 +682,24 @@ func funcTimestamp(vals []parser.Value, args parser.Expressions, enh *EvalNodeHe return enh.Out } -func kahanSummation(samples []float64) float64 { - sum, c := 0.0, 0.0 +func kahanSum(samples []float64) float64 { + var sum, c float64 for _, v := range samples { - sum, c = kahanSummationIter(v, sum, c) + sum, c = kahanSumInc(v, sum, c) } return sum + c } -func kahanSummationIter(v, sum, c float64) (float64, float64) { - t := sum + v - // using Neumaier improvement, swap if next term larger than sum - if math.Abs(sum) >= math.Abs(v) { - c += (sum - t) + v +func kahanSumInc(inc, sum, c float64) (newSum, newC float64) { + t := sum + inc + // Using Neumaier improvement, swap if next term larger than sum. + if math.Abs(sum) >= math.Abs(inc) { + c += (sum - t) + inc } else { - c += (v - t) + sum + c += (inc - t) + sum } - sum = t - return sum, c + return t, c } // linearRegression performs a least-square linear regression analysis on the @@ -717,10 +716,10 @@ func linearRegression(samples []Point, interceptTime int64) (slope, intercept fl for _, sample := range samples { n += 1.0 x := float64(sample.T-interceptTime) / 1e3 - sumX, cX = kahanSummationIter(x, sumX, cX) - sumY, cY = kahanSummationIter(sample.V, sumY, cY) - sumXY, cXY = kahanSummationIter(x*sample.V, sumXY, cXY) - sumX2, cX2 = kahanSummationIter(x*x, sumX2, cX2) + sumX, cX = kahanSumInc(x, sumX, cX) + sumY, cY = kahanSumInc(sample.V, sumY, cY) + sumXY, cXY = kahanSumInc(x*sample.V, sumXY, cXY) + sumX2, cX2 = kahanSumInc(x*x, sumX2, cX2) } sumX = sumX + cX From 42d786f1acb1fe315a194478bea69acdcd4aca56 Mon Sep 17 00:00:00 2001 From: darshanime Date: Sat, 30 Oct 2021 19:41:36 +0530 Subject: [PATCH 4/4] use kahan summation for aggregation functions Signed-off-by: darshanime --- promql/functions.go | 24 ++++++++++++++---------- promql/functions_test.go | 4 ++-- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/promql/functions.go b/promql/functions.go index e737992e4..49ff09678 100644 --- a/promql/functions.go +++ b/promql/functions.go @@ -471,28 +471,32 @@ func funcQuantileOverTime(vals []parser.Value, args parser.Expressions, enh *Eva // === stddev_over_time(Matrix parser.ValueTypeMatrix) Vector === func funcStddevOverTime(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) Vector { return aggrOverTime(vals, enh, func(values []Point) float64 { - var aux, count, mean float64 + var count float64 + var mean, cMean float64 + var aux, cAux float64 for _, v := range values { count++ - delta := v.V - mean - mean += delta / count - aux += delta * (v.V - mean) + delta := v.V - (mean + cMean) + mean, cMean = kahanSumInc(delta/count, mean, cMean) + aux, cAux = kahanSumInc(delta*(v.V-(mean+cMean)), aux, cAux) } - return math.Sqrt(aux / count) + return math.Sqrt((aux + cAux) / count) }) } // === stdvar_over_time(Matrix parser.ValueTypeMatrix) Vector === func funcStdvarOverTime(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) Vector { return aggrOverTime(vals, enh, func(values []Point) float64 { - var aux, count, mean float64 + var count float64 + var mean, cMean float64 + var aux, cAux float64 for _, v := range values { count++ - delta := v.V - mean - mean += delta / count - aux += delta * (v.V - mean) + delta := v.V - (mean + cMean) + mean, cMean = kahanSumInc(delta/count, mean, cMean) + aux, cAux = kahanSumInc(delta*(v.V-(mean+cMean)), aux, cAux) } - return aux / count + return (aux + cAux) / count }) } diff --git a/promql/functions_test.go b/promql/functions_test.go index b0a8b5db6..19ee105da 100644 --- a/promql/functions_test.go +++ b/promql/functions_test.go @@ -73,8 +73,8 @@ func TestFunctionList(t *testing.T) { } } -func TestKahanSummation(t *testing.T) { +func TestKahanSum(t *testing.T) { vals := []float64{1.0, math.Pow(10, 100), 1.0, -1 * math.Pow(10, 100)} expected := 2.0 - require.Equal(t, expected, kahanSummation(vals)) + require.Equal(t, expected, kahanSum(vals)) }