use Welford/Knuth method to compute standard deviation and variance (#4533)
* use Welford/Knuth method to compute standard deviation and variance, avoids float precision issues
* use better method for calculating avg and avg_over_time

Signed-off-by: Dan Cech <dcech@grafana.com>
This commit is contained in:
parent 7d01ead689
commit 9f4cb06a37
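The fix replaces the textbook one-pass formula with Welford's online update (Knuth, TAOCP Vol. 2). For orientation, here is a minimal standalone Go sketch contrasting the two approaches; the function names and the driver are mine, not the patched Prometheus code, and the sample values come from the regression test added in this commit.

```go
// Minimal standalone sketch (my names, not the patched Prometheus code)
// contrasting the old and new ways of computing variance.
package main

import (
	"fmt"
	"math"
)

// naiveVariance is the pre-patch approach: accumulate the sum and the sum of
// squares, then take E[X^2] - E[X]^2. The final subtraction cancels two large,
// nearly equal numbers, so the result can land a few ulps below zero.
func naiveVariance(xs []float64) float64 {
	var sum, squaredSum float64
	for _, x := range xs {
		sum += x
		squaredSum += x * x
	}
	n := float64(len(xs))
	avg := sum / n
	return squaredSum/n - avg*avg
}

// welfordVariance is the post-patch approach: keep a running mean and the
// running sum of squared deviations from it (often called M2). Every update
// works with differences from the current mean, so there is no catastrophic
// cancellation.
func welfordVariance(xs []float64) float64 {
	var mean, m2 float64
	for i, x := range xs {
		delta := x - mean
		mean += delta / float64(i+1)
		m2 += delta * (x - mean)
	}
	return m2 / float64(len(xs)) // population variance, as PromQL's stdvar defines it
}

func main() {
	// Three identical samples, taken from the regression test added below:
	// the true variance is exactly 0.
	xs := []float64{1.5990505637277868, 1.5990505637277868, 1.5990505637277868}

	nv, wv := naiveVariance(xs), welfordVariance(xs)
	fmt.Printf("naive:   stdvar=%g stddev=%g\n", nv, math.Sqrt(nv)) // stddev may be NaN
	fmt.Printf("welford: stdvar=%g stddev=%g\n", wv, math.Sqrt(wv)) // exactly 0
}
```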
promql/engine.go
@@ -1464,12 +1464,12 @@ func intersection(ls1, ls2 labels.Labels) labels.Labels {
 }
 
 type groupedAggregation struct {
 	labels           labels.Labels
 	value            float64
-	valuesSquaredSum float64
+	mean             float64
 	groupCount       int
 	heap             vectorByValueHeap
 	reverseHeap      vectorByReverseValueHeap
 }
 
 // aggregation evaluates an aggregation operation on a Vector.
@@ -1540,17 +1540,19 @@ func (ev *evaluator) aggregation(op ItemType, grouping []string, without bool, p
 				sort.Sort(m)
 			}
 			result[groupingKey] = &groupedAggregation{
 				labels:     m,
 				value:      s.V,
-				valuesSquaredSum: s.V * s.V,
+				mean:       s.V,
 				groupCount: 1,
 			}
 			inputVecLen := int64(len(vec))
 			resultSize := k
 			if k > inputVecLen {
 				resultSize = inputVecLen
 			}
-			if op == itemTopK || op == itemQuantile {
+			if op == itemStdvar || op == itemStddev {
+				result[groupingKey].value = 0.0
+			} else if op == itemTopK || op == itemQuantile {
 				result[groupingKey].heap = make(vectorByValueHeap, 0, resultSize)
 				heap.Push(&result[groupingKey].heap, &Sample{
 					Point: Point{V: s.V},
@@ -1571,8 +1573,8 @@ func (ev *evaluator) aggregation(op ItemType, grouping []string, without bool, p
 			group.value += s.V
 
 		case itemAvg:
-			group.value += s.V
 			group.groupCount++
+			group.mean += (s.V - group.mean) / float64(group.groupCount)
 
 		case itemMax:
 			if group.value < s.V || math.IsNaN(group.value) {
@@ -1588,9 +1590,10 @@ func (ev *evaluator) aggregation(op ItemType, grouping []string, without bool, p
 			group.groupCount++
 
 		case itemStdvar, itemStddev:
-			group.value += s.V
-			group.valuesSquaredSum += s.V * s.V
 			group.groupCount++
+			delta := s.V - group.mean
+			group.mean += delta / float64(group.groupCount)
+			group.value += delta * (s.V - group.mean)
 
 		case itemTopK:
 			if int64(len(group.heap)) < k || group.heap[0].V < s.V || math.IsNaN(group.heap[0].V) {
@@ -1626,18 +1629,16 @@ func (ev *evaluator) aggregation(op ItemType, grouping []string, without bool, p
 	for _, aggr := range result {
 		switch op {
 		case itemAvg:
-			aggr.value = aggr.value / float64(aggr.groupCount)
+			aggr.value = aggr.mean
 
 		case itemCount, itemCountValues:
 			aggr.value = float64(aggr.groupCount)
 
 		case itemStdvar:
-			avg := aggr.value / float64(aggr.groupCount)
-			aggr.value = aggr.valuesSquaredSum/float64(aggr.groupCount) - avg*avg
+			aggr.value = aggr.value / float64(aggr.groupCount)
 
 		case itemStddev:
-			avg := aggr.value / float64(aggr.groupCount)
-			aggr.value = math.Sqrt(aggr.valuesSquaredSum/float64(aggr.groupCount) - avg*avg)
+			aggr.value = math.Sqrt(aggr.value / float64(aggr.groupCount))
 
 		case itemTopK:
 			// The heap keeps the lowest value on top, so reverse it.
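Reading the accumulation and finalization hunks together: after the per-sample loop, `group.value` no longer holds the running sum but the Welford aggregate M2 = Σ(xᵢ − mean)², so the finalization reduces to M2/n for `stdvar` and √(M2/n) for `stddev` (population variance, as before). The subtraction of two large squares is gone entirely.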
promql/functions.go
@@ -371,11 +371,12 @@ func aggrOverTime(vals []Value, enh *EvalNodeHelper, aggrFn func([]Point) float6
 // === avg_over_time(Matrix ValueTypeMatrix) Vector ===
 func funcAvgOverTime(vals []Value, args Expressions, enh *EvalNodeHelper) Vector {
 	return aggrOverTime(vals, enh, func(values []Point) float64 {
-		var sum float64
+		var mean, count float64
 		for _, v := range values {
-			sum += v.V
+			count++
+			mean += (v.V - mean) / count
 		}
-		return sum / float64(len(values))
+		return mean
 	})
 }
 
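Beyond precision, the rewritten avg_over_time never materializes the full sum, so it also survives inputs whose sum overflows float64 even though the true mean is representable. A small illustrative sketch (the values here are mine, chosen to force the overflow):

```go
package main

import "fmt"

func main() {
	// Two finite samples whose sum exceeds math.MaxFloat64 (~1.8e308).
	values := []float64{1e308, 1e308}

	// Sum-then-divide: the intermediate sum overflows to +Inf.
	sum := 0.0
	for _, v := range values {
		sum += v
	}
	fmt.Println("sum/n:       ", sum/float64(len(values))) // +Inf

	// Running mean: every intermediate stays at the data's magnitude.
	var mean, count float64
	for _, v := range values {
		count++
		mean += (v - mean) / count
	}
	fmt.Println("running mean:", mean) // 1e308
}
```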
@@ -444,28 +445,28 @@ func funcQuantileOverTime(vals []Value, args Expressions, enh *EvalNodeHelper) V
 // === stddev_over_time(Matrix ValueTypeMatrix) Vector ===
 func funcStddevOverTime(vals []Value, args Expressions, enh *EvalNodeHelper) Vector {
 	return aggrOverTime(vals, enh, func(values []Point) float64 {
-		var sum, squaredSum, count float64
+		var aux, count, mean float64
 		for _, v := range values {
-			sum += v.V
-			squaredSum += v.V * v.V
 			count++
+			delta := v.V - mean
+			mean += delta / count
+			aux += delta * (v.V - mean)
 		}
-		avg := sum / count
-		return math.Sqrt(squaredSum/count - avg*avg)
+		return math.Sqrt(aux / count)
 	})
 }
 
 // === stdvar_over_time(Matrix ValueTypeMatrix) Vector ===
 func funcStdvarOverTime(vals []Value, args Expressions, enh *EvalNodeHelper) Vector {
 	return aggrOverTime(vals, enh, func(values []Point) float64 {
-		var sum, squaredSum, count float64
+		var aux, count, mean float64
 		for _, v := range values {
-			sum += v.V
-			squaredSum += v.V * v.V
 			count++
+			delta := v.V - mean
+			mean += delta / count
+			aux += delta * (v.V - mean)
 		}
-		avg := sum / count
-		return squaredSum/count - avg*avg
+		return aux / count
 	})
 }
 
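In these `_over_time` variants, `aux` plays the role that `group.value` plays in the aggregation code: it is the same M2 accumulator, and the two functions differ only in whether the final `aux / count` is passed through `math.Sqrt`.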
13 promql/testdata/aggregators.test vendored
@@ -90,6 +90,19 @@ eval instant at 50m stdvar by (instance)(http_requests)
 	{instance="0"} 50000
 	{instance="1"} 50000
 
+# Float precision test for standard deviation and variance
+clear
+load 5m
+  http_requests{job="api-server", instance="0", group="production"} 0+1.33x10
+  http_requests{job="api-server", instance="1", group="production"} 0+1.33x10
+  http_requests{job="api-server", instance="0", group="canary"} 0+1.33x10
+
+eval instant at 50m stddev(http_requests)
+  {} 0.0
+
+eval instant at 50m stdvar(http_requests)
+  {} 0.0
+
 # Regression test for missing separator byte in labelsToGroupingKey.
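The three series here are identical by construction, so the true spread across them at any instant is exactly 0. Under the old formula, Σx²/n and mean² agree only to within rounding, and the difference could land a few ulps below zero, in which case `stdvar` reported a tiny negative number and `stddev` (the square root of a negative) came out NaN. With Welford's method every deviation from the running mean is exactly 0, so the test can assert 0.0 directly.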
19 promql/testdata/functions.test vendored
@@ -374,6 +374,14 @@ eval instant at 8000s holt_winters(http_requests[1m], 0.01, 0.1)
 	{job="api-server", instance="0", group="canary"} 24000
 	{job="api-server", instance="1", group="canary"} -32000
 
+# Tests for avg_over_time
+clear
+load 10s
+  metric 1 2 3 4 5
+
+eval instant at 1m avg_over_time(metric[1m])
+  {} 3
+
 # Tests for stddev_over_time and stdvar_over_time.
 clear
 load 10s
@@ -385,6 +393,17 @@ eval instant at 1m stdvar_over_time(metric[1m])
 eval instant at 1m stddev_over_time(metric[1m])
 	{} 3.249615
 
+# Tests for stddev_over_time and stdvar_over_time #4927.
+clear
+load 10s
+  metric 1.5990505637277868 1.5990505637277868 1.5990505637277868
+
+eval instant at 1m stdvar_over_time(metric[1m])
+  {} 0
+
+eval instant at 1m stddev_over_time(metric[1m])
+  {} 0
+
 # Tests for quantile_over_time
 clear
 
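This is the regression case from issue #4927: a constant series whose value squares inexactly in float64. The old `squaredSum/count - avg*avg` could return a result marginally below zero instead of 0, so stdvar_over_time went slightly negative and stddev_over_time went NaN. The Welford form returns exactly 0, because after the first sample the running mean equals the value exactly and every subsequent deviation is 0.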