promql: Limit extrapolation of delta/rate/increase
The new implementation detects the start and end of a series by looking at the average sample interval within the range. If the first (last) sample in the range is more than 1.1*interval distant from the beginning (end) of the range, it is considered the first (last) sample of the series as a whole, and extrapolation is limited to half the interval (rather than all the way to the beginning (end) of the range). In addition, if the extrapolated starting point of a counter (where it is zero) is within the range, it is used as the starting point of the series. Fixes #581
This commit is contained in:
parent
7da42eee6e
commit
c77c3a8c56
|
@ -19,7 +19,6 @@ import (
|
|||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/common/model"
|
||||
|
||||
|
@ -44,28 +43,25 @@ func funcTime(ev *evaluator, args Expressions) model.Value {
|
|||
}
|
||||
}
|
||||
|
||||
// === delta(matrix model.ValMatrix) Vector ===
|
||||
func funcDelta(ev *evaluator, args Expressions) model.Value {
|
||||
// This function still takes a 2nd argument for use by rate() and increase().
|
||||
isCounter := len(args) >= 2 && ev.evalInt(args[1]) > 0
|
||||
// extrapolatedRate is a utility function for rate/increase/delta.
|
||||
// It calculates the rate (allowing for counter resets if isCounter is true),
|
||||
// extrapolates if the first/last sample is close to the boundary, and returns
|
||||
// the result as either per-second (if isRate is true) or overall.
|
||||
func extrapolatedRate(ev *evaluator, arg Expr, isCounter bool, isRate bool) model.Value {
|
||||
ms := arg.(*MatrixSelector)
|
||||
|
||||
rangeStart := ev.Timestamp.Add(-ms.Range - ms.Offset)
|
||||
rangeEnd := ev.Timestamp.Add(-ms.Offset)
|
||||
|
||||
resultVector := vector{}
|
||||
|
||||
// If we treat these metrics as counters, we need to fetch all values
|
||||
// in the interval to find breaks in the timeseries' monotonicity.
|
||||
// I.e. if a counter resets, we want to ignore that reset.
|
||||
var matrixValue matrix
|
||||
if isCounter {
|
||||
matrixValue = ev.evalMatrix(args[0])
|
||||
} else {
|
||||
matrixValue = ev.evalMatrixBounds(args[0])
|
||||
}
|
||||
matrixValue := ev.evalMatrix(ms)
|
||||
for _, samples := range matrixValue {
|
||||
// No sense in trying to compute a delta without at least two points. Drop
|
||||
// No sense in trying to compute a rate without at least two points. Drop
|
||||
// this vector element.
|
||||
if len(samples.Values) < 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
var (
|
||||
counterCorrection model.SampleValue
|
||||
lastValue model.SampleValue
|
||||
|
@ -79,22 +75,48 @@ func funcDelta(ev *evaluator, args Expressions) model.Value {
|
|||
}
|
||||
resultValue := lastValue - samples.Values[0].Value + counterCorrection
|
||||
|
||||
targetInterval := args[0].(*MatrixSelector).Range
|
||||
sampledInterval := samples.Values[len(samples.Values)-1].Timestamp.Sub(samples.Values[0].Timestamp)
|
||||
if sampledInterval == 0 {
|
||||
// Only found one sample. Cannot compute a rate from this.
|
||||
continue
|
||||
// Duration between first/last samples and boundary of range.
|
||||
durationToStart := samples.Values[0].Timestamp.Sub(rangeStart).Seconds()
|
||||
durationToEnd := rangeEnd.Sub(samples.Values[len(samples.Values)-1].Timestamp).Seconds()
|
||||
|
||||
sampledInterval := samples.Values[len(samples.Values)-1].Timestamp.Sub(samples.Values[0].Timestamp).Seconds()
|
||||
averageDurationBetweenSamples := sampledInterval / float64(len(samples.Values)-1)
|
||||
|
||||
if isCounter && resultValue > 0 && samples.Values[0].Value >= 0 {
|
||||
// Counters cannot be negative. If we have any slope at
|
||||
// all (i.e. resultValue went up), we can extrapolate
|
||||
// the zero point of the counter. If the duration to the
|
||||
// zero point is shorter than the durationToStart, we
|
||||
// take the zero point as the start of the series,
|
||||
// thereby avoiding extrapolation to negative counter
|
||||
// values.
|
||||
durationToZero := sampledInterval * float64(samples.Values[0].Value/resultValue)
|
||||
if durationToZero < durationToStart {
|
||||
durationToStart = durationToZero
|
||||
}
|
||||
}
|
||||
|
||||
// If the first/last samples are close to the boundaries of the range,
|
||||
// extrapolate the result. This is as we expect that another sample
|
||||
// will exist given the spacing between samples we've seen thus far,
|
||||
// with an allowance for noise.
|
||||
extrapolationThreshold := averageDurationBetweenSamples * 1.1
|
||||
extrapolateToInterval := sampledInterval
|
||||
|
||||
if durationToStart < extrapolationThreshold {
|
||||
extrapolateToInterval += durationToStart
|
||||
} else {
|
||||
extrapolateToInterval += averageDurationBetweenSamples / 2
|
||||
}
|
||||
if durationToEnd < extrapolationThreshold {
|
||||
extrapolateToInterval += durationToEnd
|
||||
} else {
|
||||
extrapolateToInterval += averageDurationBetweenSamples / 2
|
||||
}
|
||||
resultValue = resultValue * model.SampleValue(extrapolateToInterval/sampledInterval)
|
||||
if isRate {
|
||||
resultValue = resultValue / model.SampleValue(ms.Range.Seconds())
|
||||
}
|
||||
// Correct for differences in target vs. actual delta interval.
|
||||
//
|
||||
// Above, we didn't actually calculate the delta for the specified target
|
||||
// interval, but for an interval between the first and last found samples
|
||||
// under the target interval, which will usually have less time between
|
||||
// them. Depending on how many samples are found under a target interval,
|
||||
// the delta results are distorted and temporal aliasing occurs (ugly
|
||||
// bumps). This effect is corrected for below.
|
||||
intervalCorrection := model.SampleValue(targetInterval) / model.SampleValue(sampledInterval)
|
||||
resultValue *= intervalCorrection
|
||||
|
||||
resultSample := &sample{
|
||||
Metric: samples.Metric,
|
||||
|
@ -107,25 +129,19 @@ func funcDelta(ev *evaluator, args Expressions) model.Value {
|
|||
return resultVector
|
||||
}
|
||||
|
||||
// === delta(matrix model.ValMatrix) Vector ===
|
||||
func funcDelta(ev *evaluator, args Expressions) model.Value {
|
||||
return extrapolatedRate(ev, args[0], false, false)
|
||||
}
|
||||
|
||||
// === rate(node model.ValMatrix) Vector ===
|
||||
func funcRate(ev *evaluator, args Expressions) model.Value {
|
||||
args = append(args, &NumberLiteral{1})
|
||||
vector := funcDelta(ev, args).(vector)
|
||||
|
||||
// TODO: could be other type of model.ValMatrix in the future (right now, only
|
||||
// MatrixSelector exists). Find a better way of getting the duration of a
|
||||
// matrix, such as looking at the samples themselves.
|
||||
interval := args[0].(*MatrixSelector).Range
|
||||
for i := range vector {
|
||||
vector[i].Value /= model.SampleValue(interval / time.Second)
|
||||
}
|
||||
return vector
|
||||
return extrapolatedRate(ev, args[0], true, true)
|
||||
}
|
||||
|
||||
// === increase(node model.ValMatrix) Vector ===
|
||||
func funcIncrease(ev *evaluator, args Expressions) model.Value {
|
||||
args = append(args, &NumberLiteral{1})
|
||||
return funcDelta(ev, args).(vector)
|
||||
return extrapolatedRate(ev, args[0], true, false)
|
||||
}
|
||||
|
||||
// === irate(node model.ValMatrix) Vector ===
|
||||
|
|
|
@ -63,6 +63,10 @@ eval instant at 50m increase(http_requests[50m])
|
|||
{path="/foo"} 100
|
||||
{path="/bar"} 90
|
||||
|
||||
eval instant at 50m increase(http_requests[100m])
|
||||
{path="/foo"} 100
|
||||
{path="/bar"} 90
|
||||
|
||||
clear
|
||||
|
||||
# Tests for irate().
|
||||
|
@ -87,10 +91,10 @@ load 5m
|
|||
http_requests{job="app-server", instance="1", group="canary"} 0+80x10
|
||||
|
||||
# deriv should return the same as rate in simple cases.
|
||||
eval instant at 50m rate(http_requests{group="canary", instance="1", job="app-server"}[60m])
|
||||
eval instant at 50m rate(http_requests{group="canary", instance="1", job="app-server"}[50m])
|
||||
{group="canary", instance="1", job="app-server"} 0.26666666666666666
|
||||
|
||||
eval instant at 50m deriv(http_requests{group="canary", instance="1", job="app-server"}[60m])
|
||||
eval instant at 50m deriv(http_requests{group="canary", instance="1", job="app-server"}[50m])
|
||||
{group="canary", instance="1", job="app-server"} 0.26666666666666666
|
||||
|
||||
# deriv should return correct result.
|
||||
|
|
|
@ -15,6 +15,14 @@ load 5m
|
|||
testcounter_reset_middle 0+10x4 0+10x5
|
||||
testcounter_reset_end 0+10x9 0 10
|
||||
|
||||
load 4m
|
||||
testcounter_zero_cutoff{start="0m"} 0+240x10
|
||||
testcounter_zero_cutoff{start="1m"} 60+240x10
|
||||
testcounter_zero_cutoff{start="2m"} 120+240x10
|
||||
testcounter_zero_cutoff{start="3m"} 180+240x10
|
||||
testcounter_zero_cutoff{start="4m"} 240+240x10
|
||||
testcounter_zero_cutoff{start="5m"} 300+240x10
|
||||
|
||||
load 5m
|
||||
label_grouping_test{a="aa", b="bb"} 0+10x10
|
||||
label_grouping_test{a="a", b="abb"} 0+20x10
|
||||
|
@ -151,11 +159,12 @@ eval instant at 50m delta(http_requests{group="canary", instance="1", job="app-s
|
|||
|
||||
|
||||
# Rates should calculate per-second rates.
|
||||
eval instant at 50m rate(http_requests{group="canary", instance="1", job="app-server"}[60m])
|
||||
eval instant at 50m rate(http_requests{group="canary", instance="1", job="app-server"}[50m])
|
||||
{group="canary", instance="1", job="app-server"} 0.26666666666666666
|
||||
|
||||
|
||||
# Counter resets at in the middle of range are handled correctly by rate().
|
||||
eval instant at 50m rate(testcounter_reset_middle[60m])
|
||||
eval instant at 50m rate(testcounter_reset_middle[50m])
|
||||
{} 0.03
|
||||
|
||||
|
||||
|
@ -163,6 +172,26 @@ eval instant at 50m rate(testcounter_reset_middle[60m])
|
|||
eval instant at 50m rate(testcounter_reset_end[5m])
|
||||
{} 0
|
||||
|
||||
|
||||
# Zero cutoff for left-side extrapolation.
|
||||
eval instant at 10m rate(testcounter_zero_cutoff[20m])
|
||||
{start="0m"} 0.5
|
||||
{start="1m"} 0.55
|
||||
{start="2m"} 0.6
|
||||
{start="3m"} 0.65
|
||||
{start="4m"} 0.7
|
||||
{start="5m"} 0.6
|
||||
|
||||
# Normal half-interval cutoff for left-side extrapolation.
|
||||
eval instant at 50m rate(testcounter_zero_cutoff[20m])
|
||||
{start="0m"} 0.6
|
||||
{start="1m"} 0.6
|
||||
{start="2m"} 0.6
|
||||
{start="3m"} 0.6
|
||||
{start="4m"} 0.6
|
||||
{start="5m"} 0.6
|
||||
|
||||
|
||||
# count_scalar for a non-empty vector should return scalar element count.
|
||||
eval instant at 50m count_scalar(http_requests)
|
||||
8
|
||||
|
|
Loading…
Reference in New Issue