Add histogram_stdvar and histogram_stddev functions (#12614)

* Add new function: histogram_stdvar and histogram_stddev

Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
This commit is contained in:
zenador 2023-08-25 03:02:14 +08:00 committed by GitHub
parent 8ef7dfdeeb
commit 54aaa2bd7e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 308 additions and 4 deletions

View File

@ -145,7 +145,7 @@ delta(cpu_temp_celsius{host="zeus"}[2h])
```
`delta` acts on native histograms by calculating a new histogram where each
compononent (sum and count of observations, buckets) is the difference between
component (sum and count of observations, buckets) is the difference between
the respective component in the first and last native histogram in
`v`. However, each element in `v` that contains a mix of float and native
histogram samples within the range will be missing from the result vector.
@ -323,6 +323,19 @@ a histogram.
You can use `histogram_quantile(1, v instant-vector)` to get the estimated maximum value stored in
a histogram.
## `histogram_stddev()` and `histogram_stdvar()`
_Both functions only act on native histograms, which are an experimental
feature. The behavior of these functions may change in future versions of
Prometheus, including their removal from PromQL._
`histogram_stddev(v instant-vector)` returns the estimated standard deviation
of observations in a native histogram. The estimate treats every observation in
a bucket as if it had the value of the geometric mean of that bucket's
boundaries, or 0 if the bucket's range includes zero. Samples that are not
native histograms are ignored and do not show up in the returned vector.
Similarly, `histogram_stdvar(v instant-vector)` returns the estimated standard
variance of observations in a native histogram.
## `holt_winters()`
@ -495,7 +508,7 @@ rate(http_requests_total{job="api-server"}[5m])
```
`rate` acts on native histograms by calculating a new histogram where each
compononent (sum and count of observations, buckets) is the rate of increase
component (sum and count of observations, buckets) is the rate of increase
between the respective component in the first and last native histogram in
`v`. However, each element in `v` that contains a mix of float and native
histogram samples within the range will be missing from the result vector.

View File

@ -159,7 +159,7 @@ func (h *FloatHistogram) Mul(factor float64) *FloatHistogram {
return h
}
// Div works like Scale but divides instead of multiplies.
// Div works like Mul but divides instead of multiplies.
// When dividing by 0, everything will be set to Inf.
func (h *FloatHistogram) Div(scalar float64) *FloatHistogram {
h.ZeroCount /= scalar

View File

@ -3312,6 +3312,165 @@ func TestNativeHistogram_HistogramCountAndSum(t *testing.T) {
}
}
// TestNativeHistogram_HistogramStdDevVar evaluates histogram_stdvar and
// histogram_stddev over a single hand-built native histogram sample, once
// appended as an integer histogram and once as a float histogram, and compares
// the result with a precomputed estimate.
func TestNativeHistogram_HistogramStdDevVar(t *testing.T) {
	// TODO(codesome): Integrate histograms into the PromQL testing framework
	// and write more tests there.
	testCases := []struct {
		name string
		h    *histogram.Histogram
		// stdVar is the expected histogram_stdvar result; the expected
		// histogram_stddev result is derived from it via math.Sqrt.
		stdVar float64
	}{
		{
			name: "1, 2, 3, 4 low-res",
			h: &histogram.Histogram{
				Count:  4,
				Sum:    10,
				Schema: 2,
				PositiveSpans: []histogram.Span{
					{Offset: 0, Length: 1},
					{Offset: 3, Length: 1},
					{Offset: 2, Length: 2},
				},
				PositiveBuckets: []int64{1, 0, 0, 0},
			},
			stdVar: 1.163807968526718, // actual variance: 1.25
		},
		{
			name: "1, 2, 3, 4 hi-res",
			h: &histogram.Histogram{
				Count:  4,
				Sum:    10,
				Schema: 8,
				PositiveSpans: []histogram.Span{
					{Offset: 0, Length: 1},
					{Offset: 255, Length: 1},
					{Offset: 149, Length: 1},
					{Offset: 105, Length: 1},
				},
				PositiveBuckets: []int64{1, 0, 0, 0},
			},
			stdVar: 1.2471347737158793, // actual variance: 1.25
		},
		{
			name: "-50, -8, 0, 3, 8, 9, 100",
			h: &histogram.Histogram{
				Count:     7,
				ZeroCount: 1,
				Sum:       62,
				Schema:    3,
				PositiveSpans: []histogram.Span{
					{Offset: 13, Length: 1},
					{Offset: 10, Length: 1},
					{Offset: 1, Length: 1},
					{Offset: 27, Length: 1},
				},
				PositiveBuckets: []int64{1, 0, 0, 0},
				NegativeSpans: []histogram.Span{
					{Offset: 24, Length: 1},
					{Offset: 21, Length: 1},
				},
				NegativeBuckets: []int64{1, 0},
			},
			// The two negative buckets must be represented by negative
			// values (roughly -7.66 and -51.54). Treating them as positive
			// would put them on the wrong side of the mean (62/7) and yield
			// 1544.858..., far below the actual variance.
			stdVar: 1844.4651144196398, // actual variance: 1738.4082
		},
		{
			name: "-50, -8, 0, 3, 8, 9, 100, NaN",
			h: &histogram.Histogram{
				Count:     8,
				ZeroCount: 1,
				Sum:       math.NaN(),
				Schema:    3,
				PositiveSpans: []histogram.Span{
					{Offset: 13, Length: 1},
					{Offset: 10, Length: 1},
					{Offset: 1, Length: 1},
					{Offset: 27, Length: 1},
				},
				PositiveBuckets: []int64{1, 0, 0, 0},
				NegativeSpans: []histogram.Span{
					{Offset: 24, Length: 1},
					{Offset: 21, Length: 1},
				},
				NegativeBuckets: []int64{1, 0},
			},
			// A NaN sum makes the mean, and therefore the variance, NaN.
			stdVar: math.NaN(),
		},
		{
			name: "-50, -8, 0, 3, 8, 9, 100, +Inf",
			h: &histogram.Histogram{
				Count:     8,
				ZeroCount: 1,
				Sum:       math.Inf(1),
				Schema:    3,
				PositiveSpans: []histogram.Span{
					{Offset: 13, Length: 1},
					{Offset: 10, Length: 1},
					{Offset: 1, Length: 1},
					{Offset: 27, Length: 1},
				},
				PositiveBuckets: []int64{1, 0, 0, 0},
				NegativeSpans: []histogram.Span{
					{Offset: 24, Length: 1},
					{Offset: 21, Length: 1},
				},
				NegativeBuckets: []int64{1, 0},
			},
			stdVar: math.NaN(),
		},
	}
	for _, tc := range testCases {
		for _, floatHisto := range []bool{true, false} {
			t.Run(fmt.Sprintf("%s floatHistogram=%t", tc.name, floatHisto), func(t *testing.T) {
				engine := newTestEngine()
				storage := teststorage.New(t)
				t.Cleanup(func() { storage.Close() })

				seriesName := "sparse_histogram_series"
				lbls := labels.FromStrings("__name__", seriesName)

				ts := int64(10 * time.Minute / time.Millisecond)
				app := storage.Appender(context.Background())
				var err error
				if floatHisto {
					_, err = app.AppendHistogram(0, lbls, ts, nil, tc.h.ToFloat())
				} else {
					_, err = app.AppendHistogram(0, lbls, ts, tc.h, nil)
				}
				require.NoError(t, err)
				require.NoError(t, app.Commit())

				// Both functions are queried at the sample's own timestamp
				// and must each return exactly one float sample.
				checks := []struct {
					fn   string
					want float64
				}{
					{fn: "histogram_stdvar", want: tc.stdVar},
					{fn: "histogram_stddev", want: math.Sqrt(tc.stdVar)},
				}
				for _, c := range checks {
					queryString := fmt.Sprintf("%s(%s)", c.fn, seriesName)
					qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts))
					require.NoError(t, err)

					res := qry.Exec(context.Background())
					require.NoError(t, res.Err)

					vector, err := res.Vector()
					require.NoError(t, err)

					require.Len(t, vector, 1)
					require.Nil(t, vector[0].H)
					require.InEpsilon(t, c.want, vector[0].F, 1e-12)
				}
			})
		}
	}
}
func TestNativeHistogram_HistogramQuantile(t *testing.T) {
// TODO(codesome): Integrate histograms into the PromQL testing framework
// and write more tests there.

View File

@ -996,6 +996,72 @@ func funcHistogramSum(vals []parser.Value, args parser.Expressions, enh *EvalNod
return enh.Out
}
// === histogram_stddev(Vector parser.ValueTypeVector) Vector ===
//
// funcHistogramStdDev returns, for every native histogram sample in the input
// vector, the square root of the estimated variance of its observations.
// Samples without a histogram are dropped from the output.
func funcHistogramStdDev(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) Vector {
	return histogramVariance(vals, enh, math.Sqrt)
}

// === histogram_stdvar(Vector parser.ValueTypeVector) Vector ===
//
// funcHistogramStdVar returns, for every native histogram sample in the input
// vector, the estimated variance of its observations. Samples without a
// histogram are dropped from the output.
func funcHistogramStdVar(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) Vector {
	return histogramVariance(vals, enh, func(variance float64) float64 { return variance })
}

// histogramVariance is the shared implementation of histogram_stdvar and
// histogram_stddev. For each histogram sample in vals[0] it estimates the
// variance of the observations, passes it through varianceToResult and appends
// the result, with the metric name dropped, to enh.Out.
//
// Every observation in a bucket is assumed to have one representative value:
// 0 if the bucket's range includes zero, otherwise the geometric mean of the
// bucket's boundaries carrying the sign of the bucket. The mean is taken from
// the histogram's Sum and Count, so a NaN or infinite Sum propagates into the
// result.
func histogramVariance(vals []parser.Value, enh *EvalNodeHelper, varianceToResult func(float64) float64) Vector {
	inVec := vals[0].(Vector)

	for _, sample := range inVec {
		// Skip non-histogram samples.
		if sample.H == nil {
			continue
		}
		mean := sample.H.Sum / sample.H.Count
		var variance, cVariance float64
		it := sample.H.AllBucketIterator()
		for it.Next() {
			bucket := it.At()
			// Empty buckets contribute nothing to the sum; skipping them
			// also keeps an infinite bucket bound from turning 0*Inf into
			// NaN.
			if bucket.Count == 0 {
				continue
			}
			var val float64
			switch {
			case bucket.Lower <= 0 && 0 <= bucket.Upper:
				val = 0
			case bucket.Upper < 0:
				// Both bounds are negative, so their product is positive
				// and the square root loses the sign; restore it so the
				// bucket sits on the correct side of the mean.
				val = -math.Sqrt(bucket.Upper * bucket.Lower)
			default:
				val = math.Sqrt(bucket.Upper * bucket.Lower)
			}
			delta := val - mean
			// Compensated summation; cVariance carries the running
			// correction term and is folded in after the loop.
			variance, cVariance = kahanSumInc(bucket.Count*delta*delta, variance, cVariance)
		}
		variance += cVariance
		variance /= sample.H.Count
		enh.Out = append(enh.Out, Sample{
			Metric: enh.DropMetricName(sample.Metric),
			F:      varianceToResult(variance),
		})
	}
	return enh.Out
}
// === histogram_fraction(lower, upper parser.ValueTypeScalar, Vector parser.ValueTypeVector) Vector ===
func funcHistogramFraction(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) Vector {
lower := vals[0].(Vector)[0].F
@ -1377,6 +1443,8 @@ var FunctionCalls = map[string]FunctionCall{
"histogram_fraction": funcHistogramFraction,
"histogram_quantile": funcHistogramQuantile,
"histogram_sum": funcHistogramSum,
"histogram_stddev": funcHistogramStdDev,
"histogram_stdvar": funcHistogramStdVar,
"holt_winters": funcHoltWinters,
"hour": funcHour,
"idelta": funcIdelta,

View File

@ -173,6 +173,16 @@ var Functions = map[string]*Function{
ArgTypes: []ValueType{ValueTypeVector},
ReturnType: ValueTypeVector,
},
"histogram_stddev": {
Name: "histogram_stddev",
ArgTypes: []ValueType{ValueTypeVector},
ReturnType: ValueTypeVector,
},
"histogram_stdvar": {
Name: "histogram_stdvar",
ArgTypes: []ValueType{ValueTypeVector},
ReturnType: ValueTypeVector,
},
"histogram_fraction": {
Name: "histogram_fraction",
ArgTypes: []ValueType{ValueTypeScalar, ValueTypeScalar, ValueTypeVector},

View File

@ -239,6 +239,18 @@ export const functionIdentifierTerms = [
info: 'Return the sum of observations from a native histogram (experimental feature)',
type: 'function',
},
{
label: 'histogram_stddev',
detail: 'function',
info: 'Estimate the standard deviation of observations from a native histogram (experimental feature)',
type: 'function',
},
{
label: 'histogram_stdvar',
detail: 'function',
info: 'Estimate the standard variance of observations from a native histogram (experimental feature)',
type: 'function',
},
{
label: 'holt_winters',
detail: 'function',
@ -430,7 +442,7 @@ export const functionIdentifierTerms = [
{
label: 'stdvar_over_time',
detail: 'function',
info: 'Calculate the standard variation within input series over time',
info: 'Calculate the standard variance within input series over time',
type: 'function',
},
{

View File

@ -752,6 +752,30 @@ describe('promql operations', () => {
expectedValueType: ValueType.vector,
expectedDiag: [],
},
{
expr:
'histogram_stddev( # Root of the query, final result, returns the standard deviation of observations.\n' +
' sum by(method, path) ( # Argument to histogram_stddev(), an aggregated histogram.\n' +
' rate( # Argument to sum(), the per-second increase of a histogram over 5m.\n' +
' demo_api_request_duration_seconds{job="demo"}[5m] # Argument to rate(), a vector of sparse histogram series over the last 5m.\n' +
' )\n' +
' )\n' +
')',
expectedValueType: ValueType.vector,
expectedDiag: [],
},
{
expr:
'histogram_stdvar( # Root of the query, final result, returns the standard variance of observations.\n' +
' sum by(method, path) ( # Argument to histogram_stdvar(), an aggregated histogram.\n' +
' rate( # Argument to sum(), the per-second increase of a histogram over 5m.\n' +
' demo_api_request_duration_seconds{job="demo"}[5m] # Argument to rate(), a vector of sparse histogram series over the last 5m.\n' +
' )\n' +
' )\n' +
')',
expectedValueType: ValueType.vector,
expectedDiag: [],
},
{
expr: '1 @ start()',
expectedValueType: ValueType.scalar,

View File

@ -42,6 +42,8 @@ import {
HistogramCount,
HistogramFraction,
HistogramQuantile,
HistogramStdDev,
HistogramStdVar,
HistogramSum,
HoltWinters,
Hour,
@ -282,6 +284,18 @@ const promqlFunctions: { [key: number]: PromQLFunction } = {
variadic: 0,
returnType: ValueType.vector,
},
[HistogramStdDev]: {
name: 'histogram_stddev',
argTypes: [ValueType.vector],
variadic: 0,
returnType: ValueType.vector,
},
[HistogramStdVar]: {
name: 'histogram_stdvar',
argTypes: [ValueType.vector],
variadic: 0,
returnType: ValueType.vector,
},
[HistogramSum]: {
name: 'histogram_sum',
argTypes: [ValueType.vector],

View File

@ -135,6 +135,8 @@ FunctionIdentifier {
HistogramCount |
HistogramFraction |
HistogramQuantile |
HistogramStdDev |
HistogramStdVar |
HistogramSum |
HoltWinters |
Hour |
@ -362,6 +364,8 @@ NumberLiteral {
HistogramCount { condFn<"histogram_count"> }
HistogramFraction { condFn<"histogram_fraction"> }
HistogramQuantile { condFn<"histogram_quantile"> }
HistogramStdDev { condFn<"histogram_stddev"> }
HistogramStdVar { condFn<"histogram_stdvar"> }
HistogramSum { condFn<"histogram_sum"> }
HoltWinters { condFn<"holt_winters"> }
Hour { condFn<"hour"> }