Merge pull request #12525 from fatsheep9146/native-histogram-min-max

* Add function for iterating through all buckets in reverse to find max bucket

Signed-off-by: Carrie Edwards <edwrdscarrie@gmail.com>

* enhance histogram_quantile to get min/max value

Signed-off-by: Ziqi Zhao <zhaoziqi9146@gmail.com>

---------

Signed-off-by: Carrie Edwards <edwrdscarrie@gmail.com>
Signed-off-by: Ziqi Zhao <zhaoziqi9146@gmail.com>
Co-authored-by: Carrie Edwards <edwrdscarrie@gmail.com>
This commit is contained in:
Björn Rabenstein 2023-07-12 14:39:57 +02:00 committed by GitHub
commit 5da638d527
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 287 additions and 15 deletions

View File

@ -317,6 +317,12 @@ bound of that bucket is greater than
bucket. Otherwise, the upper bound of the lowest bucket is returned for
quantiles located in the lowest bucket.
You can use `histogram_quantile(0, v instant-vector)` to get the estimated minimum value stored in
a histogram.
You can use `histogram_quantile(1, v instant-vector)` to get the estimated maximum value stored in
a histogram.
## `holt_winters()`

View File

@ -615,10 +615,24 @@ func (h *FloatHistogram) NegativeReverseBucketIterator() BucketIterator[float64]
// set to the zero threshold.
func (h *FloatHistogram) AllBucketIterator() BucketIterator[float64] {
	// Ascending order: leftIter walks the negative buckets in reverse, rightIter
	// walks the positive buckets. State -1 makes Next start with leftIter. Each
	// field is set exactly once; the pre-rename negIter/posIter keys are gone.
	return &allFloatBucketIterator{
		h:         h,
		leftIter:  h.NegativeReverseBucketIterator(),
		rightIter: h.PositiveBucketIterator(),
		state:     -1,
	}
}
// AllReverseBucketIterator returns a BucketIterator to iterate over all
// positive, zero, and negative buckets in descending order (starting at the
// highest bucket and going down). If the lowest positive bucket or the highest
// negative bucket overlap with the zero bucket, their lower or upper boundary,
// respectively, is set to the zero threshold.
func (h *FloatHistogram) AllReverseBucketIterator() BucketIterator[float64] {
	// Mirror image of AllBucketIterator: the positive buckets are consumed first
	// (in reverse) through leftIter, the negative buckets last through rightIter.
	return &allFloatBucketIterator{
		h:         h,
		leftIter:  h.PositiveReverseBucketIterator(),
		rightIter: h.NegativeBucketIterator(),
		state:     -1,
	}
}
@ -903,8 +917,8 @@ func (i *reverseFloatBucketIterator) Next() bool {
}
type allFloatBucketIterator struct {
h *FloatHistogram
negIter, posIter BucketIterator[float64]
h *FloatHistogram
leftIter, rightIter BucketIterator[float64]
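// -1 means we are iterating the buckets of leftIter (the negative buckets
// when iterating forward, the positive buckets when iterating in reverse).
// 0 means it is time for the zero bucket.
// 1 means we are iterating the buckets of rightIter (the positive buckets
// when iterating forward, the negative buckets when iterating in reverse).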
@ -916,10 +930,13 @@ type allFloatBucketIterator struct {
func (i *allFloatBucketIterator) Next() bool {
switch i.state {
case -1:
if i.negIter.Next() {
i.currBucket = i.negIter.At()
if i.currBucket.Upper > -i.h.ZeroThreshold {
if i.leftIter.Next() {
i.currBucket = i.leftIter.At()
switch {
case i.currBucket.Upper < 0 && i.currBucket.Upper > -i.h.ZeroThreshold:
i.currBucket.Upper = -i.h.ZeroThreshold
case i.currBucket.Lower > 0 && i.currBucket.Lower < i.h.ZeroThreshold:
i.currBucket.Lower = i.h.ZeroThreshold
}
return true
}
@ -940,10 +957,13 @@ func (i *allFloatBucketIterator) Next() bool {
}
return i.Next()
case 1:
if i.posIter.Next() {
i.currBucket = i.posIter.At()
if i.currBucket.Lower < i.h.ZeroThreshold {
if i.rightIter.Next() {
i.currBucket = i.rightIter.At()
switch {
case i.currBucket.Lower > 0 && i.currBucket.Lower < i.h.ZeroThreshold:
i.currBucket.Lower = i.h.ZeroThreshold
case i.currBucket.Upper < 0 && i.currBucket.Upper > -i.h.ZeroThreshold:
i.currBucket.Upper = -i.h.ZeroThreshold
}
return true
}

View File

@ -1979,3 +1979,229 @@ func TestAllFloatBucketIterator(t *testing.T) {
})
}
}
func TestAllReverseFloatBucketIterator(t *testing.T) {
cases := []struct {
h FloatHistogram
// To determine the expected buckets.
includeNeg, includeZero, includePos bool
}{
{
h: FloatHistogram{
Count: 405,
ZeroCount: 102,
ZeroThreshold: 0.001,
Sum: 1008.4,
Schema: 1,
PositiveSpans: []Span{
{Offset: 0, Length: 4},
{Offset: 1, Length: 0},
{Offset: 3, Length: 3},
{Offset: 3, Length: 0},
{Offset: 2, Length: 0},
{Offset: 5, Length: 3},
},
PositiveBuckets: []float64{100, 344, 123, 55, 3, 63, 2, 54, 235, 33},
NegativeSpans: []Span{
{Offset: 0, Length: 3},
{Offset: 1, Length: 0},
{Offset: 3, Length: 0},
{Offset: 3, Length: 4},
{Offset: 2, Length: 0},
{Offset: 5, Length: 3},
},
NegativeBuckets: []float64{10, 34, 1230, 54, 67, 63, 2, 554, 235, 33},
},
includeNeg: true,
includeZero: true,
includePos: true,
},
{
h: FloatHistogram{
Count: 405,
ZeroCount: 102,
ZeroThreshold: 0.001,
Sum: 1008.4,
Schema: 1,
NegativeSpans: []Span{
{Offset: 0, Length: 3},
{Offset: 1, Length: 0},
{Offset: 3, Length: 0},
{Offset: 3, Length: 4},
{Offset: 2, Length: 0},
{Offset: 5, Length: 3},
},
NegativeBuckets: []float64{10, 34, 1230, 54, 67, 63, 2, 554, 235, 33},
},
includeNeg: true,
includeZero: true,
includePos: false,
},
{
h: FloatHistogram{
Count: 405,
ZeroCount: 102,
ZeroThreshold: 0.001,
Sum: 1008.4,
Schema: 1,
PositiveSpans: []Span{
{Offset: 0, Length: 4},
{Offset: 1, Length: 0},
{Offset: 3, Length: 3},
{Offset: 3, Length: 0},
{Offset: 2, Length: 0},
{Offset: 5, Length: 3},
},
PositiveBuckets: []float64{100, 344, 123, 55, 3, 63, 2, 54, 235, 33},
},
includeNeg: false,
includeZero: true,
includePos: true,
},
{
h: FloatHistogram{
Count: 405,
ZeroCount: 102,
ZeroThreshold: 0.001,
Sum: 1008.4,
Schema: 1,
},
includeNeg: false,
includeZero: true,
includePos: false,
},
{
h: FloatHistogram{
Count: 405,
ZeroCount: 0,
ZeroThreshold: 0.001,
Sum: 1008.4,
Schema: 1,
PositiveSpans: []Span{
{Offset: 0, Length: 4},
{Offset: 1, Length: 0},
{Offset: 3, Length: 3},
{Offset: 3, Length: 0},
{Offset: 2, Length: 0},
{Offset: 5, Length: 3},
},
PositiveBuckets: []float64{100, 344, 123, 55, 3, 63, 2, 54, 235, 33},
NegativeSpans: []Span{
{Offset: 0, Length: 3},
{Offset: 1, Length: 0},
{Offset: 3, Length: 0},
{Offset: 3, Length: 4},
{Offset: 2, Length: 0},
{Offset: 5, Length: 3},
},
NegativeBuckets: []float64{10, 34, 1230, 54, 67, 63, 2, 554, 235, 33},
},
includeNeg: true,
includeZero: false,
includePos: true,
},
{
h: FloatHistogram{
Count: 447,
ZeroCount: 42,
ZeroThreshold: 0.5, // Coinciding with bucket boundary.
Sum: 1008.4,
Schema: 0,
PositiveSpans: []Span{
{Offset: 0, Length: 4},
{Offset: 1, Length: 0},
{Offset: 3, Length: 3},
{Offset: 3, Length: 0},
{Offset: 2, Length: 0},
{Offset: 5, Length: 3},
},
PositiveBuckets: []float64{100, 344, 123, 55, 3, 63, 2, 54, 235, 33},
NegativeSpans: []Span{
{Offset: 0, Length: 3},
{Offset: 1, Length: 0},
{Offset: 3, Length: 0},
{Offset: 3, Length: 4},
{Offset: 2, Length: 0},
{Offset: 5, Length: 3},
},
NegativeBuckets: []float64{10, 34, 1230, 54, 67, 63, 2, 554, 235, 33},
},
includeNeg: true,
includeZero: true,
includePos: true,
},
{
h: FloatHistogram{
Count: 447,
ZeroCount: 42,
ZeroThreshold: 0.6, // Within the bucket closest to zero.
Sum: 1008.4,
Schema: 0,
PositiveSpans: []Span{
{Offset: 0, Length: 4},
{Offset: 1, Length: 0},
{Offset: 3, Length: 3},
{Offset: 3, Length: 0},
{Offset: 2, Length: 0},
{Offset: 5, Length: 3},
},
PositiveBuckets: []float64{100, 344, 123, 55, 3, 63, 2, 54, 235, 33},
NegativeSpans: []Span{
{Offset: 0, Length: 3},
{Offset: 1, Length: 0},
{Offset: 3, Length: 0},
{Offset: 3, Length: 4},
{Offset: 2, Length: 0},
{Offset: 5, Length: 3},
},
NegativeBuckets: []float64{10, 34, 1230, 54, 67, 63, 2, 554, 235, 33},
},
includeNeg: true,
includeZero: true,
includePos: true,
},
}
for i, c := range cases {
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
var expBuckets, actBuckets []Bucket[float64]
if c.includePos {
it := c.h.PositiveReverseBucketIterator()
for it.Next() {
b := it.At()
if c.includeZero && b.Lower < c.h.ZeroThreshold {
b.Lower = c.h.ZeroThreshold
}
expBuckets = append(expBuckets, b)
}
}
if c.includeZero {
expBuckets = append(expBuckets, Bucket[float64]{
Lower: -c.h.ZeroThreshold,
Upper: c.h.ZeroThreshold,
LowerInclusive: true,
UpperInclusive: true,
Count: c.h.ZeroCount,
})
}
if c.includeNeg {
it := c.h.NegativeBucketIterator()
for it.Next() {
b := it.At()
if c.includeZero && b.Upper > -c.h.ZeroThreshold {
b.Upper = -c.h.ZeroThreshold
}
expBuckets = append(expBuckets, b)
}
}
it := c.h.AllReverseBucketIterator()
for it.Next() {
actBuckets = append(actBuckets, it.At())
}
require.Equal(t, expBuckets, actBuckets)
})
}
}

View File

@ -158,9 +158,21 @@ func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 {
var (
bucket histogram.Bucket[float64]
count float64
it = h.AllBucketIterator()
rank = q * h.Count
it histogram.BucketIterator[float64]
rank float64
)
// Choose the iteration direction: use the forward iterator if the histogram
// contains NaN observations (h.Sum is NaN) or if q < 0.5. Otherwise
// (q >= 0.5), use the reverse iterator with the rank mirrored to (1 - q) * h.Count.
if math.IsNaN(h.Sum) || q < 0.5 {
it = h.AllBucketIterator()
rank = q * h.Count
} else {
it = h.AllReverseBucketIterator()
rank = (1 - q) * h.Count
}
for it.Next() {
bucket = it.At()
count += bucket.Count
@ -193,7 +205,15 @@ func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 {
return bucket.Upper
}
rank -= count - bucket.Count
// The buckets were iterated forward if the histogram contains NaN
// observations (h.Sum is NaN) or if q < 0.5, and in reverse otherwise.
// Adjust the rank to match the direction that was used.
if math.IsNaN(h.Sum) || q < 0.5 {
rank -= count - bucket.Count
} else {
rank = count - rank
}
// TODO(codesome): Use a better estimation than linear.
return bucket.Lower + (bucket.Upper-bucket.Lower)*(rank/bucket.Count)
}