histograms: Add Compact method to the normal integer Histogram
And use the new method to compact Histograms during
parsing. This happens for both `Histogram` and `FloatHistogram`. In
this way, if targets decide to optimize the exposition size by merging
spans with empty buckets in between, we still get a normalized
result. It will also normalize away any valid but weird
representations like empty spans, spans with offset zero, and empty
buckets at the start or end of a span.
The implementation seemed easy at first as it just turns the
`compactBuckets` helper into a generic function (which now got its own
file). However, the integer Histograms have delta buckets instead of
absolute buckets, which had to be treated specially in the generic
`compactBuckets` function. To make sure it works, I have added plenty
of explicit tests for `Histogram` in addition to the `FloatHistogram`
tests.
I have also updated the doc comment for the `Compact` method.
Based on the insights now expressed in the doc comment, compacting
with a maxEmptyBuckets > 0 is rarely useful. Therefore, this commit
also sets the value to 0 in the two cases we were using 3 so far. We
might still want to reconsider, so I don't want to remove the
maxEmptyBuckets parameter right now.
Signed-off-by: beorn7 <beorn@grafana.com>
2022-09-27 11:04:16 +00:00
|
|
|
// Copyright 2022 The Prometheus Authors
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package histogram
|
|
|
|
|
2022-10-03 11:15:27 +00:00
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"strings"
|
|
|
|
)
|
|
|
|
|
|
|
|
// BucketCount is a type constraint for the count in a bucket, which can be
// float64 (for type FloatHistogram) or uint64 (for type Histogram).
//
// The union lists the exact types (no ~ approximation), so only float64 and
// uint64 themselves satisfy the constraint, not named types derived from them.
type BucketCount interface {
	float64 | uint64
}
|
|
|
|
|
|
|
|
// internalBucketCount is used internally by Histogram and FloatHistogram. The
// difference to the BucketCount above is that Histogram internally uses deltas
// between buckets rather than absolute counts (while FloatHistogram uses
// absolute counts directly). Go type parameters don't allow type
// specialization. Therefore, where special treatment of deltas between buckets
// vs. absolute counts is important, this information has to be provided as a
// separate boolean parameter "deltaBuckets".
//
// Note that the integer member is the signed int64 (deltas can be negative),
// in contrast to the unsigned uint64 in BucketCount.
type internalBucketCount interface {
	float64 | int64
}
|
|
|
|
|
|
|
|
// Bucket represents a bucket with lower and upper limit and the absolute count
// of samples in the bucket. It also specifies if each limit is inclusive or
// not. (Mathematically, inclusive limits create a closed interval, and
// non-inclusive limits an open interval.)
//
// To represent cumulative buckets, Lower is set to -Inf, and the Count is then
// cumulative (including the counts of all buckets for smaller values).
type Bucket[BC BucketCount] struct {
	// Lower and Upper are the limits of the bucket.
	Lower, Upper float64
	// LowerInclusive and UpperInclusive state whether the respective limit
	// itself belongs to the bucket.
	LowerInclusive, UpperInclusive bool
	// Count is the absolute (not delta-encoded) count of samples in the
	// bucket.
	Count BC

	// Index within schema. To easily compare buckets that share the same
	// schema and sign (positive or negative). Irrelevant for the zero bucket.
	Index int32
}
|
|
|
|
|
|
|
|
// String returns a string representation of a Bucket, using the usual
|
|
|
|
// mathematical notation of '['/']' for inclusive bounds and '('/')' for
|
|
|
|
// non-inclusive bounds.
|
|
|
|
func (b Bucket[BC]) String() string {
|
|
|
|
var sb strings.Builder
|
|
|
|
if b.LowerInclusive {
|
|
|
|
sb.WriteRune('[')
|
|
|
|
} else {
|
|
|
|
sb.WriteRune('(')
|
|
|
|
}
|
|
|
|
fmt.Fprintf(&sb, "%g,%g", b.Lower, b.Upper)
|
|
|
|
if b.UpperInclusive {
|
|
|
|
sb.WriteRune(']')
|
|
|
|
} else {
|
|
|
|
sb.WriteRune(')')
|
|
|
|
}
|
|
|
|
fmt.Fprintf(&sb, ":%v", b.Count)
|
|
|
|
return sb.String()
|
|
|
|
}
|
|
|
|
|
|
|
|
// BucketIterator iterates over the buckets of a Histogram, returning decoded
// buckets.
//
// The type parameter BC selects the count type of the returned buckets
// (float64 or uint64, see BucketCount).
type BucketIterator[BC BucketCount] interface {
	// Next advances the iterator by one.
	// NOTE(review): the returned bool presumably reports whether a bucket
	// is available via At after the call — confirm against the
	// implementations.
	Next() bool
	// At returns the current bucket.
	At() Bucket[BC]
}
|
|
|
|
|
|
|
|
// baseBucketIterator provides a struct that is shared by most BucketIterator
// implementations, together with an implementation of the At method. This
// iterator can be embedded in full implementations of BucketIterator to save on
// code replication.
//
// BC is the count type of the buckets handed out by At, IBC is the type of the
// raw values stored in the buckets slice.
type baseBucketIterator[BC BucketCount, IBC internalBucketCount] struct {
	schema  int32  // Passed to getBound together with a bucket index to compute bucket limits.
	spans   []Span // The spans being iterated over.
	buckets []IBC  // The raw bucket values being iterated over.

	positive bool // Whether this is for positive buckets.

	spansIdx   int    // Current span within spans slice.
	idxInSpan  uint32 // Index in the current span. 0 <= idxInSpan < span.Length.
	bucketsIdx int    // Current bucket within buckets slice.

	currCount IBC   // Count in the current bucket.
	currIdx   int32 // The actual bucket index.
}
|
|
|
|
|
|
|
|
func (b baseBucketIterator[BC, IBC]) At() Bucket[BC] {
|
|
|
|
bucket := Bucket[BC]{
|
|
|
|
Count: BC(b.currCount),
|
|
|
|
Index: b.currIdx,
|
|
|
|
}
|
|
|
|
if b.positive {
|
|
|
|
bucket.Upper = getBound(b.currIdx, b.schema)
|
|
|
|
bucket.Lower = getBound(b.currIdx-1, b.schema)
|
|
|
|
} else {
|
|
|
|
bucket.Lower = -getBound(b.currIdx, b.schema)
|
|
|
|
bucket.Upper = -getBound(b.currIdx-1, b.schema)
|
|
|
|
}
|
|
|
|
bucket.LowerInclusive = bucket.Lower < 0
|
|
|
|
bucket.UpperInclusive = bucket.Upper > 0
|
|
|
|
return bucket
|
|
|
|
}
|
|
|
|
|
histograms: Add Compact method to the normal integer Histogram
And use the new method to compact Histograms during
parsing. This happens for both `Histogram` and `FloatHistogram`. In
this way, if targets decide to optimize the exposition size by merging
spans with empty buckets in between, we still get a normalized
result. It will also normalize away any valid but weird
representations like empty spans, spans with offset zero, and empty
buckets at the start or end of a span.
The implementation seemed easy at first as it just turns the
`compactBuckets` helper into a generic function (which now got its own
file). However, the integer Histograms have delta buckets instead of
absolute buckets, which had to be treated specially in the generic
`compactBuckets` function. To make sure it works, I have added plenty
of explicit tests for `Histogram` in addition to the `FloatHistogram`
tests.
I have also updated the doc comment for the `Compact` method.
Based on the insights now expressed in the doc comment, compacting
with a maxEmptyBuckets > 0 is rarely useful. Therefore, this commit
also sets the value to 0 in the two cases we were using 3 so far. We
might still want to reconsider, so I don't want to remove the
maxEmptyBuckets parameter right now.
Signed-off-by: beorn7 <beorn@grafana.com>
2022-09-27 11:04:16 +00:00
|
|
|
// compactBuckets is a generic function used by both Histogram.Compact and
|
|
|
|
// FloatHistogram.Compact. Set deltaBuckets to true if the provided buckets are
|
|
|
|
// deltas. Set it to false if the buckets contain absolute counts.
|
2022-10-03 11:15:27 +00:00
|
|
|
func compactBuckets[IBC internalBucketCount](buckets []IBC, spans []Span, maxEmptyBuckets int, deltaBuckets bool) ([]IBC, []Span) {
|
histograms: Add Compact method to the normal integer Histogram
And use the new method to call to compact Histograms during
parsing. This happens for both `Histogram` and `FloatHistogram`. In
this way, if targets decide to optimize the exposition size by merging
spans with empty buckets in between, we still get a normalized
results. It will also normalize away any valid but weird
representations like empty spans, spans with offset zero, and empty
buckets at the start or end of a span.
The implementation seemed easy at first as it just turns the
`compactBuckets` helper into a generic function (which now got its own
file). However, the integer Histograms have delta buckets instead of
absolute buckets, which had to be treated specially in the generic
`compactBuckets` function. To make sure it works, I have added plenty
of explicit tests for `Histogram` in addition to the `FloatHistogram`
tests.
I have also updated the doc comment for the `Compact` method.
Based on the insights now expressed in the doc comment, compacting
with a maxEmptyBuckets > 0 is rarely useful. Therefore, this commit
also sets the value to 0 in the two cases we were using 3 so far. We
might still want to reconsider, so I don't want to remove the
maxEmptyBuckets parameter right now.
Signed-off-by: beorn7 <beorn@grafana.com>
2022-09-27 11:04:16 +00:00
|
|
|
// Fast path: If there are no empty buckets AND no offset in any span is
|
|
|
|
// <= maxEmptyBuckets AND no span has length 0, there is nothing to do and we can return
|
|
|
|
// immediately. We check that first because it's cheap and presumably
|
|
|
|
// common.
|
|
|
|
nothingToDo := true
|
2022-10-03 11:15:27 +00:00
|
|
|
var currentBucketAbsolute IBC
|
histograms: Add Compact method to the normal integer Histogram
And use the new method to call to compact Histograms during
parsing. This happens for both `Histogram` and `FloatHistogram`. In
this way, if targets decide to optimize the exposition size by merging
spans with empty buckets in between, we still get a normalized
results. It will also normalize away any valid but weird
representations like empty spans, spans with offset zero, and empty
buckets at the start or end of a span.
The implementation seemed easy at first as it just turns the
`compactBuckets` helper into a generic function (which now got its own
file). However, the integer Histograms have delta buckets instead of
absolute buckets, which had to be treated specially in the generic
`compactBuckets` function. To make sure it works, I have added plenty
of explicit tests for `Histogram` in addition to the `FloatHistogram`
tests.
I have also updated the doc comment for the `Compact` method.
Based on the insights now expressed in the doc comment, compacting
with a maxEmptyBuckets > 0 is rarely useful. Therefore, this commit
also sets the value to 0 in the two cases we were using 3 so far. We
might still want to reconsider, so I don't want to remove the
maxEmptyBuckets parameter right now.
Signed-off-by: beorn7 <beorn@grafana.com>
2022-09-27 11:04:16 +00:00
|
|
|
for _, bucket := range buckets {
|
|
|
|
if deltaBuckets {
|
|
|
|
currentBucketAbsolute += bucket
|
|
|
|
} else {
|
|
|
|
currentBucketAbsolute = bucket
|
|
|
|
}
|
|
|
|
if currentBucketAbsolute == 0 {
|
|
|
|
nothingToDo = false
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if nothingToDo {
|
|
|
|
for _, span := range spans {
|
|
|
|
if int(span.Offset) <= maxEmptyBuckets || span.Length == 0 {
|
|
|
|
nothingToDo = false
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if nothingToDo {
|
|
|
|
return buckets, spans
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
var iBucket, iSpan int
|
|
|
|
var posInSpan uint32
|
|
|
|
currentBucketAbsolute = 0
|
|
|
|
|
|
|
|
// Helper function.
|
|
|
|
emptyBucketsHere := func() int {
|
|
|
|
i := 0
|
|
|
|
abs := currentBucketAbsolute
|
|
|
|
for uint32(i)+posInSpan < spans[iSpan].Length && abs == 0 {
|
|
|
|
i++
|
|
|
|
if i+iBucket >= len(buckets) {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
abs = buckets[i+iBucket]
|
|
|
|
}
|
|
|
|
return i
|
|
|
|
}
|
|
|
|
|
|
|
|
// Merge spans with zero-offset to avoid special cases later.
|
|
|
|
if len(spans) > 1 {
|
|
|
|
for i, span := range spans[1:] {
|
|
|
|
if span.Offset == 0 {
|
|
|
|
spans[iSpan].Length += span.Length
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
iSpan++
|
|
|
|
if i+1 != iSpan {
|
|
|
|
spans[iSpan] = span
|
|
|
|
}
|
|
|
|
}
|
|
|
|
spans = spans[:iSpan+1]
|
|
|
|
iSpan = 0
|
|
|
|
}
|
|
|
|
|
|
|
|
// Merge spans with zero-length to avoid special cases later.
|
|
|
|
for i, span := range spans {
|
|
|
|
if span.Length == 0 {
|
|
|
|
if i+1 < len(spans) {
|
|
|
|
spans[i+1].Offset += span.Offset
|
|
|
|
}
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if i != iSpan {
|
|
|
|
spans[iSpan] = span
|
|
|
|
}
|
|
|
|
iSpan++
|
|
|
|
}
|
|
|
|
spans = spans[:iSpan]
|
|
|
|
iSpan = 0
|
|
|
|
|
|
|
|
// Cut out empty buckets from start and end of spans, no matter
|
|
|
|
// what. Also cut out empty buckets from the middle of a span but only
|
|
|
|
// if there are more than maxEmptyBuckets consecutive empty buckets.
|
|
|
|
for iBucket < len(buckets) {
|
|
|
|
if deltaBuckets {
|
|
|
|
currentBucketAbsolute += buckets[iBucket]
|
|
|
|
} else {
|
|
|
|
currentBucketAbsolute = buckets[iBucket]
|
|
|
|
}
|
|
|
|
if nEmpty := emptyBucketsHere(); nEmpty > 0 {
|
|
|
|
if posInSpan > 0 &&
|
|
|
|
nEmpty < int(spans[iSpan].Length-posInSpan) &&
|
|
|
|
nEmpty <= maxEmptyBuckets {
|
|
|
|
// The empty buckets are in the middle of a
|
|
|
|
// span, and there are few enough to not bother.
|
|
|
|
// Just fast-forward.
|
|
|
|
iBucket += nEmpty
|
|
|
|
if deltaBuckets {
|
|
|
|
currentBucketAbsolute = 0
|
|
|
|
}
|
|
|
|
posInSpan += uint32(nEmpty)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
// In all other cases, we cut out the empty buckets.
|
|
|
|
if deltaBuckets && iBucket+nEmpty < len(buckets) {
|
|
|
|
currentBucketAbsolute = -buckets[iBucket]
|
|
|
|
buckets[iBucket+nEmpty] += buckets[iBucket]
|
|
|
|
}
|
|
|
|
buckets = append(buckets[:iBucket], buckets[iBucket+nEmpty:]...)
|
|
|
|
if posInSpan == 0 {
|
|
|
|
// Start of span.
|
|
|
|
if nEmpty == int(spans[iSpan].Length) {
|
|
|
|
// The whole span is empty.
|
|
|
|
offset := spans[iSpan].Offset
|
|
|
|
spans = append(spans[:iSpan], spans[iSpan+1:]...)
|
|
|
|
if len(spans) > iSpan {
|
|
|
|
spans[iSpan].Offset += offset + int32(nEmpty)
|
|
|
|
}
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
spans[iSpan].Length -= uint32(nEmpty)
|
|
|
|
spans[iSpan].Offset += int32(nEmpty)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
// It's in the middle or in the end of the span.
|
|
|
|
// Split the current span.
|
|
|
|
newSpan := Span{
|
|
|
|
Offset: int32(nEmpty),
|
|
|
|
Length: spans[iSpan].Length - posInSpan - uint32(nEmpty),
|
|
|
|
}
|
|
|
|
spans[iSpan].Length = posInSpan
|
|
|
|
// In any case, we have to split to the next span.
|
|
|
|
iSpan++
|
|
|
|
posInSpan = 0
|
|
|
|
if newSpan.Length == 0 {
|
|
|
|
// The span is empty, so we were already at the end of a span.
|
|
|
|
// We don't have to insert the new span, just adjust the next
|
|
|
|
// span's offset, if there is one.
|
|
|
|
if iSpan < len(spans) {
|
|
|
|
spans[iSpan].Offset += int32(nEmpty)
|
|
|
|
}
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
// Insert the new span.
|
|
|
|
spans = append(spans, Span{})
|
|
|
|
if iSpan+1 < len(spans) {
|
|
|
|
copy(spans[iSpan+1:], spans[iSpan:])
|
|
|
|
}
|
|
|
|
spans[iSpan] = newSpan
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
iBucket++
|
|
|
|
posInSpan++
|
|
|
|
if posInSpan >= spans[iSpan].Length {
|
|
|
|
posInSpan = 0
|
|
|
|
iSpan++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if maxEmptyBuckets == 0 || len(buckets) == 0 {
|
|
|
|
return buckets, spans
|
|
|
|
}
|
|
|
|
|
|
|
|
// Finally, check if any offsets between spans are small enough to merge
|
|
|
|
// the spans.
|
|
|
|
iBucket = int(spans[0].Length)
|
|
|
|
if deltaBuckets {
|
|
|
|
currentBucketAbsolute = 0
|
|
|
|
for _, bucket := range buckets[:iBucket] {
|
|
|
|
currentBucketAbsolute += bucket
|
|
|
|
}
|
|
|
|
}
|
|
|
|
iSpan = 1
|
|
|
|
for iSpan < len(spans) {
|
|
|
|
if int(spans[iSpan].Offset) > maxEmptyBuckets {
|
|
|
|
l := int(spans[iSpan].Length)
|
|
|
|
if deltaBuckets {
|
|
|
|
for _, bucket := range buckets[iBucket : iBucket+l] {
|
|
|
|
currentBucketAbsolute += bucket
|
|
|
|
}
|
|
|
|
}
|
|
|
|
iBucket += l
|
|
|
|
iSpan++
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
// Merge span with previous one and insert empty buckets.
|
|
|
|
offset := int(spans[iSpan].Offset)
|
|
|
|
spans[iSpan-1].Length += uint32(offset) + spans[iSpan].Length
|
|
|
|
spans = append(spans[:iSpan], spans[iSpan+1:]...)
|
2022-10-03 11:15:27 +00:00
|
|
|
newBuckets := make([]IBC, len(buckets)+offset)
|
histograms: Add Compact method to the normal integer Histogram
And use the new method to call to compact Histograms during
parsing. This happens for both `Histogram` and `FloatHistogram`. In
this way, if targets decide to optimize the exposition size by merging
spans with empty buckets in between, we still get a normalized
results. It will also normalize away any valid but weird
representations like empty spans, spans with offset zero, and empty
buckets at the start or end of a span.
The implementation seemed easy at first as it just turns the
`compactBuckets` helper into a generic function (which now got its own
file). However, the integer Histograms have delta buckets instead of
absolute buckets, which had to be treated specially in the generic
`compactBuckets` function. To make sure it works, I have added plenty
of explicit tests for `Histogram` in addition to the `FloatHistogram`
tests.
I have also updated the doc comment for the `Compact` method.
Based on the insights now expressed in the doc comment, compacting
with a maxEmptyBuckets > 0 is rarely useful. Therefore, this commit
also sets the value to 0 in the two cases we were using 3 so far. We
might still want to reconsider, so I don't want to remove the
maxEmptyBuckets parameter right now.
Signed-off-by: beorn7 <beorn@grafana.com>
2022-09-27 11:04:16 +00:00
|
|
|
copy(newBuckets, buckets[:iBucket])
|
|
|
|
copy(newBuckets[iBucket+offset:], buckets[iBucket:])
|
|
|
|
if deltaBuckets {
|
|
|
|
newBuckets[iBucket] = -currentBucketAbsolute
|
|
|
|
newBuckets[iBucket+offset] += currentBucketAbsolute
|
|
|
|
}
|
|
|
|
iBucket += offset
|
|
|
|
buckets = newBuckets
|
|
|
|
currentBucketAbsolute = buckets[iBucket]
|
|
|
|
// Note that with many merges, it would be more efficient to
|
|
|
|
// first record all the chunks of empty buckets to insert and
|
|
|
|
// then do it in one go through all the buckets.
|
|
|
|
}
|
|
|
|
|
|
|
|
return buckets, spans
|
|
|
|
}
|