Merge pull request #12954 from linasm/linasm/optimize-floatBucketIterator2

Histogram performance: optimize floatBucketIterator
This commit is contained in:
Björn Rabenstein 2023-11-02 23:59:32 +01:00 committed by GitHub
commit fae4561369
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 110 additions and 37 deletions

View File

@ -466,25 +466,25 @@ func (h *FloatHistogram) DetectReset(previous *FloatHistogram) bool {
}
currIt := h.floatBucketIterator(true, h.ZeroThreshold, h.Schema)
prevIt := previous.floatBucketIterator(true, h.ZeroThreshold, h.Schema)
if detectReset(currIt, prevIt) {
if detectReset(&currIt, &prevIt) {
return true
}
currIt = h.floatBucketIterator(false, h.ZeroThreshold, h.Schema)
prevIt = previous.floatBucketIterator(false, h.ZeroThreshold, h.Schema)
return detectReset(currIt, prevIt)
return detectReset(&currIt, &prevIt)
}
func detectReset(currIt, prevIt BucketIterator[float64]) bool {
func detectReset(currIt, prevIt *floatBucketIterator) bool {
if !prevIt.Next() {
return false // If no buckets in previous histogram, nothing can be reset.
}
prevBucket := prevIt.At()
prevBucket := prevIt.strippedAt()
if !currIt.Next() {
// No bucket in current, but at least one in previous
// histogram. Check if any of those are non-zero, in which case
// this is a reset.
for {
if prevBucket.Count != 0 {
if prevBucket.count != 0 {
return true
}
if !prevIt.Next() {
@ -492,10 +492,10 @@ func detectReset(currIt, prevIt BucketIterator[float64]) bool {
}
}
}
currBucket := currIt.At()
currBucket := currIt.strippedAt()
for {
// Forward currIt until we find the bucket corresponding to prevBucket.
for currBucket.Index < prevBucket.Index {
for currBucket.index < prevBucket.index {
if !currIt.Next() {
// Reached end of currIt early, therefore
// previous histogram has a bucket that the
@ -503,7 +503,7 @@ func detectReset(currIt, prevIt BucketIterator[float64]) bool {
// remaining buckets in the previous histogram
// are unpopulated, this is a reset.
for {
if prevBucket.Count != 0 {
if prevBucket.count != 0 {
return true
}
if !prevIt.Next() {
@ -511,18 +511,18 @@ func detectReset(currIt, prevIt BucketIterator[float64]) bool {
}
}
}
currBucket = currIt.At()
currBucket = currIt.strippedAt()
}
if currBucket.Index > prevBucket.Index {
if currBucket.index > prevBucket.index {
// Previous histogram has a bucket the current one does
// not have. If it's populated, it's a reset.
if prevBucket.Count != 0 {
if prevBucket.count != 0 {
return true
}
} else {
// We have reached corresponding buckets in both iterators.
// We can finally compare the counts.
if currBucket.Count < prevBucket.Count {
if currBucket.count < prevBucket.count {
return true
}
}
@ -530,35 +530,39 @@ func detectReset(currIt, prevIt BucketIterator[float64]) bool {
// Reached end of prevIt without finding offending buckets.
return false
}
prevBucket = prevIt.At()
prevBucket = prevIt.strippedAt()
}
}
// PositiveBucketIterator returns a BucketIterator to iterate over all positive
// buckets in ascending order (starting next to the zero bucket and going up).
func (h *FloatHistogram) PositiveBucketIterator() BucketIterator[float64] {
return h.floatBucketIterator(true, 0, h.Schema)
it := h.floatBucketIterator(true, 0, h.Schema)
return &it
}
// NegativeBucketIterator returns a BucketIterator to iterate over all negative
// buckets in descending order (starting next to the zero bucket and going
// down).
func (h *FloatHistogram) NegativeBucketIterator() BucketIterator[float64] {
return h.floatBucketIterator(false, 0, h.Schema)
it := h.floatBucketIterator(false, 0, h.Schema)
return &it
}
// PositiveReverseBucketIterator returns a BucketIterator to iterate over all
// positive buckets in descending order (starting at the highest bucket and
// going down towards the zero bucket).
func (h *FloatHistogram) PositiveReverseBucketIterator() BucketIterator[float64] {
return newReverseFloatBucketIterator(h.PositiveSpans, h.PositiveBuckets, h.Schema, true)
it := newReverseFloatBucketIterator(h.PositiveSpans, h.PositiveBuckets, h.Schema, true)
return &it
}
// NegativeReverseBucketIterator returns a BucketIterator to iterate over all
// negative buckets in ascending order (starting at the lowest bucket and going
// up towards the zero bucket).
func (h *FloatHistogram) NegativeReverseBucketIterator() BucketIterator[float64] {
return newReverseFloatBucketIterator(h.NegativeSpans, h.NegativeBuckets, h.Schema, false)
it := newReverseFloatBucketIterator(h.NegativeSpans, h.NegativeBuckets, h.Schema, false)
return &it
}
// AllBucketIterator returns a BucketIterator to iterate over all negative,
@ -569,8 +573,8 @@ func (h *FloatHistogram) NegativeReverseBucketIterator() BucketIterator[float64]
func (h *FloatHistogram) AllBucketIterator() BucketIterator[float64] {
return &allFloatBucketIterator{
h: h,
leftIter: h.NegativeReverseBucketIterator(),
rightIter: h.PositiveBucketIterator(),
leftIter: newReverseFloatBucketIterator(h.NegativeSpans, h.NegativeBuckets, h.Schema, false),
rightIter: h.floatBucketIterator(true, 0, h.Schema),
state: -1,
}
}
@ -583,8 +587,8 @@ func (h *FloatHistogram) AllBucketIterator() BucketIterator[float64] {
func (h *FloatHistogram) AllReverseBucketIterator() BucketIterator[float64] {
return &allFloatBucketIterator{
h: h,
leftIter: h.PositiveReverseBucketIterator(),
rightIter: h.NegativeBucketIterator(),
leftIter: newReverseFloatBucketIterator(h.PositiveSpans, h.PositiveBuckets, h.Schema, true),
rightIter: h.floatBucketIterator(false, 0, h.Schema),
state: -1,
}
}
@ -715,11 +719,11 @@ func (h *FloatHistogram) reconcileZeroBuckets(other *FloatHistogram) float64 {
// targetSchema prior to iterating (without mutating FloatHistogram).
func (h *FloatHistogram) floatBucketIterator(
positive bool, absoluteStartValue float64, targetSchema int32,
) *floatBucketIterator {
) floatBucketIterator {
if targetSchema > h.Schema {
panic(fmt.Errorf("cannot merge from schema %d to %d", h.Schema, targetSchema))
}
i := &floatBucketIterator{
i := floatBucketIterator{
baseBucketIterator: baseBucketIterator[float64, float64]{
schema: h.Schema,
positive: positive,
@ -737,11 +741,11 @@ func (h *FloatHistogram) floatBucketIterator(
return i
}
// reverseFloatbucketiterator is a low-level constructor for reverse bucket iterators.
// reverseFloatBucketIterator is a low-level constructor for reverse bucket iterators.
func newReverseFloatBucketIterator(
spans []Span, buckets []float64, schema int32, positive bool,
) *reverseFloatBucketIterator {
r := &reverseFloatBucketIterator{
) reverseFloatBucketIterator {
r := reverseFloatBucketIterator{
baseBucketIterator: baseBucketIterator[float64, float64]{
schema: schema,
spans: spans,
@ -769,6 +773,8 @@ type floatBucketIterator struct {
targetSchema int32 // targetSchema is the schema to merge to and must be ≤ schema.
origIdx int32 // The bucket index within the original schema.
absoluteStartValue float64 // Never return buckets with an upper bound ≤ this value.
boundReachedStartValue bool // Has getBound reached absoluteStartValue already?
}
func (i *floatBucketIterator) At() Bucket[float64] {
@ -832,9 +838,10 @@ mergeLoop: // Merge together all buckets from the original schema that fall into
}
// Skip buckets before absoluteStartValue.
// TODO(beorn7): Maybe do something more efficient than this recursive call.
if getBound(i.currIdx, i.targetSchema) <= i.absoluteStartValue {
if !i.boundReachedStartValue && getBound(i.currIdx, i.targetSchema) <= i.absoluteStartValue {
return i.Next()
}
i.boundReachedStartValue = true
return true
}
@ -876,7 +883,8 @@ func (i *reverseFloatBucketIterator) Next() bool {
type allFloatBucketIterator struct {
h *FloatHistogram
leftIter, rightIter BucketIterator[float64]
leftIter reverseFloatBucketIterator
rightIter floatBucketIterator
// -1 means we are iterating negative buckets.
// 0 means it is time for the zero bucket.
// 1 means we are iterating positive buckets.

View File

@ -16,6 +16,7 @@ package histogram
import (
"fmt"
"math"
"math/rand"
"testing"
"github.com/stretchr/testify/require"
@ -2393,3 +2394,51 @@ func TestFloatHistogramSize(t *testing.T) {
})
}
}
func BenchmarkFloatHistogramAllBucketIterator(b *testing.B) {
rng := rand.New(rand.NewSource(0))
fh := createRandomFloatHistogram(rng, 50)
b.ReportAllocs() // the current implementation reports 1 alloc
b.ResetTimer()
for n := 0; n < b.N; n++ {
for it := fh.AllBucketIterator(); it.Next(); {
}
}
}
func BenchmarkFloatHistogramDetectReset(b *testing.B) {
rng := rand.New(rand.NewSource(0))
fh := createRandomFloatHistogram(rng, 50)
b.ReportAllocs() // the current implementation reports 0 allocs
b.ResetTimer()
for n := 0; n < b.N; n++ {
// Detect against the itself (no resets is the worst case input).
fh.DetectReset(fh)
}
}
func createRandomFloatHistogram(rng *rand.Rand, spanNum int32) *FloatHistogram {
f := &FloatHistogram{}
f.PositiveSpans, f.PositiveBuckets = createRandomSpans(rng, spanNum)
f.NegativeSpans, f.NegativeBuckets = createRandomSpans(rng, spanNum)
return f
}
func createRandomSpans(rng *rand.Rand, spanNum int32) ([]Span, []float64) {
Spans := make([]Span, spanNum)
Buckets := make([]float64, 0)
for i := 0; i < int(spanNum); i++ {
Spans[i].Offset = rng.Int31n(spanNum) + 1
Spans[i].Length = uint32(rng.Int31n(spanNum) + 1)
for j := 0; j < int(Spans[i].Length); j++ {
Buckets = append(Buckets, float64(rng.Int31n(spanNum)+1))
}
}
return Spans, Buckets
}

View File

@ -53,6 +53,13 @@ type Bucket[BC BucketCount] struct {
Index int32
}
// strippedBucket is Bucket without bound values (which are expensive to calculate
// and not used in certain use cases).
type strippedBucket[BC BucketCount] struct {
count BC
index int32
}
// String returns a string representation of a Bucket, using the usual
// mathematical notation of '['/']' for inclusive bounds and '('/')' for
// non-inclusive bounds.
@ -101,13 +108,12 @@ type baseBucketIterator[BC BucketCount, IBC InternalBucketCount] struct {
currIdx int32 // The actual bucket index.
}
func (b baseBucketIterator[BC, IBC]) At() Bucket[BC] {
func (b *baseBucketIterator[BC, IBC]) At() Bucket[BC] {
return b.at(b.schema)
}
// at is an internal version of the exported At to enable using a different
// schema.
func (b baseBucketIterator[BC, IBC]) at(schema int32) Bucket[BC] {
// at is an internal version of the exported At to enable using a different schema.
func (b *baseBucketIterator[BC, IBC]) at(schema int32) Bucket[BC] {
bucket := Bucket[BC]{
Count: BC(b.currCount),
Index: b.currIdx,
@ -124,6 +130,14 @@ func (b baseBucketIterator[BC, IBC]) at(schema int32) Bucket[BC] {
return bucket
}
// strippedAt returns current strippedBucket (which lacks bucket bounds but is cheaper to compute).
func (b *baseBucketIterator[BC, IBC]) strippedAt() strippedBucket[BC] {
return strippedBucket[BC]{
count: BC(b.currCount),
index: b.currIdx,
}
}
// compactBuckets is a generic function used by both Histogram.Compact and
// FloatHistogram.Compact. Set deltaBuckets to true if the provided buckets are
// deltas. Set it to false if the buckets contain absolute counts.

View File

@ -150,13 +150,15 @@ func (h *Histogram) ZeroBucket() Bucket[uint64] {
// PositiveBucketIterator returns a BucketIterator to iterate over all positive
// buckets in ascending order (starting next to the zero bucket and going up).
func (h *Histogram) PositiveBucketIterator() BucketIterator[uint64] {
return newRegularBucketIterator(h.PositiveSpans, h.PositiveBuckets, h.Schema, true)
it := newRegularBucketIterator(h.PositiveSpans, h.PositiveBuckets, h.Schema, true)
return &it
}
// NegativeBucketIterator returns a BucketIterator to iterate over all negative
// buckets in descending order (starting next to the zero bucket and going down).
func (h *Histogram) NegativeBucketIterator() BucketIterator[uint64] {
return newRegularBucketIterator(h.NegativeSpans, h.NegativeBuckets, h.Schema, false)
it := newRegularBucketIterator(h.NegativeSpans, h.NegativeBuckets, h.Schema, false)
return &it
}
// CumulativeBucketIterator returns a BucketIterator to iterate over a
@ -330,14 +332,14 @@ type regularBucketIterator struct {
baseBucketIterator[uint64, int64]
}
func newRegularBucketIterator(spans []Span, buckets []int64, schema int32, positive bool) *regularBucketIterator {
func newRegularBucketIterator(spans []Span, buckets []int64, schema int32, positive bool) regularBucketIterator {
i := baseBucketIterator[uint64, int64]{
schema: schema,
spans: spans,
buckets: buckets,
positive: positive,
}
return &regularBucketIterator{i}
return regularBucketIterator{i}
}
func (r *regularBucketIterator) Next() bool {