diff --git a/promql/promqltest/test.go b/promql/promqltest/test.go index 642b47444a..e078bcb60b 100644 --- a/promql/promqltest/test.go +++ b/promql/promqltest/test.go @@ -39,6 +39,7 @@ import ( "github.com/prometheus/prometheus/promql/parser/posrange" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/util/almost" + "github.com/prometheus/prometheus/util/convertnhcb" "github.com/prometheus/prometheus/util/teststorage" "github.com/prometheus/prometheus/util/testutil" ) @@ -479,43 +480,22 @@ func (cmd *loadCmd) append(a storage.Appender) error { return nil } -func getHistogramMetricBase(m labels.Labels, suffix string) (labels.Labels, uint64) { - mName := m.Get(labels.MetricName) - baseM := labels.NewBuilder(m). - Set(labels.MetricName, strings.TrimSuffix(mName, suffix)). - Del(labels.BucketLabel). - Labels() - hash := baseM.Hash() - return baseM, hash -} - type tempHistogramWrapper struct { metric labels.Labels upperBounds []float64 - histogramByTs map[int64]tempHistogram + histogramByTs map[int64]convertnhcb.TempHistogram } func newTempHistogramWrapper() tempHistogramWrapper { return tempHistogramWrapper{ upperBounds: []float64{}, - histogramByTs: map[int64]tempHistogram{}, + histogramByTs: map[int64]convertnhcb.TempHistogram{}, } } -type tempHistogram struct { - bucketCounts map[float64]float64 - count float64 - sum float64 -} - -func newTempHistogram() tempHistogram { - return tempHistogram{ - bucketCounts: map[float64]float64{}, - } -} - -func processClassicHistogramSeries(m labels.Labels, suffix string, histogramMap map[uint64]tempHistogramWrapper, smpls []promql.Sample, updateHistogramWrapper func(*tempHistogramWrapper), updateHistogram func(*tempHistogram, float64)) { - m2, m2hash := getHistogramMetricBase(m, suffix) +func processClassicHistogramSeries(m labels.Labels, suffix string, histogramMap map[uint64]tempHistogramWrapper, smpls []promql.Sample, updateHistogramWrapper func(*tempHistogramWrapper), updateHistogram func(*convertnhcb.TempHistogram, float64)) { + m2 := convertnhcb.GetHistogramMetricBase(m, suffix) + m2hash := m2.Hash() histogramWrapper, exists := histogramMap[m2hash] if !exists { histogramWrapper = newTempHistogramWrapper() @@ -530,7 +510,7 @@ func processClassicHistogramSeries(m labels.Labels, suffix string, histogramMap } histogram, exists := histogramWrapper.histogramByTs[s.T] if !exists { - histogram = newTempHistogram() + histogram = convertnhcb.NewTempHistogram() } updateHistogram(&histogram, s.F) histogramWrapper.histogramByTs[s.T] = histogram @@ -538,34 +518,6 @@ func processClassicHistogramSeries(m labels.Labels, suffix string, histogramMap histogramMap[m2hash] = histogramWrapper } -func processUpperBoundsAndCreateBaseHistogram(upperBounds0 []float64) ([]float64, *histogram.FloatHistogram) { - sort.Float64s(upperBounds0) - upperBounds := make([]float64, 0, len(upperBounds0)) - prevLE := math.Inf(-1) - for _, le := range upperBounds0 { - if le != prevLE { // deduplicate - upperBounds = append(upperBounds, le) - prevLE = le - } - } - var customBounds []float64 - if upperBounds[len(upperBounds)-1] == math.Inf(1) { - customBounds = upperBounds[:len(upperBounds)-1] - } else { - customBounds = upperBounds - } - return upperBounds, &histogram.FloatHistogram{ - Count: 0, - Sum: 0, - Schema: histogram.CustomBucketsSchema, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: uint32(len(upperBounds))}, - }, - PositiveBuckets: make([]float64, len(upperBounds)), - CustomValues: customBounds, - } -} - // If classic histograms are defined, convert them into native histograms with custom // bounds and append the defined time series to the storage. func (cmd *loadCmd) appendCustomHistogram(a storage.Appender) error { @@ -584,16 +536,16 @@ func (cmd *loadCmd) appendCustomHistogram(a storage.Appender) error { } processClassicHistogramSeries(m, "_bucket", histogramMap, smpls, func(histogramWrapper *tempHistogramWrapper) { histogramWrapper.upperBounds = append(histogramWrapper.upperBounds, le) - }, func(histogram *tempHistogram, f float64) { - histogram.bucketCounts[le] = f + }, func(histogram *convertnhcb.TempHistogram, f float64) { + histogram.BucketCounts[le] = f }) case strings.HasSuffix(mName, "_count"): - processClassicHistogramSeries(m, "_count", histogramMap, smpls, nil, func(histogram *tempHistogram, f float64) { - histogram.count = f + processClassicHistogramSeries(m, "_count", histogramMap, smpls, nil, func(histogram *convertnhcb.TempHistogram, f float64) { + histogram.Count = f }) case strings.HasSuffix(mName, "_sum"): - processClassicHistogramSeries(m, "_sum", histogramMap, smpls, nil, func(histogram *tempHistogram, f float64) { - histogram.sum = f + processClassicHistogramSeries(m, "_sum", histogramMap, smpls, nil, func(histogram *convertnhcb.TempHistogram, f float64) { + histogram.Sum = f }) } } @@ -601,30 +553,21 @@ func (cmd *loadCmd) appendCustomHistogram(a storage.Appender) error { // Convert the collated classic histogram data into native histograms // with custom bounds and append them to the storage. for _, histogramWrapper := range histogramMap { - upperBounds, fhBase := processUpperBoundsAndCreateBaseHistogram(histogramWrapper.upperBounds) + upperBounds, hBase := convertnhcb.ProcessUpperBoundsAndCreateBaseHistogram(histogramWrapper.upperBounds, true) + fhBase := hBase.ToFloat(nil) samples := make([]promql.Sample, 0, len(histogramWrapper.histogramByTs)) for t, histogram := range histogramWrapper.histogramByTs { - fh := fhBase.Copy() - var prevCount, total float64 - for i, le := range upperBounds { - currCount, exists := histogram.bucketCounts[le] - if !exists { - currCount = 0 + h, fh := convertnhcb.NewHistogram(histogram, upperBounds, hBase, fhBase) + if fh == nil { + if err := h.Validate(); err != nil { + return err } - count := currCount - prevCount - fh.PositiveBuckets[i] = count - total += count - prevCount = currCount + fh = h.ToFloat(nil) } - fh.Sum = histogram.sum - if histogram.count != 0 { - total = histogram.count - } - fh.Count = total - s := promql.Sample{T: t, H: fh.Compact(0)} - if err := s.H.Validate(); err != nil { + if err := fh.Validate(); err != nil { return err } + s := promql.Sample{T: t, H: fh} samples = append(samples, s) } sort.Slice(samples, func(i, j int) bool { return samples[i].T < samples[j].T }) diff --git a/util/convertnhcb/convertnhcb.go b/util/convertnhcb/convertnhcb.go new file mode 100644 index 0000000000..acbb4bec85 --- /dev/null +++ b/util/convertnhcb/convertnhcb.go @@ -0,0 +1,184 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package convertnhcb + +import ( + "fmt" + "math" + "sort" + "strings" + + "github.com/grafana/regexp" + + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" +) + +var histogramNameSuffixReplacements = []struct { + pattern *regexp.Regexp + repl string +}{ + { + pattern: regexp.MustCompile(`_bucket$`), + repl: "", + }, + { + pattern: regexp.MustCompile(`_sum$`), + repl: "", + }, + { + pattern: regexp.MustCompile(`_count$`), + repl: "", + }, +} + +// TempHistogram is used to collect information about classic histogram +// samples incrementally before creating a histogram.Histogram or +// histogram.FloatHistogram based on the values collected. +type TempHistogram struct { + BucketCounts map[float64]float64 + Count float64 + Sum float64 + HasFloat bool +} + +// NewTempHistogram creates a new TempHistogram to +// collect information about classic histogram samples. +func NewTempHistogram() TempHistogram { + return TempHistogram{ + BucketCounts: map[float64]float64{}, + } +} + +func (h TempHistogram) getIntBucketCounts() (map[float64]int64, error) { + bucketCounts := map[float64]int64{} + for le, count := range h.BucketCounts { + intCount := int64(math.Round(count)) + if float64(intCount) != count { + return nil, fmt.Errorf("bucket count %f for le %g is not an integer", count, le) + } + bucketCounts[le] = intCount + } + return bucketCounts, nil +} + +// ProcessUpperBoundsAndCreateBaseHistogram prepares an integer native +// histogram with custom buckets based on the provided upper bounds. +// Everything is set except the bucket counts. +// The sorted upper bounds are also returned. +func ProcessUpperBoundsAndCreateBaseHistogram(upperBounds0 []float64, needsDedup bool) ([]float64, *histogram.Histogram) { + sort.Float64s(upperBounds0) + var upperBounds []float64 + if needsDedup { + upperBounds = make([]float64, 0, len(upperBounds0)) + prevLE := math.Inf(-1) + for _, le := range upperBounds0 { + if le != prevLE { + upperBounds = append(upperBounds, le) + prevLE = le + } + } + } else { + upperBounds = upperBounds0 + } + var customBounds []float64 + if upperBounds[len(upperBounds)-1] == math.Inf(1) { + customBounds = upperBounds[:len(upperBounds)-1] + } else { + customBounds = upperBounds + } + return upperBounds, &histogram.Histogram{ + Count: 0, + Sum: 0, + Schema: histogram.CustomBucketsSchema, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: uint32(len(upperBounds))}, + }, + PositiveBuckets: make([]int64, len(upperBounds)), + CustomValues: customBounds, + } +} + +// NewHistogram fills the bucket counts in the provided histogram.Histogram +// or histogram.FloatHistogram based on the provided temporary histogram and +// upper bounds. +func NewHistogram(histogram TempHistogram, upperBounds []float64, hBase *histogram.Histogram, fhBase *histogram.FloatHistogram) (*histogram.Histogram, *histogram.FloatHistogram) { + intBucketCounts, err := histogram.getIntBucketCounts() + if err != nil { + return nil, newFloatHistogram(histogram, upperBounds, histogram.BucketCounts, fhBase) + } + return newIntegerHistogram(histogram, upperBounds, intBucketCounts, hBase), nil +} + +func newIntegerHistogram(histogram TempHistogram, upperBounds []float64, bucketCounts map[float64]int64, hBase *histogram.Histogram) *histogram.Histogram { + h := hBase.Copy() + absBucketCounts := make([]int64, len(h.PositiveBuckets)) + var prevCount, total int64 + for i, le := range upperBounds { + currCount, exists := bucketCounts[le] + if !exists { + currCount = 0 + } + count := currCount - prevCount + absBucketCounts[i] = count + total += count + prevCount = currCount + } + h.PositiveBuckets[0] = absBucketCounts[0] + for i := 1; i < len(h.PositiveBuckets); i++ { + h.PositiveBuckets[i] = absBucketCounts[i] - absBucketCounts[i-1] + } + h.Sum = histogram.Sum + if histogram.Count != 0 { + total = int64(histogram.Count) + } + h.Count = uint64(total) + return h.Compact(0) +} + +func newFloatHistogram(histogram TempHistogram, upperBounds []float64, bucketCounts map[float64]float64, fhBase *histogram.FloatHistogram) *histogram.FloatHistogram { + fh := fhBase.Copy() + var prevCount, total float64 + for i, le := range upperBounds { + currCount, exists := bucketCounts[le] + if !exists { + currCount = 0 + } + count := currCount - prevCount + fh.PositiveBuckets[i] = count + total += count + prevCount = currCount + } + fh.Sum = histogram.Sum + if histogram.Count != 0 { + total = histogram.Count + } + fh.Count = total + return fh.Compact(0) +} + +func GetHistogramMetricBase(m labels.Labels, suffix string) labels.Labels { + mName := m.Get(labels.MetricName) + return labels.NewBuilder(m). + Set(labels.MetricName, strings.TrimSuffix(mName, suffix)). + Del(labels.BucketLabel). + Labels() +} + +func GetHistogramMetricBaseName(s string) string { + for _, rep := range histogramNameSuffixReplacements { + s = rep.pattern.ReplaceAllString(s, rep.repl) + } + return s +}