From 1cd6c1cde5a2e44dc5869016b294c7dccba269dc Mon Sep 17 00:00:00 2001 From: Linas Medziunas Date: Fri, 13 Oct 2023 10:58:26 +0300 Subject: [PATCH 1/3] ValidateHistogram: strict Count check in absence of NaNs Signed-off-by: Linas Medziunas --- promql/engine_test.go | 2 +- storage/interface.go | 1 + tsdb/db_test.go | 5 +++-- tsdb/head_append.go | 20 +++++++++++++++----- tsdb/head_test.go | 36 +++++++++++++++++++++--------------- 5 files changed, 41 insertions(+), 23 deletions(-) diff --git a/promql/engine_test.go b/promql/engine_test.go index baca992b8..7532a3294 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -3399,7 +3399,7 @@ func TestNativeHistogram_HistogramStdDevVar(t *testing.T) { { name: "-50, -8, 0, 3, 8, 9, 100, +Inf", h: &histogram.Histogram{ - Count: 8, + Count: 7, ZeroCount: 1, Sum: math.Inf(1), Schema: 3, diff --git a/storage/interface.go b/storage/interface.go index 211bcbc41..4da152aa4 100644 --- a/storage/interface.go +++ b/storage/interface.go @@ -44,6 +44,7 @@ var ( ErrExemplarsDisabled = fmt.Errorf("exemplar storage is disabled or max exemplars is less than or equal to 0") ErrNativeHistogramsDisabled = fmt.Errorf("native histograms are disabled") ErrHistogramCountNotBigEnough = errors.New("histogram's observation count should be at least the number of observations found in the buckets") + ErrHistogramCountMismatch = errors.New("histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)") ErrHistogramNegativeBucketCount = errors.New("histogram has a bucket whose observation count is negative") ErrHistogramSpanNegativeOffset = errors.New("histogram has a span whose offset is negative") ErrHistogramSpansBucketsMismatch = errors.New("histogram spans specify different number of buckets than provided") diff --git a/tsdb/db_test.go b/tsdb/db_test.go index 70d1844d4..f021faba9 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -508,7 +508,7 @@ func TestAmendHistogramDatapointCausesError(t *testing.T) { h := histogram.Histogram{ Schema: 3, - Count: 61, + Count: 52, Sum: 2.7, ZeroThreshold: 0.1, ZeroCount: 42, @@ -6314,6 +6314,7 @@ func testHistogramAppendAndQueryHelper(t *testing.T, floatHistogram bool) { t.Run("buckets disappearing", func(t *testing.T) { h.PositiveSpans[1].Length-- h.PositiveBuckets = h.PositiveBuckets[:len(h.PositiveBuckets)-1] + h.Count -= 3 appendHistogram(series1, 110, h, &exp1, histogram.CounterReset) testQuery("foo", "bar1", map[string][]chunks.Sample{series1.String(): exp1}) }) @@ -6533,7 +6534,7 @@ func TestNativeHistogramFlag(t *testing.T) { require.NoError(t, db.Close()) }) h := &histogram.Histogram{ - Count: 10, + Count: 9, ZeroCount: 4, ZeroThreshold: 0.001, Sum: 35.5, diff --git a/tsdb/head_append.go b/tsdb/head_append.go index d1f4d3035..330caad78 100644 --- a/tsdb/head_append.go +++ b/tsdb/head_append.go @@ -659,11 +659,21 @@ func ValidateHistogram(h *histogram.Histogram) error { return errors.Wrap(err, "positive side") } - if c := nCount + pCount + h.ZeroCount; c > h.Count { - return errors.Wrap( - storage.ErrHistogramCountNotBigEnough, - fmt.Sprintf("%d observations found in buckets, but the Count field is %d", c, h.Count), - ) + sumOfBuckets := nCount + pCount + h.ZeroCount + if math.IsNaN(h.Sum) { + if sumOfBuckets > h.Count { + return errors.Wrap( + storage.ErrHistogramCountNotBigEnough, + fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count), + ) + } + } else { + if sumOfBuckets != h.Count { + return errors.Wrap( + storage.ErrHistogramCountMismatch, + fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count), + ) + } } return nil diff --git a/tsdb/head_test.go b/tsdb/head_test.go index edecf8dfe..2feb745f1 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -3419,7 +3419,6 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) { hists = tsdbutil.GenerateTestHistograms(numHistograms) } for _, h := range hists { - h.Count *= 2 h.NegativeSpans = h.PositiveSpans h.NegativeBuckets = h.PositiveBuckets _, err := app.AppendHistogram(0, s1, ts, h, nil) @@ -3442,7 +3441,6 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) { hists = tsdbutil.GenerateTestFloatHistograms(numHistograms) } for _, h := range hists { - h.Count *= 2 h.NegativeSpans = h.PositiveSpans h.NegativeBuckets = h.PositiveBuckets _, err := app.AppendHistogram(0, s1, ts, nil, h) @@ -3484,7 +3482,6 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) { } for _, h := range hists { ts++ - h.Count *= 2 h.NegativeSpans = h.PositiveSpans h.NegativeBuckets = h.PositiveBuckets _, err := app.AppendHistogram(0, s2, ts, h, nil) @@ -3521,7 +3518,6 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) { } for _, h := range hists { ts++ - h.Count *= 2 h.NegativeSpans = h.PositiveSpans h.NegativeBuckets = h.PositiveBuckets _, err := app.AppendHistogram(0, s2, ts, nil, h) @@ -4907,7 +4903,7 @@ func TestHistogramValidation(t *testing.T) { "valid histogram": { h: tsdbutil.GenerateTestHistograms(1)[0], }, - "valid histogram that has its Count (4) higher than the actual total of buckets (2 + 1)": { + "valid histogram with NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": { // This case is possible if NaN values (which do not fall into any bucket) are observed. h: &histogram.Histogram{ ZeroCount: 2, @@ -4917,6 +4913,17 @@ func TestHistogramValidation(t *testing.T) { PositiveBuckets: []int64{1}, }, }, + "rejects histogram without NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": { + h: &histogram.Histogram{ + ZeroCount: 2, + Count: 4, + Sum: 333, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{1}, + }, + errMsg: `3 observations found in buckets, but the Count field is 4: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, + skipFloat: true, + }, "rejects histogram that has too few negative buckets": { h: &histogram.Histogram{ NegativeSpans: []histogram.Span{{Offset: 0, Length: 1}}, @@ -4981,7 +4988,7 @@ func TestHistogramValidation(t *testing.T) { NegativeBuckets: []int64{1}, PositiveBuckets: []int64{1}, }, - errMsg: `2 observations found in buckets, but the Count field is 0: histogram's observation count should be at least the number of observations found in the buckets`, + errMsg: `2 observations found in buckets, but the Count field is 0: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, skipFloat: true, }, "rejects a histogram that doesn't count the zero bucket in its count": { @@ -4993,7 +5000,7 @@ func TestHistogramValidation(t *testing.T) { NegativeBuckets: []int64{1}, PositiveBuckets: []int64{1}, }, - errMsg: `3 observations found in buckets, but the Count field is 2: histogram's observation count should be at least the number of observations found in the buckets`, + errMsg: `3 observations found in buckets, but the Count field is 2: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, skipFloat: true, }, } @@ -5029,8 +5036,8 @@ func generateBigTestHistograms(numHistograms, numBuckets int) []*histogram.Histo numSpans := numBuckets / 10 bucketsPerSide := numBuckets / 2 spanLength := uint32(bucketsPerSide / numSpans) - // Given all bucket deltas are 1, sum numHistograms + 1. - observationCount := numBuckets / 2 * (1 + numBuckets) + // Given all bucket deltas are 1, sum bucketsPerSide + 1. + observationCount := bucketsPerSide * (1 + bucketsPerSide) var histograms []*histogram.Histogram for i := 0; i < numHistograms; i++ { @@ -5491,14 +5498,13 @@ func TestCuttingNewHeadChunks(t *testing.T) { numSamples int numBytes int }{ - {30, 696}, - {30, 700}, - {30, 708}, - {30, 693}, + {40, 896}, + {40, 899}, + {40, 896}, + {30, 690}, {30, 691}, - {30, 692}, - {30, 695}, {30, 694}, + {30, 693}, }, }, "really large histograms": { From 1f8aea11d6f1a8b12a89011c1bb17c400454f0b4 Mon Sep 17 00:00:00 2001 From: Linas Medziunas Date: Fri, 13 Oct 2023 10:58:48 +0300 Subject: [PATCH 2/3] Move histogram validation code to model/histogram Signed-off-by: Linas Medziunas --- model/histogram/test_utils.go | 52 +++++++++ model/histogram/validate.go | 136 +++++++++++++++++++++++ model/histogram/validate_test.go | 175 +++++++++++++++++++++++++++++ storage/interface.go | 17 +-- tsdb/agent/db.go | 4 +- tsdb/head_append.go | 111 +------------------ tsdb/head_test.go | 183 +------------------------------ 7 files changed, 377 insertions(+), 301 deletions(-) create mode 100644 model/histogram/test_utils.go create mode 100644 model/histogram/validate.go create mode 100644 model/histogram/validate_test.go diff --git a/model/histogram/test_utils.go b/model/histogram/test_utils.go new file mode 100644 index 000000000..9e9a711c2 --- /dev/null +++ b/model/histogram/test_utils.go @@ -0,0 +1,52 @@ +// Copyright 2023 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package histogram + +// GenerateBigTestHistograms generates a slice of histograms with given number of buckets each. +func GenerateBigTestHistograms(numHistograms, numBuckets int) []*Histogram { + numSpans := numBuckets / 10 + bucketsPerSide := numBuckets / 2 + spanLength := uint32(bucketsPerSide / numSpans) + // Given all bucket deltas are 1, sum bucketsPerSide + 1. + observationCount := bucketsPerSide * (1 + bucketsPerSide) + + var histograms []*Histogram + for i := 0; i < numHistograms; i++ { + h := &Histogram{ + Count: uint64(i + observationCount), + ZeroCount: uint64(i), + ZeroThreshold: 1e-128, + Sum: 18.4 * float64(i+1), + Schema: 2, + NegativeSpans: make([]Span, numSpans), + PositiveSpans: make([]Span, numSpans), + NegativeBuckets: make([]int64, bucketsPerSide), + PositiveBuckets: make([]int64, bucketsPerSide), + } + + for j := 0; j < numSpans; j++ { + s := Span{Offset: 1, Length: spanLength} + h.NegativeSpans[j] = s + h.PositiveSpans[j] = s + } + + for j := 0; j < bucketsPerSide; j++ { + h.NegativeBuckets[j] = 1 + h.PositiveBuckets[j] = 1 + } + + histograms = append(histograms, h) + } + return histograms +} diff --git a/model/histogram/validate.go b/model/histogram/validate.go new file mode 100644 index 000000000..41649b798 --- /dev/null +++ b/model/histogram/validate.go @@ -0,0 +1,136 @@ +// Copyright 2023 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package histogram + +import ( + "fmt" + "math" + + "github.com/pkg/errors" +) + +var ( + ErrHistogramCountNotBigEnough = errors.New("histogram's observation count should be at least the number of observations found in the buckets") + ErrHistogramCountMismatch = errors.New("histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)") + ErrHistogramNegativeBucketCount = errors.New("histogram has a bucket whose observation count is negative") + ErrHistogramSpanNegativeOffset = errors.New("histogram has a span whose offset is negative") + ErrHistogramSpansBucketsMismatch = errors.New("histogram spans specify different number of buckets than provided") +) + +func ValidateHistogram(h *Histogram) error { + if err := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)); err != nil { + return errors.Wrap(err, "negative side") + } + if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil { + return errors.Wrap(err, "positive side") + } + var nCount, pCount uint64 + err := checkHistogramBuckets(h.NegativeBuckets, &nCount, true) + if err != nil { + return errors.Wrap(err, "negative side") + } + err = checkHistogramBuckets(h.PositiveBuckets, &pCount, true) + if err != nil { + return errors.Wrap(err, "positive side") + } + + sumOfBuckets := nCount + pCount + h.ZeroCount + if math.IsNaN(h.Sum) { + if sumOfBuckets > h.Count { + return errors.Wrap( + ErrHistogramCountNotBigEnough, + fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count), + ) + } + } else { + if sumOfBuckets != h.Count { + return errors.Wrap( + ErrHistogramCountMismatch, + fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count), + ) + } + } + + return nil +} + +func ValidateFloatHistogram(h *FloatHistogram) error { + if err := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)); err != nil { + return errors.Wrap(err, "negative side") + } + if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil { + return errors.Wrap(err, "positive side") + } + var nCount, pCount float64 + err := checkHistogramBuckets(h.NegativeBuckets, &nCount, false) + if err != nil { + return errors.Wrap(err, "negative side") + } + err = checkHistogramBuckets(h.PositiveBuckets, &pCount, false) + if err != nil { + return errors.Wrap(err, "positive side") + } + + // We do not check for h.Count being at least as large as the sum of the + // counts in the buckets because floating point precision issues can + // create false positives here. + + return nil +} + +func checkHistogramSpans(spans []Span, numBuckets int) error { + var spanBuckets int + for n, span := range spans { + if n > 0 && span.Offset < 0 { + return errors.Wrap( + ErrHistogramSpanNegativeOffset, + fmt.Sprintf("span number %d with offset %d", n+1, span.Offset), + ) + } + spanBuckets += int(span.Length) + } + if spanBuckets != numBuckets { + return errors.Wrap( + ErrHistogramSpansBucketsMismatch, + fmt.Sprintf("spans need %d buckets, have %d buckets", spanBuckets, numBuckets), + ) + } + return nil +} + +func checkHistogramBuckets[BC BucketCount, IBC InternalBucketCount](buckets []IBC, count *BC, deltas bool) error { + if len(buckets) == 0 { + return nil + } + + var last IBC + for i := 0; i < len(buckets); i++ { + var c IBC + if deltas { + c = last + buckets[i] + } else { + c = buckets[i] + } + if c < 0 { + return errors.Wrap( + ErrHistogramNegativeBucketCount, + fmt.Sprintf("bucket number %d has observation count of %v", i+1, c), + ) + } + last = c + *count += BC(c) + } + + return nil +} diff --git a/model/histogram/validate_test.go b/model/histogram/validate_test.go new file mode 100644 index 000000000..d9d8f0639 --- /dev/null +++ b/model/histogram/validate_test.go @@ -0,0 +1,175 @@ +// Copyright 2023 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package histogram + +import ( + "math" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestHistogramValidation(t *testing.T) { + tests := map[string]struct { + h *Histogram + errMsg string + skipFloat bool + }{ + "valid histogram": { + h: &Histogram{ + Count: 12, + ZeroCount: 2, + ZeroThreshold: 0.001, + Sum: 19.4, + Schema: 1, + PositiveSpans: []Span{ + {Offset: 0, Length: 2}, + {Offset: 1, Length: 2}, + }, + PositiveBuckets: []int64{1, 1, -1, 0}, + NegativeSpans: []Span{ + {Offset: 0, Length: 2}, + {Offset: 1, Length: 2}, + }, + NegativeBuckets: []int64{1, 1, -1, 0}, + }, + }, + "valid histogram with NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": { + // This case is possible if NaN values (which do not fall into any bucket) are observed. + h: &Histogram{ + ZeroCount: 2, + Count: 4, + Sum: math.NaN(), + PositiveSpans: []Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{1}, + }, + }, + "rejects histogram without NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": { + h: &Histogram{ + ZeroCount: 2, + Count: 4, + Sum: 333, + PositiveSpans: []Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{1}, + }, + errMsg: `3 observations found in buckets, but the Count field is 4: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, + skipFloat: true, + }, + "rejects histogram that has too few negative buckets": { + h: &Histogram{ + NegativeSpans: []Span{{Offset: 0, Length: 1}}, + NegativeBuckets: []int64{}, + }, + errMsg: `negative side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`, + }, + "rejects histogram that has too few positive buckets": { + h: &Histogram{ + PositiveSpans: []Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{}, + }, + errMsg: `positive side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`, + }, + "rejects histogram that has too many negative buckets": { + h: &Histogram{ + NegativeSpans: []Span{{Offset: 0, Length: 1}}, + NegativeBuckets: []int64{1, 2}, + }, + errMsg: `negative side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`, + }, + "rejects histogram that has too many positive buckets": { + h: &Histogram{ + PositiveSpans: []Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{1, 2}, + }, + errMsg: `positive side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`, + }, + "rejects a histogram that has a negative span with a negative offset": { + h: &Histogram{ + NegativeSpans: []Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}}, + NegativeBuckets: []int64{1, 2}, + }, + errMsg: `negative side: span number 2 with offset -1: histogram has a span whose offset is negative`, + }, + "rejects a histogram which has a positive span with a negative offset": { + h: &Histogram{ + PositiveSpans: []Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}}, + PositiveBuckets: []int64{1, 2}, + }, + errMsg: `positive side: span number 2 with offset -1: histogram has a span whose offset is negative`, + }, + "rejects a histogram that has a negative bucket with a negative count": { + h: &Histogram{ + NegativeSpans: []Span{{Offset: -1, Length: 1}}, + NegativeBuckets: []int64{-1}, + }, + errMsg: `negative side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`, + }, + "rejects a histogram that has a positive bucket with a negative count": { + h: &Histogram{ + PositiveSpans: []Span{{Offset: -1, Length: 1}}, + PositiveBuckets: []int64{-1}, + }, + errMsg: `positive side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`, + }, + "rejects a histogram that has a lower count than count in buckets": { + h: &Histogram{ + Count: 0, + NegativeSpans: []Span{{Offset: -1, Length: 1}}, + PositiveSpans: []Span{{Offset: -1, Length: 1}}, + NegativeBuckets: []int64{1}, + PositiveBuckets: []int64{1}, + }, + errMsg: `2 observations found in buckets, but the Count field is 0: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, + skipFloat: true, + }, + "rejects a histogram that doesn't count the zero bucket in its count": { + h: &Histogram{ + Count: 2, + ZeroCount: 1, + NegativeSpans: []Span{{Offset: -1, Length: 1}}, + PositiveSpans: []Span{{Offset: -1, Length: 1}}, + NegativeBuckets: []int64{1}, + PositiveBuckets: []int64{1}, + }, + errMsg: `3 observations found in buckets, but the Count field is 2: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, + skipFloat: true, + }, + } + + for testName, tc := range tests { + t.Run(testName, func(t *testing.T) { + if err := ValidateHistogram(tc.h); tc.errMsg != "" { + require.EqualError(t, err, tc.errMsg) + } else { + require.NoError(t, err) + } + if tc.skipFloat { + return + } + if err := ValidateFloatHistogram(tc.h.ToFloat()); tc.errMsg != "" { + require.EqualError(t, err, tc.errMsg) + } else { + require.NoError(t, err) + } + }) + } +} + +func BenchmarkHistogramValidation(b *testing.B) { + histograms := GenerateBigTestHistograms(b.N, 500) + b.ResetTimer() + for _, h := range histograms { + require.NoError(b, ValidateHistogram(h)) + } +} diff --git a/storage/interface.go b/storage/interface.go index 4da152aa4..2b1b6a63e 100644 --- a/storage/interface.go +++ b/storage/interface.go @@ -37,17 +37,12 @@ var ( // ErrTooOldSample is when out of order support is enabled but the sample is outside the time window allowed. ErrTooOldSample = errors.New("too old sample") // ErrDuplicateSampleForTimestamp is when the sample has same timestamp but different value. - ErrDuplicateSampleForTimestamp = errors.New("duplicate sample for timestamp") - ErrOutOfOrderExemplar = errors.New("out of order exemplar") - ErrDuplicateExemplar = errors.New("duplicate exemplar") - ErrExemplarLabelLength = fmt.Errorf("label length for exemplar exceeds maximum of %d UTF-8 characters", exemplar.ExemplarMaxLabelSetLength) - ErrExemplarsDisabled = fmt.Errorf("exemplar storage is disabled or max exemplars is less than or equal to 0") - ErrNativeHistogramsDisabled = fmt.Errorf("native histograms are disabled") - ErrHistogramCountNotBigEnough = errors.New("histogram's observation count should be at least the number of observations found in the buckets") - ErrHistogramCountMismatch = errors.New("histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)") - ErrHistogramNegativeBucketCount = errors.New("histogram has a bucket whose observation count is negative") - ErrHistogramSpanNegativeOffset = errors.New("histogram has a span whose offset is negative") - ErrHistogramSpansBucketsMismatch = errors.New("histogram spans specify different number of buckets than provided") + ErrDuplicateSampleForTimestamp = errors.New("duplicate sample for timestamp") + ErrOutOfOrderExemplar = errors.New("out of order exemplar") + ErrDuplicateExemplar = errors.New("duplicate exemplar") + ErrExemplarLabelLength = fmt.Errorf("label length for exemplar exceeds maximum of %d UTF-8 characters", exemplar.ExemplarMaxLabelSetLength) + ErrExemplarsDisabled = fmt.Errorf("exemplar storage is disabled or max exemplars is less than or equal to 0") + ErrNativeHistogramsDisabled = fmt.Errorf("native histograms are disabled") ) // SeriesRef is a generic series reference. In prometheus it is either a diff --git a/tsdb/agent/db.go b/tsdb/agent/db.go index 3912b9d52..188b10585 100644 --- a/tsdb/agent/db.go +++ b/tsdb/agent/db.go @@ -883,13 +883,13 @@ func (a *appender) AppendExemplar(ref storage.SeriesRef, _ labels.Labels, e exem func (a *appender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { if h != nil { - if err := tsdb.ValidateHistogram(h); err != nil { + if err := histogram.ValidateHistogram(h); err != nil { return 0, err } } if fh != nil { - if err := tsdb.ValidateFloatHistogram(fh); err != nil { + if err := histogram.ValidateFloatHistogram(fh); err != nil { return 0, err } } diff --git a/tsdb/head_append.go b/tsdb/head_append.go index 330caad78..eeaaa369f 100644 --- a/tsdb/head_append.go +++ b/tsdb/head_append.go @@ -521,13 +521,13 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels } if h != nil { - if err := ValidateHistogram(h); err != nil { + if err := histogram.ValidateHistogram(h); err != nil { return 0, err } } if fh != nil { - if err := ValidateFloatHistogram(fh); err != nil { + if err := histogram.ValidateFloatHistogram(fh); err != nil { return 0, err } } @@ -642,113 +642,6 @@ func (a *headAppender) UpdateMetadata(ref storage.SeriesRef, lset labels.Labels, return ref, nil } -func ValidateHistogram(h *histogram.Histogram) error { - if err := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)); err != nil { - return errors.Wrap(err, "negative side") - } - if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil { - return errors.Wrap(err, "positive side") - } - var nCount, pCount uint64 - err := checkHistogramBuckets(h.NegativeBuckets, &nCount, true) - if err != nil { - return errors.Wrap(err, "negative side") - } - err = checkHistogramBuckets(h.PositiveBuckets, &pCount, true) - if err != nil { - return errors.Wrap(err, "positive side") - } - - sumOfBuckets := nCount + pCount + h.ZeroCount - if math.IsNaN(h.Sum) { - if sumOfBuckets > h.Count { - return errors.Wrap( - storage.ErrHistogramCountNotBigEnough, - fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count), - ) - } - } else { - if sumOfBuckets != h.Count { - return errors.Wrap( - storage.ErrHistogramCountMismatch, - fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count), - ) - } - } - - return nil -} - -func ValidateFloatHistogram(h *histogram.FloatHistogram) error { - if err := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)); err != nil { - return errors.Wrap(err, "negative side") - } - if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil { - return errors.Wrap(err, "positive side") - } - var nCount, pCount float64 - err := checkHistogramBuckets(h.NegativeBuckets, &nCount, false) - if err != nil { - return errors.Wrap(err, "negative side") - } - err = checkHistogramBuckets(h.PositiveBuckets, &pCount, false) - if err != nil { - return errors.Wrap(err, "positive side") - } - - // We do not check for h.Count being at least as large as the sum of the - // counts in the buckets because floating point precision issues can - // create false positives here. - - return nil -} - -func checkHistogramSpans(spans []histogram.Span, numBuckets int) error { - var spanBuckets int - for n, span := range spans { - if n > 0 && span.Offset < 0 { - return errors.Wrap( - storage.ErrHistogramSpanNegativeOffset, - fmt.Sprintf("span number %d with offset %d", n+1, span.Offset), - ) - } - spanBuckets += int(span.Length) - } - if spanBuckets != numBuckets { - return errors.Wrap( - storage.ErrHistogramSpansBucketsMismatch, - fmt.Sprintf("spans need %d buckets, have %d buckets", spanBuckets, numBuckets), - ) - } - return nil -} - -func checkHistogramBuckets[BC histogram.BucketCount, IBC histogram.InternalBucketCount](buckets []IBC, count *BC, deltas bool) error { - if len(buckets) == 0 { - return nil - } - - var last IBC - for i := 0; i < len(buckets); i++ { - var c IBC - if deltas { - c = last + buckets[i] - } else { - c = buckets[i] - } - if c < 0 { - return errors.Wrap( - storage.ErrHistogramNegativeBucketCount, - fmt.Sprintf("bucket number %d has observation count of %v", i+1, c), - ) - } - last = c - *count += BC(c) - } - - return nil -} - var _ storage.GetRef = &headAppender{} func (a *headAppender) GetRef(lset labels.Labels, hash uint64) (storage.SeriesRef, labels.Labels) { diff --git a/tsdb/head_test.go b/tsdb/head_test.go index 2feb745f1..1216dd0a6 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -4894,181 +4894,6 @@ func TestReplayAfterMmapReplayError(t *testing.T) { require.NoError(t, h.Close()) } -func TestHistogramValidation(t *testing.T) { - tests := map[string]struct { - h *histogram.Histogram - errMsg string - skipFloat bool - }{ - "valid histogram": { - h: tsdbutil.GenerateTestHistograms(1)[0], - }, - "valid histogram with NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": { - // This case is possible if NaN values (which do not fall into any bucket) are observed. - h: &histogram.Histogram{ - ZeroCount: 2, - Count: 4, - Sum: math.NaN(), - PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}}, - PositiveBuckets: []int64{1}, - }, - }, - "rejects histogram without NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": { - h: &histogram.Histogram{ - ZeroCount: 2, - Count: 4, - Sum: 333, - PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}}, - PositiveBuckets: []int64{1}, - }, - errMsg: `3 observations found in buckets, but the Count field is 4: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, - skipFloat: true, - }, - "rejects histogram that has too few negative buckets": { - h: &histogram.Histogram{ - NegativeSpans: []histogram.Span{{Offset: 0, Length: 1}}, - NegativeBuckets: []int64{}, - }, - errMsg: `negative side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`, - }, - "rejects histogram that has too few positive buckets": { - h: &histogram.Histogram{ - PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}}, - PositiveBuckets: []int64{}, - }, - errMsg: `positive side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`, - }, - "rejects histogram that has too many negative buckets": { - h: &histogram.Histogram{ - NegativeSpans: []histogram.Span{{Offset: 0, Length: 1}}, - NegativeBuckets: []int64{1, 2}, - }, - errMsg: `negative side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`, - }, - "rejects histogram that has too many positive buckets": { - h: &histogram.Histogram{ - PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}}, - PositiveBuckets: []int64{1, 2}, - }, - errMsg: `positive side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`, - }, - "rejects a histogram that has a negative span with a negative offset": { - h: &histogram.Histogram{ - NegativeSpans: []histogram.Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}}, - NegativeBuckets: []int64{1, 2}, - }, - errMsg: `negative side: span number 2 with offset -1: histogram has a span whose offset is negative`, - }, - "rejects a histogram which has a positive span with a negative offset": { - h: &histogram.Histogram{ - PositiveSpans: []histogram.Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}}, - PositiveBuckets: []int64{1, 2}, - }, - errMsg: `positive side: span number 2 with offset -1: histogram has a span whose offset is negative`, - }, - "rejects a histogram that has a negative bucket with a negative count": { - h: &histogram.Histogram{ - NegativeSpans: []histogram.Span{{Offset: -1, Length: 1}}, - NegativeBuckets: []int64{-1}, - }, - errMsg: `negative side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`, - }, - "rejects a histogram that has a positive bucket with a negative count": { - h: &histogram.Histogram{ - PositiveSpans: []histogram.Span{{Offset: -1, Length: 1}}, - PositiveBuckets: []int64{-1}, - }, - errMsg: `positive side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`, - }, - "rejects a histogram that has a lower count than count in buckets": { - h: &histogram.Histogram{ - Count: 0, - NegativeSpans: []histogram.Span{{Offset: -1, Length: 1}}, - PositiveSpans: []histogram.Span{{Offset: -1, Length: 1}}, - NegativeBuckets: []int64{1}, - PositiveBuckets: []int64{1}, - }, - errMsg: `2 observations found in buckets, but the Count field is 0: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, - skipFloat: true, - }, - "rejects a histogram that doesn't count the zero bucket in its count": { - h: &histogram.Histogram{ - Count: 2, - ZeroCount: 1, - NegativeSpans: []histogram.Span{{Offset: -1, Length: 1}}, - PositiveSpans: []histogram.Span{{Offset: -1, Length: 1}}, - NegativeBuckets: []int64{1}, - PositiveBuckets: []int64{1}, - }, - errMsg: `3 observations found in buckets, but the Count field is 2: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, - skipFloat: true, - }, - } - - for testName, tc := range tests { - t.Run(testName, func(t *testing.T) { - if err := ValidateHistogram(tc.h); tc.errMsg != "" { - require.EqualError(t, err, tc.errMsg) - } else { - require.NoError(t, err) - } - if tc.skipFloat { - return - } - if err := ValidateFloatHistogram(tc.h.ToFloat()); tc.errMsg != "" { - require.EqualError(t, err, tc.errMsg) - } else { - require.NoError(t, err) - } - }) - } -} - -func BenchmarkHistogramValidation(b *testing.B) { - histograms := generateBigTestHistograms(b.N, 500) - b.ResetTimer() - for _, h := range histograms { - require.NoError(b, ValidateHistogram(h)) - } -} - -func generateBigTestHistograms(numHistograms, numBuckets int) []*histogram.Histogram { - numSpans := numBuckets / 10 - bucketsPerSide := numBuckets / 2 - spanLength := uint32(bucketsPerSide / numSpans) - // Given all bucket deltas are 1, sum bucketsPerSide + 1. - observationCount := bucketsPerSide * (1 + bucketsPerSide) - - var histograms []*histogram.Histogram - for i := 0; i < numHistograms; i++ { - h := &histogram.Histogram{ - Count: uint64(i + observationCount), - ZeroCount: uint64(i), - ZeroThreshold: 1e-128, - Sum: 18.4 * float64(i+1), - Schema: 2, - NegativeSpans: make([]histogram.Span, numSpans), - PositiveSpans: make([]histogram.Span, numSpans), - NegativeBuckets: make([]int64, bucketsPerSide), - PositiveBuckets: make([]int64, bucketsPerSide), - } - - for j := 0; j < numSpans; j++ { - s := histogram.Span{Offset: 1, Length: spanLength} - h.NegativeSpans[j] = s - h.PositiveSpans[j] = s - } - - for j := 0; j < bucketsPerSide; j++ { - h.NegativeBuckets[j] = 1 - h.PositiveBuckets[j] = 1 - } - - histograms = append(histograms, h) - } - return histograms -} - func TestOOOAppendWithNoSeries(t *testing.T) { dir := t.TempDir() wal, err := wlog.NewSize(nil, nil, filepath.Join(dir, "wal"), 32768, wlog.CompressionSnappy) @@ -5409,7 +5234,7 @@ func BenchmarkCuttingHeadHistogramChunks(b *testing.B) { numSamples = 50000 numBuckets = 100 ) - samples := generateBigTestHistograms(numSamples, numBuckets) + samples := histogram.GenerateBigTestHistograms(numSamples, numBuckets) h, _ := newTestHead(b, DefaultBlockDuration, wlog.CompressionNone, false) defer func() { @@ -5473,7 +5298,7 @@ func TestCuttingNewHeadChunks(t *testing.T) { "small histograms": { numTotalSamples: 240, histValFunc: func() func(i int) *histogram.Histogram { - hists := generateBigTestHistograms(240, 10) + hists := histogram.GenerateBigTestHistograms(240, 10) return func(i int) *histogram.Histogram { return hists[i] } @@ -5489,7 +5314,7 @@ func TestCuttingNewHeadChunks(t *testing.T) { "large histograms": { numTotalSamples: 240, histValFunc: func() func(i int) *histogram.Histogram { - hists := generateBigTestHistograms(240, 100) + hists := histogram.GenerateBigTestHistograms(240, 100) return func(i int) *histogram.Histogram { return hists[i] } @@ -5512,7 +5337,7 @@ func TestCuttingNewHeadChunks(t *testing.T) { // per chunk. numTotalSamples: 11, histValFunc: func() func(i int) *histogram.Histogram { - hists := generateBigTestHistograms(11, 100000) + hists := histogram.GenerateBigTestHistograms(11, 100000) return func(i int) *histogram.Histogram { return hists[i] } From ebed7d0612dca43e3ef89f00c3882b58463d21ab Mon Sep 17 00:00:00 2001 From: Linas Medziunas Date: Fri, 3 Nov 2023 16:47:59 +0200 Subject: [PATCH 3/3] Change Validate to be a method on histogram structs Signed-off-by: Linas Medziunas --- model/histogram/float_histogram.go | 27 +++++ model/histogram/generic.go | 56 +++++++++ model/histogram/histogram.go | 45 ++++++++ model/histogram/histogram_test.go | 156 +++++++++++++++++++++++++ model/histogram/validate.go | 136 ---------------------- model/histogram/validate_test.go | 175 ----------------------------- tsdb/agent/db.go | 4 +- tsdb/head_append.go | 4 +- 8 files changed, 288 insertions(+), 315 deletions(-) delete mode 100644 model/histogram/validate.go delete mode 100644 model/histogram/validate_test.go diff --git a/model/histogram/float_histogram.go b/model/histogram/float_histogram.go index b519cbc58..fd6c2560f 100644 --- a/model/histogram/float_histogram.go +++ b/model/histogram/float_histogram.go @@ -17,6 +17,8 @@ import ( "fmt" "math" "strings" + + "github.com/pkg/errors" ) // FloatHistogram is similar to Histogram but uses float64 for all @@ -593,6 +595,31 @@ func (h *FloatHistogram) AllReverseBucketIterator() BucketIterator[float64] { } } +// Validate validates consistency between span and bucket slices. Also, buckets are checked +// against negative values. +// We do not check for h.Count being at least as large as the sum of the +// counts in the buckets because floating point precision issues can +// create false positives here. +func (h *FloatHistogram) Validate() error { + if err := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)); err != nil { + return errors.Wrap(err, "negative side") + } + if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil { + return errors.Wrap(err, "positive side") + } + var nCount, pCount float64 + err := checkHistogramBuckets(h.NegativeBuckets, &nCount, false) + if err != nil { + return errors.Wrap(err, "negative side") + } + err = checkHistogramBuckets(h.PositiveBuckets, &pCount, false) + if err != nil { + return errors.Wrap(err, "positive side") + } + + return nil +} + // zeroCountForLargerThreshold returns what the histogram's zero count would be // if the ZeroThreshold had the provided larger (or equal) value. If the // provided value is less than the histogram's ZeroThreshold, the method panics. diff --git a/model/histogram/generic.go b/model/histogram/generic.go index 3c1ad7cc8..22048c44e 100644 --- a/model/histogram/generic.go +++ b/model/histogram/generic.go @@ -17,6 +17,16 @@ import ( "fmt" "math" "strings" + + "github.com/pkg/errors" +) + +var ( + ErrHistogramCountNotBigEnough = errors.New("histogram's observation count should be at least the number of observations found in the buckets") + ErrHistogramCountMismatch = errors.New("histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)") + ErrHistogramNegativeBucketCount = errors.New("histogram has a bucket whose observation count is negative") + ErrHistogramSpanNegativeOffset = errors.New("histogram has a span whose offset is negative") + ErrHistogramSpansBucketsMismatch = errors.New("histogram spans specify different number of buckets than provided") ) // BucketCount is a type constraint for the count in a bucket, which can be @@ -347,6 +357,52 @@ func compactBuckets[IBC InternalBucketCount](buckets []IBC, spans []Span, maxEmp return buckets, spans } +func checkHistogramSpans(spans []Span, numBuckets int) error { + var spanBuckets int + for n, span := range spans { + if n > 0 && span.Offset < 0 { + return errors.Wrap( + ErrHistogramSpanNegativeOffset, + fmt.Sprintf("span number %d with offset %d", n+1, span.Offset), + ) + } + spanBuckets += int(span.Length) + } + if spanBuckets != numBuckets { + return errors.Wrap( + ErrHistogramSpansBucketsMismatch, + fmt.Sprintf("spans need %d buckets, have %d buckets", spanBuckets, numBuckets), + ) + } + return nil +} + +func checkHistogramBuckets[BC BucketCount, IBC InternalBucketCount](buckets []IBC, count *BC, deltas bool) error { + if len(buckets) == 0 { + return nil + } + + var last IBC + for i := 0; i < len(buckets); i++ { + var c IBC + if deltas { + c = last + buckets[i] + } else { + c = buckets[i] + } + if c < 0 { + return errors.Wrap( + ErrHistogramNegativeBucketCount, + fmt.Sprintf("bucket number %d has observation count of %v", i+1, c), + ) + } + last = c + *count += BC(c) + } + + return nil +} + func getBound(idx, schema int32) float64 { // Here a bit of context about the behavior for the last bucket counting // regular numbers (called simply "last bucket" below) and the bucket diff --git a/model/histogram/histogram.go b/model/histogram/histogram.go index 762e7de81..fa624841e 100644 --- a/model/histogram/histogram.go +++ b/model/histogram/histogram.go @@ -18,6 +18,7 @@ import ( "math" "strings" + "github.com/pkg/errors" "golang.org/x/exp/slices" ) @@ -328,6 +329,50 @@ func (h *Histogram) ToFloat() *FloatHistogram { } } +// Validate validates consistency between span and bucket slices. Also, buckets are checked +// against negative values. +// For histograms that have not observed any NaN values (based on IsNaN(h.Sum) check), a +// strict h.Count = nCount + pCount + h.ZeroCount check is performed. +// Otherwise, only a lower bound check will be done (h.Count >= nCount + pCount + h.ZeroCount), +// because NaN observations do not increment the values of buckets (but they do increment +// the total h.Count). +func (h *Histogram) Validate() error { + if err := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)); err != nil { + return errors.Wrap(err, "negative side") + } + if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil { + return errors.Wrap(err, "positive side") + } + var nCount, pCount uint64 + err := checkHistogramBuckets(h.NegativeBuckets, &nCount, true) + if err != nil { + return errors.Wrap(err, "negative side") + } + err = checkHistogramBuckets(h.PositiveBuckets, &pCount, true) + if err != nil { + return errors.Wrap(err, "positive side") + } + + sumOfBuckets := nCount + pCount + h.ZeroCount + if math.IsNaN(h.Sum) { + if sumOfBuckets > h.Count { + return errors.Wrap( + ErrHistogramCountNotBigEnough, + fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count), + ) + } + } else { + if sumOfBuckets != h.Count { + return errors.Wrap( + ErrHistogramCountMismatch, + fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count), + ) + } + } + + return nil +} + type regularBucketIterator struct { baseBucketIterator[uint64, int64] } diff --git a/model/histogram/histogram_test.go b/model/histogram/histogram_test.go index 23fb1779e..6f12f53e8 100644 --- a/model/histogram/histogram_test.go +++ b/model/histogram/histogram_test.go @@ -811,3 +811,159 @@ func TestHistogramCompact(t *testing.T) { }) } } + +func TestHistogramValidation(t *testing.T) { + tests := map[string]struct { + h *Histogram + errMsg string + skipFloat bool + }{ + "valid histogram": { + h: &Histogram{ + Count: 12, + ZeroCount: 2, + ZeroThreshold: 0.001, + Sum: 19.4, + Schema: 1, + PositiveSpans: []Span{ + {Offset: 0, Length: 2}, + {Offset: 1, Length: 2}, + }, + PositiveBuckets: []int64{1, 1, -1, 0}, + NegativeSpans: []Span{ + {Offset: 0, Length: 2}, + {Offset: 1, Length: 2}, + }, + NegativeBuckets: []int64{1, 1, -1, 0}, + }, + }, + "valid histogram with NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": { + // This case is possible if NaN values (which do not fall into any bucket) are observed. + h: &Histogram{ + ZeroCount: 2, + Count: 4, + Sum: math.NaN(), + PositiveSpans: []Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{1}, + }, + }, + "rejects histogram without NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": { + h: &Histogram{ + ZeroCount: 2, + Count: 4, + Sum: 333, + PositiveSpans: []Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{1}, + }, + errMsg: `3 observations found in buckets, but the Count field is 4: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, + skipFloat: true, + }, + "rejects histogram that has too few negative buckets": { + h: &Histogram{ + NegativeSpans: []Span{{Offset: 0, Length: 1}}, + NegativeBuckets: []int64{}, + }, + errMsg: `negative side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`, + }, + "rejects histogram that has too few positive buckets": { + h: &Histogram{ + PositiveSpans: []Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{}, + }, + errMsg: `positive side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`, + }, + "rejects histogram that has too many negative buckets": { + h: &Histogram{ + NegativeSpans: []Span{{Offset: 0, Length: 1}}, + NegativeBuckets: []int64{1, 2}, + }, + errMsg: `negative side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`, + }, + "rejects histogram that has too many positive buckets": { + h: &Histogram{ + PositiveSpans: []Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{1, 2}, + }, + errMsg: `positive side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`, + }, + "rejects a histogram that has a negative span with a negative offset": { + h: &Histogram{ + NegativeSpans: []Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}}, + NegativeBuckets: []int64{1, 2}, + }, + errMsg: `negative side: span number 2 with offset -1: histogram has a span whose offset is negative`, + }, + "rejects a histogram which has a positive span with a negative offset": { + h: &Histogram{ + PositiveSpans: []Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}}, + PositiveBuckets: []int64{1, 2}, + }, + errMsg: `positive side: span number 2 with offset -1: histogram has a span whose offset is negative`, + }, + "rejects a histogram that has a negative bucket with a negative count": { + h: &Histogram{ + NegativeSpans: []Span{{Offset: -1, Length: 1}}, + NegativeBuckets: []int64{-1}, + }, + errMsg: `negative side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`, + }, + "rejects a histogram that has a positive bucket with a negative count": { + h: &Histogram{ + PositiveSpans: []Span{{Offset: -1, Length: 1}}, + PositiveBuckets: []int64{-1}, + }, + errMsg: `positive side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`, + }, + "rejects a histogram that has a lower count than count in buckets": { + h: &Histogram{ + Count: 0, + NegativeSpans: []Span{{Offset: -1, Length: 1}}, + PositiveSpans: []Span{{Offset: -1, Length: 1}}, + NegativeBuckets: []int64{1}, + PositiveBuckets: []int64{1}, + }, + errMsg: `2 observations found in buckets, but the Count field is 0: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, + skipFloat: true, + }, + "rejects a histogram that doesn't count the zero bucket in its count": { + h: &Histogram{ + Count: 2, + ZeroCount: 1, + NegativeSpans: []Span{{Offset: -1, Length: 1}}, + PositiveSpans: []Span{{Offset: -1, Length: 1}}, + NegativeBuckets: []int64{1}, + PositiveBuckets: []int64{1}, + }, + errMsg: `3 observations found in buckets, but the Count field is 2: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, + skipFloat: true, + }, + } + + for testName, tc := range tests { + t.Run(testName, func(t *testing.T) { + if err := tc.h.Validate(); tc.errMsg != "" { + require.EqualError(t, err, tc.errMsg) + } else { + require.NoError(t, err) + } + if tc.skipFloat { + return + } + + fh := tc.h.ToFloat() + if err := fh.Validate(); tc.errMsg != "" { + require.EqualError(t, err, tc.errMsg) + } else { + require.NoError(t, err) + } + }) + } +} + +func BenchmarkHistogramValidation(b *testing.B) { + histograms := GenerateBigTestHistograms(b.N, 500) + b.ResetTimer() + for _, h := range histograms { + require.NoError(b, h.Validate()) + } +} diff --git a/model/histogram/validate.go b/model/histogram/validate.go deleted file mode 100644 index 41649b798..000000000 --- a/model/histogram/validate.go +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright 2023 The Prometheus Authors -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package histogram - -import ( - "fmt" - "math" - - "github.com/pkg/errors" -) - -var ( - ErrHistogramCountNotBigEnough = errors.New("histogram's observation count should be at least the number of observations found in the buckets") - ErrHistogramCountMismatch = errors.New("histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)") - ErrHistogramNegativeBucketCount = errors.New("histogram has a bucket whose observation count is negative") - ErrHistogramSpanNegativeOffset = errors.New("histogram has a span whose offset is negative") - ErrHistogramSpansBucketsMismatch = errors.New("histogram spans specify different number of buckets than provided") -) - -func ValidateHistogram(h *Histogram) error { - if err := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)); err != nil { - return errors.Wrap(err, "negative side") - } - if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil { - return errors.Wrap(err, "positive side") - } - var nCount, pCount uint64 - err := checkHistogramBuckets(h.NegativeBuckets, &nCount, true) - if err != nil { - return errors.Wrap(err, "negative side") - } - err = checkHistogramBuckets(h.PositiveBuckets, &pCount, true) - if err != nil { - return errors.Wrap(err, "positive side") - } - - sumOfBuckets := nCount + pCount + h.ZeroCount - if math.IsNaN(h.Sum) { - if sumOfBuckets > h.Count { - return errors.Wrap( - ErrHistogramCountNotBigEnough, - fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count), - ) - } - } else { - if sumOfBuckets != h.Count { - return errors.Wrap( - ErrHistogramCountMismatch, - fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count), - ) - } - } - - return nil -} - -func ValidateFloatHistogram(h *FloatHistogram) error { - if err := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)); err != nil { - return errors.Wrap(err, "negative side") - } - if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil { - return errors.Wrap(err, "positive side") - } - var nCount, pCount float64 - err := checkHistogramBuckets(h.NegativeBuckets, &nCount, false) - if err != nil { - return errors.Wrap(err, "negative side") - } - err = checkHistogramBuckets(h.PositiveBuckets, &pCount, false) - if err != nil { - return errors.Wrap(err, "positive side") - } - - // We do not check for h.Count being at least as large as the sum of the - // counts in the buckets because floating point precision issues can - // create false positives here. - - return nil -} - -func checkHistogramSpans(spans []Span, numBuckets int) error { - var spanBuckets int - for n, span := range spans { - if n > 0 && span.Offset < 0 { - return errors.Wrap( - ErrHistogramSpanNegativeOffset, - fmt.Sprintf("span number %d with offset %d", n+1, span.Offset), - ) - } - spanBuckets += int(span.Length) - } - if spanBuckets != numBuckets { - return errors.Wrap( - ErrHistogramSpansBucketsMismatch, - fmt.Sprintf("spans need %d buckets, have %d buckets", spanBuckets, numBuckets), - ) - } - return nil -} - -func checkHistogramBuckets[BC BucketCount, IBC InternalBucketCount](buckets []IBC, count *BC, deltas bool) error { - if len(buckets) == 0 { - return nil - } - - var last IBC - for i := 0; i < len(buckets); i++ { - var c IBC - if deltas { - c = last + buckets[i] - } else { - c = buckets[i] - } - if c < 0 { - return errors.Wrap( - ErrHistogramNegativeBucketCount, - fmt.Sprintf("bucket number %d has observation count of %v", i+1, c), - ) - } - last = c - *count += BC(c) - } - - return nil -} diff --git a/model/histogram/validate_test.go b/model/histogram/validate_test.go deleted file mode 100644 index d9d8f0639..000000000 --- a/model/histogram/validate_test.go +++ /dev/null @@ -1,175 +0,0 @@ -// Copyright 2023 The Prometheus Authors -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package histogram - -import ( - "math" - "testing" - - "github.com/stretchr/testify/require" -) - -func TestHistogramValidation(t *testing.T) { - tests := map[string]struct { - h *Histogram - errMsg string - skipFloat bool - }{ - "valid histogram": { - h: &Histogram{ - Count: 12, - ZeroCount: 2, - ZeroThreshold: 0.001, - Sum: 19.4, - Schema: 1, - PositiveSpans: []Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - PositiveBuckets: []int64{1, 1, -1, 0}, - NegativeSpans: []Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - NegativeBuckets: []int64{1, 1, -1, 0}, - }, - }, - "valid histogram with NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": { - // This case is possible if NaN values (which do not fall into any bucket) are observed. - h: &Histogram{ - ZeroCount: 2, - Count: 4, - Sum: math.NaN(), - PositiveSpans: []Span{{Offset: 0, Length: 1}}, - PositiveBuckets: []int64{1}, - }, - }, - "rejects histogram without NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": { - h: &Histogram{ - ZeroCount: 2, - Count: 4, - Sum: 333, - PositiveSpans: []Span{{Offset: 0, Length: 1}}, - PositiveBuckets: []int64{1}, - }, - errMsg: `3 observations found in buckets, but the Count field is 4: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, - skipFloat: true, - }, - "rejects histogram that has too few negative buckets": { - h: &Histogram{ - NegativeSpans: []Span{{Offset: 0, Length: 1}}, - NegativeBuckets: []int64{}, - }, - errMsg: `negative side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`, - }, - "rejects histogram that has too few positive buckets": { - h: &Histogram{ - PositiveSpans: []Span{{Offset: 0, Length: 1}}, - PositiveBuckets: []int64{}, - }, - errMsg: `positive side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`, - }, - "rejects histogram that has too many negative buckets": { - h: &Histogram{ - NegativeSpans: []Span{{Offset: 0, Length: 1}}, - NegativeBuckets: []int64{1, 2}, - }, - errMsg: `negative side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`, - }, - "rejects histogram that has too many positive buckets": { - h: &Histogram{ - PositiveSpans: []Span{{Offset: 0, Length: 1}}, - PositiveBuckets: []int64{1, 2}, - }, - errMsg: `positive side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`, - }, - "rejects a histogram that has a negative span with a negative offset": { - h: &Histogram{ - NegativeSpans: []Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}}, - NegativeBuckets: []int64{1, 2}, - }, - errMsg: `negative side: span number 2 with offset -1: histogram has a span whose offset is negative`, - }, - "rejects a histogram which has a positive span with a negative offset": { - h: &Histogram{ - PositiveSpans: []Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}}, - PositiveBuckets: []int64{1, 2}, - }, - errMsg: `positive side: span number 2 with offset -1: histogram has a span whose offset is negative`, - }, - "rejects a histogram that has a negative bucket with a negative count": { - h: &Histogram{ - NegativeSpans: []Span{{Offset: -1, Length: 1}}, - NegativeBuckets: []int64{-1}, - }, - errMsg: `negative side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`, - }, - "rejects a histogram that has a positive bucket with a negative count": { - h: &Histogram{ - PositiveSpans: []Span{{Offset: -1, Length: 1}}, - PositiveBuckets: []int64{-1}, - }, - errMsg: `positive side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`, - }, - "rejects a histogram that has a lower count than count in buckets": { - h: &Histogram{ - Count: 0, - NegativeSpans: []Span{{Offset: -1, Length: 1}}, - PositiveSpans: []Span{{Offset: -1, Length: 1}}, - NegativeBuckets: []int64{1}, - PositiveBuckets: []int64{1}, - }, - errMsg: `2 observations found in buckets, but the Count field is 0: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, - skipFloat: true, - }, - "rejects a histogram that doesn't count the zero bucket in its count": { - h: &Histogram{ - Count: 2, - ZeroCount: 1, - NegativeSpans: []Span{{Offset: -1, Length: 1}}, - PositiveSpans: []Span{{Offset: -1, Length: 1}}, - NegativeBuckets: []int64{1}, - PositiveBuckets: []int64{1}, - }, - errMsg: `3 observations found in buckets, but the Count field is 2: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, - skipFloat: true, - }, - } - - for testName, tc := range tests { - t.Run(testName, func(t *testing.T) { - if err := ValidateHistogram(tc.h); tc.errMsg != "" { - require.EqualError(t, err, tc.errMsg) - } else { - require.NoError(t, err) - } - if tc.skipFloat { - return - } - if err := ValidateFloatHistogram(tc.h.ToFloat()); tc.errMsg != "" { - require.EqualError(t, err, tc.errMsg) - } else { - require.NoError(t, err) - } - }) - } -} - -func BenchmarkHistogramValidation(b *testing.B) { - histograms := GenerateBigTestHistograms(b.N, 500) - b.ResetTimer() - for _, h := range histograms { - require.NoError(b, ValidateHistogram(h)) - } -} diff --git a/tsdb/agent/db.go b/tsdb/agent/db.go index 188b10585..e4d44afa2 100644 --- a/tsdb/agent/db.go +++ b/tsdb/agent/db.go @@ -883,13 +883,13 @@ func (a *appender) AppendExemplar(ref storage.SeriesRef, _ labels.Labels, e exem func (a *appender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { if h != nil { - if err := histogram.ValidateHistogram(h); err != nil { + if err := h.Validate(); err != nil { return 0, err } } if fh != nil { - if err := histogram.ValidateFloatHistogram(fh); err != nil { + if err := fh.Validate(); err != nil { return 0, err } } diff --git a/tsdb/head_append.go b/tsdb/head_append.go index eeaaa369f..3663c800a 100644 --- a/tsdb/head_append.go +++ b/tsdb/head_append.go @@ -521,13 +521,13 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels } if h != nil { - if err := histogram.ValidateHistogram(h); err != nil { + if err := h.Validate(); err != nil { return 0, err } } if fh != nil { - if err := histogram.ValidateFloatHistogram(fh); err != nil { + if err := fh.Validate(); err != nil { return 0, err } }