Add unit test for histogram append and various querying scenarios (#11194)

* Add unit test for histogram append and various querying scenarios Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * make lint happy Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Fix tests Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Fix review comments Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com>
2025-04-06 17:42:02 +00:00 · 2022-08-29 15:35:03 +05:30 · 2022-08-29 15:35:03 +05:30 · d209a29a5b
commit d209a29a5b
parent f7df3b86ba
6 changed files with 374 additions and 15 deletions
--- a/tsdb/block_test.go
+++ b/tsdb/block_test.go
@ -29,6 +29,7 @@ import (
 	"github.com/go-kit/log"
 	"github.com/stretchr/testify/require"
 	"github.com/prometheus/prometheus/model/histogram"
 	"github.com/prometheus/prometheus/model/labels"
 	"github.com/prometheus/prometheus/storage"
 	"github.com/prometheus/prometheus/tsdb/chunkenc"
@ -515,8 +516,67 @@ const (
 	defaultLabelValue = "labelValue"
 )
-// genSeries generates series with a given number of labels and values.
+// genSeries generates series of float64 samples with a given number of labels and values.
 func genSeries(totalSeries, labelCount int, mint, maxt int64) []storage.Series {
 	return genSeriesFromSampleGenerator(totalSeries, labelCount, mint, maxt, 1, func(ts int64) tsdbutil.Sample {
 		return sample{t: ts, v: rand.Float64()}
 	})
 }
 // genHistogramSeries generates series of histogram samples with a given number of labels and values.
 func genHistogramSeries(totalSeries, labelCount int, mint, maxt, step int64) []storage.Series {
 	return genSeriesFromSampleGenerator(totalSeries, labelCount, mint, maxt, step, func(ts int64) tsdbutil.Sample {
 		h := &histogram.Histogram{
 			Count:         5 + uint64(ts*4),
 			ZeroCount:     2 + uint64(ts),
 			ZeroThreshold: 0.001,
 			Sum:           18.4 * rand.Float64(),
 			Schema:        1,
 			PositiveSpans: []histogram.Span{
 				{Offset: 0, Length: 2},
 				{Offset: 1, Length: 2},
 			},
 			PositiveBuckets: []int64{int64(ts + 1), 1, -1, 0},
 		}
 		return sample{t: ts, h: h}
 	})
 }
 // genHistogramAndFloatSeries generates series of mixed histogram and float64 samples with a given number of labels and values.
 func genHistogramAndFloatSeries(totalSeries, labelCount int, mint, maxt, step int64) []storage.Series {
 	floatSample := false
 	count := 0
 	return genSeriesFromSampleGenerator(totalSeries, labelCount, mint, maxt, step, func(ts int64) tsdbutil.Sample {
 		count++
 		var s sample
 		if floatSample {
 			s = sample{t: ts, v: rand.Float64()}
 		} else {
 			h := &histogram.Histogram{
 				Count:         5 + uint64(ts*4),
 				ZeroCount:     2 + uint64(ts),
 				ZeroThreshold: 0.001,
 				Sum:           18.4 * rand.Float64(),
 				Schema:        1,
 				PositiveSpans: []histogram.Span{
 					{Offset: 0, Length: 2},
 					{Offset: 1, Length: 2},
 				},
 				PositiveBuckets: []int64{int64(ts + 1), 1, -1, 0},
 			}
 			s = sample{t: ts, h: h}
 		}
 		if count%5 == 0 {
 			// Flip the sample type for every 5 samples.
 			floatSample = !floatSample
 		}
 		return s
 	})
 }
 func genSeriesFromSampleGenerator(totalSeries, labelCount int, mint, maxt, step int64, generator func(ts int64) tsdbutil.Sample) []storage.Series {
 	if totalSeries == 0 || labelCount == 0 {
 		return nil
 	}
@ -530,8 +590,8 @@ func genSeries(totalSeries, labelCount int, mint, maxt int64) []storage.Series {
 			lbls[defaultLabelName+strconv.Itoa(j)] = defaultLabelValue + strconv.Itoa(j)
 		}
 		samples := make([]tsdbutil.Sample, 0, maxt-mint+1)
-		for t := mint; t < maxt; t++ {
+		for t := mint; t < maxt; t += step {
-			samples = append(samples, sample{t: t, v: rand.Float64()})
+			samples = append(samples, generator(t))
 		}
 		series[i] = storage.NewListSeries(labels.FromMap(lbls), samples)
 	}
--- a/tsdb/db_test.go
+++ b/tsdb/db_test.go
@ -40,6 +40,7 @@ import (
 	"github.com/stretchr/testify/require"
 	"go.uber.org/goleak"
 	"github.com/prometheus/prometheus/model/histogram"
 	"github.com/prometheus/prometheus/model/labels"
 	"github.com/prometheus/prometheus/model/metadata"
 	"github.com/prometheus/prometheus/storage"
@ -92,10 +93,17 @@ func query(t testing.TB, q storage.Querier, matchers ...*labels.Matcher) map[str
 		samples := []tsdbutil.Sample{}
 		it := series.Iterator()
-		for it.Next() == chunkenc.ValFloat {
+		for typ := it.Next(); typ != chunkenc.ValNone; typ = it.Next() {
-			// TODO(beorn7): Also handle histograms.
+			switch typ {
-			t, v := it.At()
+			case chunkenc.ValFloat:
-			samples = append(samples, sample{t: t, v: v})
+				ts, v := it.At()
 				samples = append(samples, sample{t: ts, v: v})
 			case chunkenc.ValHistogram:
 				ts, h := it.AtHistogram()
 				samples = append(samples, sample{t: ts, h: h})
 			default:
 				t.Fatalf("unknown sample type in query %s", typ.String())
 			}
 		}
 		require.NoError(t, it.Err())
@ -3742,3 +3750,276 @@ func TestMetadataAssertInMemoryData(t *testing.T) {
 	require.Equal(t, reopenDB.head.series.getByHash(s3.Hash(), s3).meta, m3)
 	require.Equal(t, reopenDB.head.series.getByHash(s4.Hash(), s4).meta, m4)
 }
 func TestHistogramAppendAndQuery(t *testing.T) {
 	db := openTestDB(t, nil, nil)
 	minute := func(m int) int64 { return int64(m) * time.Minute.Milliseconds() }
 	t.Cleanup(func() {
 		require.NoError(t, db.Close())
 	})
 	ctx := context.Background()
 	appendHistogram := func(lbls labels.Labels, tsMinute int, h *histogram.Histogram, exp *[]tsdbutil.Sample) {
 		t.Helper()
 		app := db.Appender(ctx)
 		_, err := app.AppendHistogram(0, lbls, minute(tsMinute), h)
 		require.NoError(t, err)
 		require.NoError(t, app.Commit())
 		*exp = append(*exp, sample{t: minute(tsMinute), h: h.Copy()})
 	}
 	appendFloat := func(lbls labels.Labels, tsMinute int, val float64, exp *[]tsdbutil.Sample) {
 		t.Helper()
 		app := db.Appender(ctx)
 		_, err := app.Append(0, lbls, minute(tsMinute), val)
 		require.NoError(t, err)
 		require.NoError(t, app.Commit())
 		*exp = append(*exp, sample{t: minute(tsMinute), v: val})
 	}
 	testQuery := func(name, value string, exp map[string][]tsdbutil.Sample) {
 		t.Helper()
 		q, err := db.Querier(ctx, math.MinInt64, math.MaxInt64)
 		require.NoError(t, err)
 		act := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, name, value))
 		require.Equal(t, exp, act)
 	}
 	baseH := &histogram.Histogram{
 		Count:         11,
 		ZeroCount:     4,
 		ZeroThreshold: 0.001,
 		Sum:           35.5,
 		Schema:        1,
 		PositiveSpans: []histogram.Span{
 			{Offset: 0, Length: 2},
 			{Offset: 2, Length: 2},
 		},
 		PositiveBuckets: []int64{1, 1, -1, 0},
 		NegativeSpans: []histogram.Span{
 			{Offset: 0, Length: 1},
 			{Offset: 1, Length: 2},
 		},
 		NegativeBuckets: []int64{1, 2, -1},
 	}
 	var (
 		series1                = labels.FromStrings("foo", "bar1")
 		series2                = labels.FromStrings("foo", "bar2")
 		series3                = labels.FromStrings("foo", "bar3")
 		series4                = labels.FromStrings("foo", "bar4")
 		exp1, exp2, exp3, exp4 []tsdbutil.Sample
 	)
 	// TODO(codesome): test everything for negative buckets as well.
 	t.Run("series with only histograms", func(t *testing.T) {
 		h := baseH.Copy() // This is shared across all sub tests.
 		appendHistogram(series1, 100, h.Copy(), &exp1)
 		testQuery("foo", "bar1", map[string][]tsdbutil.Sample{series1.String(): exp1})
 		h.PositiveBuckets[0]++
 		h.NegativeBuckets[0] += 2
 		h.Count += 10
 		appendHistogram(series1, 101, h.Copy(), &exp1)
 		testQuery("foo", "bar1", map[string][]tsdbutil.Sample{series1.String(): exp1})
 		t.Run("changing schema", func(t *testing.T) {
 			h.Schema = 2
 			appendHistogram(series1, 102, h.Copy(), &exp1)
 			testQuery("foo", "bar1", map[string][]tsdbutil.Sample{series1.String(): exp1})
 			// Schema back to old.
 			h.Schema = 1
 			appendHistogram(series1, 103, h.Copy(), &exp1)
 			testQuery("foo", "bar1", map[string][]tsdbutil.Sample{series1.String(): exp1})
 		})
 		t.Run("new buckets incoming", func(t *testing.T) {
 			// This histogram with new bucket at the end causes the re-encoding of the previous histogram.
 			// Hence the previous histogram is recoded into this new layout.
 			// But the query returns the histogram from the in-memory buffer, hence we don't see the recode here yet.
 			h.PositiveSpans[1].Length++
 			h.PositiveBuckets = append(h.PositiveBuckets, 1)
 			h.Count += 3
 			appendHistogram(series1, 104, h.Copy(), &exp1)
 			testQuery("foo", "bar1", map[string][]tsdbutil.Sample{series1.String(): exp1})
 			// Now we add the new buckets in between. Empty bucket is again not present for the old histogram.
 			h.PositiveSpans[0].Length++
 			h.PositiveSpans[1].Offset--
 			h.Count += 3
 			// {2, 1, -1, 0, 1} -> {2, 1, 0, -1, 0, 1}
 			h.PositiveBuckets = append(h.PositiveBuckets[:2], append([]int64{0}, h.PositiveBuckets[2:]...)...)
 			appendHistogram(series1, 105, h.Copy(), &exp1)
 			testQuery("foo", "bar1", map[string][]tsdbutil.Sample{series1.String(): exp1})
 			// We add 4 more histograms to clear out the buffer and see the re-encoded histograms.
 			appendHistogram(series1, 106, h.Copy(), &exp1)
 			appendHistogram(series1, 107, h.Copy(), &exp1)
 			appendHistogram(series1, 108, h.Copy(), &exp1)
 			appendHistogram(series1, 109, h.Copy(), &exp1)
 			// Update the expected histograms to reflect the re-encoding.
 			l := len(exp1)
 			h7 := exp1[l-7].H()
 			h7.PositiveSpans = exp1[l-1].H().PositiveSpans
 			h7.PositiveBuckets = []int64{2, 1, -3, 2, 0, -2} // -3 and -2 are the empty buckets.
 			exp1[l-7] = sample{t: exp1[l-7].T(), h: h7}
 			h6 := exp1[l-6].H()
 			h6.PositiveSpans = exp1[l-1].H().PositiveSpans
 			h6.PositiveBuckets = []int64{2, 1, -3, 2, 0, 1} // -3 is the empty bucket.
 			exp1[l-6] = sample{t: exp1[l-6].T(), h: h6}
 			testQuery("foo", "bar1", map[string][]tsdbutil.Sample{series1.String(): exp1})
 		})
 		t.Run("buckets disappearing", func(t *testing.T) {
 			h.PositiveSpans[1].Length--
 			h.PositiveBuckets = h.PositiveBuckets[:len(h.PositiveBuckets)-1]
 			appendHistogram(series1, 110, h.Copy(), &exp1)
 			testQuery("foo", "bar1", map[string][]tsdbutil.Sample{series1.String(): exp1})
 		})
 	})
 	t.Run("series starting with float and then getting histograms", func(t *testing.T) {
 		appendFloat(series2, 100, 100, &exp2)
 		appendFloat(series2, 101, 101, &exp2)
 		appendFloat(series2, 102, 102, &exp2)
 		testQuery("foo", "bar2", map[string][]tsdbutil.Sample{series2.String(): exp2})
 		h := baseH.Copy()
 		appendHistogram(series2, 103, h.Copy(), &exp2)
 		appendHistogram(series2, 104, h.Copy(), &exp2)
 		appendHistogram(series2, 105, h.Copy(), &exp2)
 		testQuery("foo", "bar2", map[string][]tsdbutil.Sample{series2.String(): exp2})
 		// Switching between float and histograms again.
 		appendFloat(series2, 106, 106, &exp2)
 		appendFloat(series2, 107, 107, &exp2)
 		testQuery("foo", "bar2", map[string][]tsdbutil.Sample{series2.String(): exp2})
 		appendHistogram(series2, 108, h.Copy(), &exp2)
 		appendHistogram(series2, 109, h.Copy(), &exp2)
 		testQuery("foo", "bar2", map[string][]tsdbutil.Sample{series2.String(): exp2})
 	})
 	t.Run("series starting with histogram and then getting float", func(t *testing.T) {
 		h := baseH.Copy()
 		appendHistogram(series3, 101, h.Copy(), &exp3)
 		appendHistogram(series3, 102, h.Copy(), &exp3)
 		appendHistogram(series3, 103, h.Copy(), &exp3)
 		testQuery("foo", "bar3", map[string][]tsdbutil.Sample{series3.String(): exp3})
 		appendFloat(series3, 104, 100, &exp3)
 		appendFloat(series3, 105, 101, &exp3)
 		appendFloat(series3, 106, 102, &exp3)
 		testQuery("foo", "bar3", map[string][]tsdbutil.Sample{series3.String(): exp3})
 		// Switching between histogram and float again.
 		appendHistogram(series3, 107, h.Copy(), &exp3)
 		appendHistogram(series3, 108, h.Copy(), &exp3)
 		testQuery("foo", "bar3", map[string][]tsdbutil.Sample{series3.String(): exp3})
 		appendFloat(series3, 109, 106, &exp3)
 		appendFloat(series3, 110, 107, &exp3)
 		testQuery("foo", "bar3", map[string][]tsdbutil.Sample{series3.String(): exp3})
 	})
 	t.Run("query mix of histogram and float series", func(t *testing.T) {
 		// A float only series.
 		appendFloat(series4, 100, 100, &exp4)
 		appendFloat(series4, 101, 101, &exp4)
 		appendFloat(series4, 102, 102, &exp4)
 		testQuery("foo", "bar.*", map[string][]tsdbutil.Sample{
 			series1.String(): exp1,
 			series2.String(): exp2,
 			series3.String(): exp3,
 			series4.String(): exp4,
 		})
 	})
 }
 func TestQueryHistogramFromBlocks(t *testing.T) {
 	minute := func(m int) int64 { return int64(m) * time.Minute.Milliseconds() }
 	testBlockQuerying := func(t *testing.T, blockSeries ...[]storage.Series) {
 		t.Helper()
 		opts := DefaultOptions()
 		opts.AllowOverlappingBlocks = true
 		db := openTestDB(t, opts, nil)
 		t.Cleanup(func() {
 			require.NoError(t, db.Close())
 		})
 		ctx := context.Background()
 		exp := make(map[string][]tsdbutil.Sample)
 		for _, series := range blockSeries {
 			createBlock(t, db.Dir(), series)
 			for _, s := range series {
 				key := s.Labels().String()
 				it := s.Iterator()
 				slice := exp[key]
 				for typ := it.Next(); typ != chunkenc.ValNone; typ = it.Next() {
 					switch typ {
 					case chunkenc.ValFloat:
 						ts, v := it.At()
 						slice = append(slice, sample{t: ts, v: v})
 					case chunkenc.ValHistogram:
 						ts, h := it.AtHistogram()
 						slice = append(slice, sample{t: ts, h: h})
 					}
 				}
 				sort.Slice(slice, func(i, j int) bool {
 					return slice[i].T() < slice[j].T()
 				})
 				exp[key] = slice
 			}
 		}
 		require.Len(t, db.Blocks(), 0)
 		require.NoError(t, db.reload())
 		require.Len(t, db.Blocks(), len(blockSeries))
 		q, err := db.Querier(ctx, math.MinInt64, math.MaxInt64)
 		require.NoError(t, err)
 		res := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "__name__", ".*"))
 		require.Equal(t, exp, res)
 	}
 	t.Run("serial blocks with only histograms", func(t *testing.T) {
 		testBlockQuerying(t,
 			genHistogramSeries(10, 5, minute(0), minute(119), minute(1)),
 			genHistogramSeries(10, 5, minute(120), minute(239), minute(1)),
 			genHistogramSeries(10, 5, minute(240), minute(359), minute(1)),
 		)
 	})
 	t.Run("overlapping blocks with only histograms", func(t *testing.T) {
 		testBlockQuerying(t,
 			genHistogramSeries(10, 5, minute(0), minute(120), minute(3)),
 			genHistogramSeries(10, 5, minute(1), minute(120), minute(3)),
 			genHistogramSeries(10, 5, minute(2), minute(120), minute(3)),
 		)
 	})
 	t.Run("serial blocks with mix of histograms and float64", func(t *testing.T) {
 		testBlockQuerying(t,
 			genHistogramAndFloatSeries(10, 5, minute(0), minute(60), minute(1)),
 			genHistogramSeries(10, 5, minute(61), minute(120), minute(1)),
 			genHistogramAndFloatSeries(10, 5, minute(121), minute(180), minute(1)),
 		)
 	})
 	t.Run("overlapping blocks with mix of histograms and float64", func(t *testing.T) {
 		testBlockQuerying(t,
 			genHistogramAndFloatSeries(10, 5, minute(0), minute(60), minute(3)),
 			genHistogramSeries(10, 5, minute(46), minute(100), minute(3)),
 			genHistogramAndFloatSeries(10, 5, minute(89), minute(140), minute(3)),
 		)
 	})
 }
--- a/tsdb/head_append.go
+++ b/tsdb/head_append.go
@ -553,7 +553,7 @@ func ValidateHistogram(h *histogram.Histogram) error {
 	if c := negativeCount + positiveCount; c > h.Count {
 		return errors.Wrap(
 			storage.ErrHistogramCountNotBigEnough,
-			fmt.Sprintf("%d observations found in buckets, but overall count is %d", c, h.Count),
+			fmt.Sprintf("%d observations found in buckets, but the Count field is %d", c, h.Count),
 		)
 	}
@ -712,7 +712,6 @@ func (a *headAppender) Commit() (err error) {
 	defer a.head.putHistogramBuffer(a.histograms)
 	defer a.head.putMetadataBuffer(a.metadata)
 	defer a.head.iso.closeAppend(a.appendID)
 	total := len(a.samples)
 	var series *memSeries
 	for i, s := range a.samples {
--- a/tsdb/head_test.go
+++ b/tsdb/head_test.go
@ -3886,7 +3886,7 @@ func TestHistogramValidation(t *testing.T) {
 				NegativeBuckets: []int64{1},
 				PositiveBuckets: []int64{1},
 			},
-			errMsg: `2 observations found in buckets, but overall count is 0`,
+			errMsg: `2 observations found in buckets, but the Count field is 0`,
 		},
 	}
--- a/tsdb/querier.go
+++ b/tsdb/querier.go
@ -696,7 +696,6 @@ func (p *populateWithDelChunkSeriesIterator) Next() bool {
 	if !p.next() {
 		return false
 	}
 	p.curr = p.currChkMeta
 	if p.currDelIter == nil {
 		return true
--- a/tsdb/tsdbblockutil.go
+++ b/tsdb/tsdbblockutil.go
@ -54,14 +54,34 @@ func CreateBlock(series []storage.Series, dir string, chunkRange int64, logger l
 		ref := storage.SeriesRef(0)
 		it := s.Iterator()
 		lset := s.Labels()
-		for it.Next() == chunkenc.ValFloat {
+		typ := it.Next()
-			// TODO(beorn7): Add histogram support.
+		lastTyp := typ
-			t, v := it.At()
+		for ; typ != chunkenc.ValNone; typ = it.Next() {
-			ref, err = app.Append(ref, lset, t, v)
+			if lastTyp != typ {
 				// The behaviour of appender is undefined if samples of different types
 				// are appended to the same series in a single Commit().
 				if err = app.Commit(); err != nil {
 					return "", err
 				}
 				app = w.Appender(ctx)
 				sampleCount = 0
 			}
 			switch typ {
 			case chunkenc.ValFloat:
 				t, v := it.At()
 				ref, err = app.Append(ref, lset, t, v)
 			case chunkenc.ValHistogram:
 				t, h := it.AtHistogram()
 				ref, err = app.AppendHistogram(ref, lset, t, h)
 			default:
 				return "", fmt.Errorf("unknown sample type %s", typ.String())
 			}
 			if err != nil {
 				return "", err
 			}
 			sampleCount++
 			lastTyp = typ
 		}
 		if it.Err() != nil {
 			return "", it.Err()