From ac4f8a5e23e6c03a12e694313cb7eac4f58e4296 Mon Sep 17 00:00:00 2001 From: Bryan Boreham <bjboreham@gmail.com> Date: Mon, 16 Dec 2024 09:42:52 +0000 Subject: [PATCH] [ENHANCEMENT] TSDB: Improve calculation of space used by labels (#13880) * [ENHANCEMENT] TSDB: Improve calculation of space used by labels The labels for each series in the Head take up some space in the Postings index, but far more space in the `memSeries` structure. Instead of having the Postings index calculate this overhead, which is a layering violation, have the caller pass in a function to do it. Provide three implementations of this function for the three Labels versions. Signed-off-by: Bryan Boreham <bjboreham@gmail.com> --- model/labels/labels.go | 6 ++++++ model/labels/labels_dedupelabels.go | 5 +++++ model/labels/labels_stringlabels.go | 5 +++++ tsdb/head.go | 2 +- tsdb/index/postings.go | 5 +++-- tsdb/index/postings_test.go | 7 ++++--- 6 files changed, 24 insertions(+), 6 deletions(-) diff --git a/model/labels/labels.go b/model/labels/labels.go index f4de7496ce..0747ab90d9 100644 --- a/model/labels/labels.go +++ b/model/labels/labels.go @@ -19,6 +19,7 @@ import ( "bytes" "slices" "strings" + "unsafe" "github.com/cespare/xxhash/v2" ) @@ -488,3 +489,8 @@ func (b *ScratchBuilder) Labels() Labels { func (b *ScratchBuilder) Overwrite(ls *Labels) { *ls = append((*ls)[:0], b.add...) } + +// SizeOfLabels returns the approximate space required for n copies of a label. 
+func SizeOfLabels(name, value string, n uint64) uint64 { + return (uint64(len(name)) + uint64(unsafe.Sizeof(name)) + uint64(len(value)) + uint64(unsafe.Sizeof(value))) * n +} diff --git a/model/labels/labels_dedupelabels.go b/model/labels/labels_dedupelabels.go index da8a88cc15..a0d83e0044 100644 --- a/model/labels/labels_dedupelabels.go +++ b/model/labels/labels_dedupelabels.go @@ -815,3 +815,8 @@ func (b *ScratchBuilder) Overwrite(ls *Labels) { ls.syms = b.syms.nameTable ls.data = yoloString(b.overwriteBuffer) } + +// SizeOfLabels returns the approximate space required for n copies of a label. +func SizeOfLabels(name, value string, n uint64) uint64 { + return uint64(len(name)+len(value)) + n*4 // Assuming most symbol-table entries are 2 bytes long. +} diff --git a/model/labels/labels_stringlabels.go b/model/labels/labels_stringlabels.go index c64bb990e0..f49ed96f65 100644 --- a/model/labels/labels_stringlabels.go +++ b/model/labels/labels_stringlabels.go @@ -691,3 +691,8 @@ func NewScratchBuilderWithSymbolTable(_ *SymbolTable, n int) ScratchBuilder { func (b *ScratchBuilder) SetSymbolTable(_ *SymbolTable) { // no-op } + +// SizeOfLabels returns the approximate space required for n copies of a label. 
+func SizeOfLabels(name, value string, n uint64) uint64 { + return uint64(labelSize(&Label{Name: name, Value: value})) * n +} diff --git a/tsdb/head.go b/tsdb/head.go index c67c438e52..47f85d7713 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -1048,7 +1048,7 @@ func (h *Head) PostingsCardinalityStats(statsByLabelName string, limit int) *ind return h.cardinalityCache } h.cardinalityCacheKey = cacheKey - h.cardinalityCache = h.postings.Stats(statsByLabelName, limit) + h.cardinalityCache = h.postings.Stats(statsByLabelName, limit, labels.SizeOfLabels) h.lastPostingsStatsCall = time.Duration(time.Now().Unix()) * time.Second return h.cardinalityCache diff --git a/tsdb/index/postings.go b/tsdb/index/postings.go index ea32ba5632..f9a284bc70 100644 --- a/tsdb/index/postings.go +++ b/tsdb/index/postings.go @@ -190,7 +190,8 @@ type PostingsStats struct { } // Stats calculates the cardinality statistics from postings. -func (p *MemPostings) Stats(label string, limit int) *PostingsStats { +// Caller can pass in a function which computes the space required for n series with a given label. 
+func (p *MemPostings) Stats(label string, limit int, labelSizeFunc func(string, string, uint64) uint64) *PostingsStats { var size uint64 p.mtx.RLock() @@ -218,7 +219,7 @@ func (p *MemPostings) Stats(label string, limit int) *PostingsStats { } seriesCnt := uint64(len(values)) labelValuePairs.push(Stat{Name: n + "=" + name, Count: seriesCnt}) - size += uint64(len(name)) * seriesCnt + size += labelSizeFunc(n, name, seriesCnt) } labelValueLength.push(Stat{Name: n, Count: size}) } diff --git a/tsdb/index/postings_test.go b/tsdb/index/postings_test.go index 6ff5b9c060..6dd9f25bc0 100644 --- a/tsdb/index/postings_test.go +++ b/tsdb/index/postings_test.go @@ -939,7 +939,7 @@ func BenchmarkPostings_Stats(b *testing.B) { } b.ResetTimer() for n := 0; n < b.N; n++ { - p.Stats("__name__", 10) + p.Stats("__name__", 10, labels.SizeOfLabels) } } @@ -954,7 +954,8 @@ func TestMemPostingsStats(t *testing.T) { p.Add(2, labels.FromStrings("label", "value1")) // call the Stats method to calculate the cardinality statistics - stats := p.Stats("label", 10) + // passing a fake calculation so we get the same result regardless of compilation -tags. + stats := p.Stats("label", 10, func(name, value string, n uint64) uint64 { return uint64(len(name)+len(value)) * n }) // assert that the expected statistics were calculated require.Equal(t, uint64(2), stats.CardinalityMetricsStats[0].Count) @@ -963,7 +964,7 @@ func TestMemPostingsStats(t *testing.T) { require.Equal(t, uint64(3), stats.CardinalityLabelStats[0].Count) require.Equal(t, "label", stats.CardinalityLabelStats[0].Name) - require.Equal(t, uint64(24), stats.LabelValueStats[0].Count) + require.Equal(t, uint64(44), stats.LabelValueStats[0].Count) require.Equal(t, "label", stats.LabelValueStats[0].Name) require.Equal(t, uint64(2), stats.LabelValuePairsStats[0].Count)