diff --git a/head.go b/head.go index 40a4eb5b92..647a3d3d76 100644 --- a/head.go +++ b/head.go @@ -185,13 +185,14 @@ func NewHead(r prometheus.Registerer, l log.Logger, wal WAL, chunkRange int64) ( return h, nil } +// ReadWAL initializes the head by consuming the write ahead log. func (h *Head) ReadWAL() error { r := h.wal.Reader() mint := h.MinTime() seriesFunc := func(series []RefSeries) error { for _, s := range series { - h.create(s.Labels.Hash(), s.Labels) + h.getOrCreate(s.Labels.Hash(), s.Labels) } return nil } @@ -379,17 +380,12 @@ func (a *headAppender) Add(lset labels.Labels, t int64, v float64) (uint64, erro if t < a.mint { return 0, ErrOutOfBounds } - hash := lset.Hash() - - s := a.head.series.getByHash(hash, lset) - - if s == nil { - s = a.head.create(hash, lset) + s, created := a.head.getOrCreate(lset.Hash(), lset) + if created { a.series = append(a.series, RefSeries{ Ref: s.ref, Labels: lset, - hash: hash, }) } return s.ref, a.AddFast(s.ref, t, v) @@ -839,20 +835,27 @@ func (h *headIndexReader) LabelIndices() ([][]string, error) { return res, nil } -func (h *Head) create(hash uint64, lset labels.Labels) *memSeries { - h.metrics.series.Inc() - h.metrics.seriesCreated.Inc() +func (h *Head) getOrCreate(hash uint64, lset labels.Labels) (*memSeries, bool) { + // Just using `getOrSet` below would be semantically sufficient, but we'd create + // a new series on every sample inserted via Add(), which causes allocations + // and makes our series IDs rather random and harder to compress in postings. + s := h.series.getByHash(hash, lset) + if s != nil { + return s, false + } // Optimistically assume that we are the first one to create the series. id := atomic.AddUint64(&h.lastSeriesID, 1) - s := newMemSeries(lset, id, h.chunkRange) + s = newMemSeries(lset, id, h.chunkRange) s, created := h.series.getOrSet(hash, s) - // Skip indexing if we didn't actually create the series. if !created { - return s + return s, false } + h.metrics.series.Inc() + h.metrics.seriesCreated.Inc() + h.postings.add(id, lset) h.symMtx.Lock() @@ -870,7 +873,7 @@ func (h *Head) create(hash uint64, lset labels.Labels) *memSeries { h.symbols[l.Value] = struct{}{} } - return s + return s, true } // seriesHashmap is a simple hashmap for memSeries by their label set. It is built diff --git a/head_test.go b/head_test.go index 724dab224a..b4901b7cd5 100644 --- a/head_test.go +++ b/head_test.go @@ -41,7 +41,7 @@ func BenchmarkCreateSeries(b *testing.B) { b.ResetTimer() for _, l := range lbls { - h.create(l.Hash(), l) + h.getOrCreate(l.Hash(), l) } } @@ -89,10 +89,10 @@ func TestHead_Truncate(t *testing.T) { h.initTime(0) - s1 := h.create(1, labels.FromStrings("a", "1", "b", "1")) - s2 := h.create(2, labels.FromStrings("a", "2", "b", "1")) - s3 := h.create(3, labels.FromStrings("a", "1", "b", "2")) - s4 := h.create(4, labels.FromStrings("a", "2", "b", "2", "c", "1")) + s1, _ := h.getOrCreate(1, labels.FromStrings("a", "1", "b", "1")) + s2, _ := h.getOrCreate(2, labels.FromStrings("a", "2", "b", "1")) + s3, _ := h.getOrCreate(3, labels.FromStrings("a", "1", "b", "2")) + s4, _ := h.getOrCreate(4, labels.FromStrings("a", "2", "b", "2", "c", "1")) s1.chunks = []*memChunk{ {minTime: 0, maxTime: 999}, diff --git a/wal.go b/wal.go index 9af9a18536..695e8d31b5 100644 --- a/wal.go +++ b/wal.go @@ -99,9 +99,6 @@ type WALReader interface { type RefSeries struct { Ref uint64 Labels labels.Labels - - // hash for the label set. This field is not generally populated. - hash uint64 } // RefSample is a timestamp/value pair associated with a reference to a series.