From afaf12fe45de0cef9b72b2b5a453318e90ee9343 Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Fri, 8 Sep 2017 00:35:28 +0530 Subject: [PATCH] Compress the series chunk details in index. Signed-off-by: Goutham Veeramachaneni --- Documentation/format/index.md | 51 ++++++++++++++++++++--------------- db_test.go | 2 -- index.go | 43 ++++++++++++++++++++++++----- 3 files changed, 66 insertions(+), 30 deletions(-) diff --git a/Documentation/format/index.md b/Documentation/format/index.md index 3beebb6fe..5fbdb176e 100644 --- a/Documentation/format/index.md +++ b/Documentation/format/index.md @@ -82,28 +82,37 @@ The file offset to the beginning of a series serves as the series' ID in all sub Every series entry first holds its number of labels, followed by tuples of symbol table references that contain the label name and value. The label pairs are lexicographically sorted. After the labels, the number of indexed chunks is encoded, followed by a sequence of metadata entries containing the chunks minimum and maximum timestamp and a reference to its position in the chunk file. Holding the time range data in the index allows dropping chunks irrelevant to queried time ranges without accessing them directly. +The mint of the first chunk is stored as-is; every following mint and maxt is encoded as a delta to the timestamp preceding it (each maxt relative to its own chunk's mint, each later mint relative to the previous chunk's maxt). Similarly, the ref of the first chunk is stored as-is and each following ref is stored as a delta to the previous ref. + ``` -┌─────────────────────────────────────────────────────────┐ -│ len │ -├─────────────────────────────────────────────────────────┤ -│ ┌──────────────────┬──────────────────────────────────┐ │ -│ │ │ ┌──────────────────────────┐ │ │ -│ │ │ │ ref(l_i.name) │ │ │ -│ │ #labels │ ├──────────────────────────┤ ...
│ │ -│ │ │ │ ref(l_i.value) │ │ │ -│ │ │ └──────────────────────────┘ │ │ -│ ├──────────────────┼──────────────────────────────────┤ │ -│ │ │ ┌──────────────────────────┐ │ │ -│ │ │ │ c_i.mint │ │ │ -│ │ │ ├──────────────────────────┤ │ │ -│ │ #chunks │ │ c_i.maxt │ │ │ -│ │ │ ├──────────────────────────┤ ... │ │ -│ │ │ │ ref(c_i.data) │ │ │ -│ │ │ └──────────────────────────┘ │ │ -│ └──────────────────┴──────────────────────────────────┘ │ -├─────────────────────────────────────────────────────────┤ -│ CRC32 <4b> │ -└─────────────────────────────────────────────────────────┘ +┌─────────────────────────────────────────────────────────────────────────┐ +│ len │ +├─────────────────────────────────────────────────────────────────────────┤ +│ ┌──────────────────┬──────────────────────────────────────────────────┐ │ +│ │ │ ┌──────────────────────────────────────────┐ │ │ +│ │ │ │ ref(l_i.name) │ │ │ +│ │ #labels │ ├──────────────────────────────────────────┤ ... │ │ +│ │ │ │ ref(l_i.value) │ │ │ +│ │ │ └──────────────────────────────────────────┘ │ │ +│ ├──────────────────┼──────────────────────────────────────────────────┤ │ +│ │ │ ┌──────────────────────────────────────────┐ │ │ +│ │ │ │ c_0.mint │ │ │ +│ │ │ ├──────────────────────────────────────────┤ │ │ +│ │ │ │ c_0.maxt - c_0.mint │ │ │ +│ │ │ ├──────────────────────────────────────────┤ │ │ +│ │ │ │ ref(c_0.data) │ │ │ +│ │ #chunks │ └──────────────────────────────────────────┘ │ │ +│ │ │ ┌──────────────────────────────────────────┐ │ │ +│ │ │ │ c_i.mint - c_i-1.maxt │ │ │ +│ │ │ ├──────────────────────────────────────────┤ │ │ +│ │ │ │ c_i.maxt - c_i.mint │ │ │ +│ │ │ ├──────────────────────────────────────────┤ ... 
│ │ +│ │ │ │ ref(c_i.data) - ref(c_i-1.data) │ │ │ +│ │ │ └──────────────────────────────────────────┘ │ │ +│ └──────────────────┴──────────────────────────────────────────────────┘ │ +├─────────────────────────────────────────────────────────────────────────┤ +│ CRC32 <4b> │ +└─────────────────────────────────────────────────────────────────────────┘ ``` diff --git a/db_test.go b/db_test.go index fd4389a3a..8f5ad543a 100644 --- a/db_test.go +++ b/db_test.go @@ -433,8 +433,6 @@ func TestDB_e2e(t *testing.T) { mint := rand.Int63n(300) maxt := mint + rand.Int63n(timeInterval*int64(numDatapoints)) - t.Logf("run query %s, [%d, %d]", qry.ms, mint, maxt) - expected := map[string][]sample{} // Build the mockSeriesSet. diff --git a/index.go b/index.go index ddc2c4f52..06f347aaa 100644 --- a/index.go +++ b/index.go @@ -292,10 +292,22 @@ func (w *indexWriter) AddSeries(ref uint64, lset labels.Labels, chunks ...ChunkM w.buf2.putUvarint(len(chunks)) - for _, c := range chunks { + if len(chunks) > 0 { + c := chunks[0] w.buf2.putVarint64(c.MinTime) - w.buf2.putVarint64(c.MaxTime) + w.buf2.putUvarint64(uint64(c.MaxTime - c.MinTime)) w.buf2.putUvarint64(c.Ref) + t0 := c.MaxTime + ref0 := int64(c.Ref) + + for _, c := range chunks[1:] { + w.buf2.putUvarint64(uint64(c.MinTime - t0)) + w.buf2.putUvarint64(uint64(c.MaxTime - c.MinTime)) + t0 = c.MaxTime + + w.buf2.putVarint64(int64(c.Ref) - ref0) + ref0 = int64(c.Ref) + } } w.buf1.reset() @@ -775,17 +787,34 @@ func (r *indexReader) Series(ref uint64, lbls *labels.Labels, chks *[]ChunkMeta) // Read the chunks meta data. 
k = int(d2.uvarint()) - for i := 0; i < k; i++ { - mint := d2.varint64() - maxt := d2.varint64() - off := d2.uvarint64() + if k == 0 { + return nil + } + + t0 := d2.varint64() + maxt := int64(d2.uvarint64()) + t0 + ref0 := int64(d2.uvarint64()) + + *chks = append(*chks, ChunkMeta{ + Ref: uint64(ref0), + MinTime: t0, + MaxTime: maxt, + }) + t0 = maxt + + for i := 1; i < k; i++ { + mint := int64(d2.uvarint64()) + t0 + maxt := int64(d2.uvarint64()) + mint + + ref0 += d2.varint64() + t0 = maxt if d2.err() != nil { return errors.Wrapf(d2.err(), "read meta for chunk %d", i) } *chks = append(*chks, ChunkMeta{ - Ref: off, + Ref: uint64(ref0), MinTime: mint, MaxTime: maxt, })