From 3a08a71d86c0776321720ce2d7fef288eb9c14e1 Mon Sep 17 00:00:00 2001
From: Ganesh Vernekar <cs15btech11018@iith.ac.in>
Date: Wed, 7 Nov 2018 21:22:41 +0530
Subject: [PATCH] LabelNames() method to get all unique label names (#369)

* LabelNames() method to get all unique label names

Signed-off-by: Ganesh Vernekar <cs15btech11018@iith.ac.in>
---
 block.go            | 13 ++++++
 db.go               | 43 ++++++++++++++++++++
 db_test.go          | 99 ++++++++++++++++++++++++++++++++++++++++++++-
 head.go             | 17 +++++++-
 index/index.go      | 37 +++++++++++++----
 index/index_test.go |  2 -
 querier_test.go     | 11 ++++-
 7 files changed, 207 insertions(+), 15 deletions(-)

diff --git a/block.go b/block.go
index 981c69eb4..7d4ad9365 100644
--- a/block.go
+++ b/block.go
@@ -83,8 +83,12 @@ type IndexReader interface {
 	Series(ref uint64, lset *labels.Labels, chks *[]chunks.Meta) error
 
 	// LabelIndices returns a list of string tuples for which a label value index exists.
+	// NOTE: This is deprecated. Use `LabelNames()` instead.
 	LabelIndices() ([][]string, error)
 
+	// LabelNames returns all the unique label names present in the index in sorted order.
+	LabelNames() ([]string, error)
+
 	// Close releases the underlying resources of the reader.
 	Close() error
 }
@@ -407,6 +411,10 @@ func (r blockIndexReader) LabelIndices() ([][]string, error) {
 	return ss, errors.Wrapf(err, "block: %s", r.b.Meta().ULID)
 }
 
+func (r blockIndexReader) LabelNames() ([]string, error) {
+	return r.b.LabelNames()
+}
+
 func (r blockIndexReader) Close() error {
 	r.b.pendingReaders.Done()
 	return nil
@@ -564,6 +572,11 @@ func (pb *Block) OverlapsClosedInterval(mint, maxt int64) bool {
 	return pb.meta.MinTime <= maxt && mint < pb.meta.MaxTime
 }
 
+// LabelNames returns all the unique label names present in the Block in sorted order.
+func (pb *Block) LabelNames() ([]string, error) {
+	return pb.indexr.LabelNames()
+}
+
 func clampInterval(a, b, mint, maxt int64) (int64, int64) {
 	if a < mint {
 		a = mint
diff --git a/db.go b/db.go
index cb02b4835..bfb7d4c2c 100644
--- a/db.go
+++ b/db.go
@@ -878,6 +878,54 @@ func (db *DB) CleanTombstones() (err error) {
 	return errors.Wrap(db.reload(), "reload blocks")
 }
 
+// labelNames returns the set of unique label names found in the indexes of
+// the given Block Readers. Every IndexReader it opens is closed again before
+// it returns, on the error path as well.
+func labelNames(brs ...BlockReader) (map[string]struct{}, error) {
+	labelNamesMap := make(map[string]struct{})
+	for _, br := range brs {
+		ir, err := br.Index()
+		if err != nil {
+			return nil, errors.Wrap(err, "get IndexReader")
+		}
+		names, err := ir.LabelNames()
+		if err != nil {
+			// Close the reader so it is not leaked. The lookup error is the
+			// one reported; a secondary Close error is deliberately dropped.
+			_ = ir.Close()
+			return nil, errors.Wrap(err, "LabelNames() from IndexReader")
+		}
+		for _, name := range names {
+			labelNamesMap[name] = struct{}{}
+		}
+		if err = ir.Close(); err != nil {
+			return nil, errors.Wrap(err, "close IndexReader")
+		}
+	}
+	return labelNamesMap, nil
+}
+
+// LabelNames returns the sorted union of the label names of the head and of
+// all blocks returned by db.Blocks().
+func (db *DB) LabelNames() ([]string, error) {
+	readers := []BlockReader{db.head}
+	for _, blk := range db.Blocks() {
+		readers = append(readers, blk)
+	}
+	nameSet, err := labelNames(readers...)
+	if err != nil {
+		return nil, err
+	}
+
+	// Flatten the set into a slice so that it can be sorted.
+	names := make([]string, 0, len(nameSet))
+	for name := range nameSet {
+		names = append(names, name)
+	}
+	sort.Strings(names)
+	return names, nil
+}
+
 func isBlockDir(fi os.FileInfo) bool {
 	if !fi.IsDir() {
 		return false
diff --git a/db_test.go b/db_test.go
index 6dae80c76..1996c6e8d 100644
--- a/db_test.go
+++ b/db_test.go
@@ -781,7 +781,7 @@ func TestTombstoneClean(t *testing.T) {
 			testutil.Equals(t, smplExp, smplRes)
 		}
 
-		for _, b := range db.blocks {
+		for _, b := range db.Blocks() {
 			testutil.Equals(t, NewMemTombstones(), b.tombstones)
 		}
 	}
@@ -1134,7 +1134,7 @@ func TestChunkAtBlockBoundary(t *testing.T) {
 	err = db.compact()
 	testutil.Ok(t, err)
 
-	for _, block := range db.blocks {
+	for _, block := range db.Blocks() {
 		r, err := block.Index()
 		testutil.Ok(t, err)
 		defer r.Close()
@@ -1303,6 +1303,101 @@ func TestInitializeHeadTimestamp(t *testing.T) {
 	})
 }
 
+func TestDB_LabelNames(t *testing.T) {
+	tests := []struct {
+		// Add 'sampleLabels1' -> Test Head -> Compact -> Test Disk ->
+		// -> Add 'sampleLabels2' -> Test Head+Disk
+
+		sampleLabels1 [][2]string // For checking head and disk separately.
+		// To test Head+Disk, sampleLabels2 should have
+		// at least 1 unique label name which is not in sampleLabels1.
+		sampleLabels2 [][2]string // For checking head and disk together.
+		exp1          []string    // after adding sampleLabels1.
+		exp2          []string    // after adding sampleLabels1 and sampleLabels2.
+	}{
+		{
+			sampleLabels1: [][2]string{
+				[2]string{"name1", ""},
+				[2]string{"name3", ""},
+				[2]string{"name2", ""},
+			},
+			sampleLabels2: [][2]string{
+				[2]string{"name4", ""},
+				[2]string{"name1", ""},
+			},
+			exp1: []string{"name1", "name2", "name3"},
+			exp2: []string{"name1", "name2", "name3", "name4"},
+		},
+		{
+			sampleLabels1: [][2]string{
+				[2]string{"name2", ""},
+				[2]string{"name1", ""},
+				[2]string{"name2", ""},
+			},
+			sampleLabels2: [][2]string{
+				[2]string{"name6", ""},
+				[2]string{"name0", ""},
+			},
+			exp1: []string{"name1", "name2"},
+			exp2: []string{"name0", "name1", "name2", "name6"},
+		},
+	}
+
+	blockRange := DefaultOptions.BlockRanges[0]
+	// Appends samples into the database.
+	appendSamples := func(db *DB, mint, maxt int64, sampleLabels [][2]string) {
+		t.Helper()
+		app := db.Appender()
+		for i := mint; i <= maxt; i++ {
+			for _, tuple := range sampleLabels {
+				label := labels.FromStrings(tuple[0], tuple[1])
+				_, err := app.Add(label, i*blockRange, 0)
+				testutil.Ok(t, err)
+			}
+		}
+		err := app.Commit()
+		testutil.Ok(t, err)
+	}
+	for _, tst := range tests {
+		db, close := openTestDB(t, nil)
+		defer close()
+		defer db.Close()
+
+		appendSamples(db, 0, 4, tst.sampleLabels1)
+
+		// Testing head.
+		headIndexr, err := db.head.Index()
+		testutil.Ok(t, err)
+		labelNames, err := headIndexr.LabelNames()
+		testutil.Ok(t, err)
+		testutil.Equals(t, tst.exp1, labelNames)
+		testutil.Ok(t, headIndexr.Close())
+
+		// Testing disk.
+		err = db.compact()
+		testutil.Ok(t, err)
+		// All blocks have same label names, hence check them individually.
+		// No need to aggregate and check.
+		for _, b := range db.Blocks() {
+			blockIndexr, err := b.Index()
+			testutil.Ok(t, err)
+			labelNames, err = blockIndexr.LabelNames()
+			testutil.Ok(t, err)
+			testutil.Equals(t, tst.exp1, labelNames)
+			testutil.Ok(t, blockIndexr.Close())
+		}
+
+		// Adding more samples to head with new label names
+		// so that we can test db.LabelNames() (the union).
+		appendSamples(db, 5, 9, tst.sampleLabels2)
+
+		// Testing DB (union).
+		labelNames, err = db.LabelNames()
+		testutil.Ok(t, err)
+		testutil.Equals(t, tst.exp2, labelNames)
+	}
+}
+
 func TestCorrectNumTombstones(t *testing.T) {
 	db, close := openTestDB(t, nil)
 	defer close()
diff --git a/head.go b/head.go
index 92d8a128f..c52290778 100644
--- a/head.go
+++ b/head.go
@@ -1026,6 +1026,21 @@ func (h *headIndexReader) LabelValues(names ...string) (index.StringTuples, erro
 	return index.NewStringTuples(sl, len(names))
 }
 
+// LabelNames returns the non-empty keys of the head's values map in sorted
+// order. The map is read while holding symMtx for reading.
+func (h *headIndexReader) LabelNames() ([]string, error) {
+	h.head.symMtx.RLock()
+	defer h.head.symMtx.RUnlock()
+	names := make([]string, 0, len(h.head.values))
+	for key := range h.head.values {
+		if key != "" {
+			names = append(names, key)
+		}
+	}
+	sort.Strings(names)
+	return names, nil
+}
+
 // Postings returns the postings list iterator for the label pair.
 func (h *headIndexReader) Postings(name, value string) (index.Postings, error) {
 	return h.head.postings.Get(name, value), nil
@@ -1087,9 +1102,7 @@ func (h *headIndexReader) Series(ref uint64, lbls *labels.Labels, chks *[]chunks
 func (h *headIndexReader) LabelIndices() ([][]string, error) {
 	h.head.symMtx.RLock()
 	defer h.head.symMtx.RUnlock()
-
 	res := [][]string{}
-
 	for s := range h.head.values {
 		res = append(res, []string{s})
 	}
diff --git a/index/index.go b/index/index.go
index 17acf9ab2..133799ac9 100644
--- a/index/index.go
+++ b/index/index.go
@@ -38,6 +38,8 @@ const (
 
 	indexFormatV1 = 1
 	indexFormatV2 = 2
+
+	labelNameSeperator = "\xff"
 )
 
 type indexWriterSeries struct {
@@ -850,9 +852,8 @@ func (r *Reader) SymbolTable() map[uint32]string {
 
 // LabelValues returns value tuples that exist for the given label name tuples.
 func (r *Reader) LabelValues(names ...string) (StringTuples, error) {
-	const sep = "\xff"
 
-	key := strings.Join(names, sep)
+	key := strings.Join(names, labelNameSeperator)
 	off, ok := r.labels[key]
 	if !ok {
 		// XXX(fabxc): hot fix. Should return a partial data error and handle cases
@@ -882,14 +883,12 @@ type emptyStringTuples struct{}
 func (emptyStringTuples) At(i int) ([]string, error) { return nil, nil }
 func (emptyStringTuples) Len() int                   { return 0 }
 
-// LabelIndices returns a for which labels or label tuples value indices exist.
+// LabelIndices returns a slice of label names for which labels or label tuples value indices exist.
+// NOTE: This is deprecated. Use `LabelNames()` instead.
 func (r *Reader) LabelIndices() ([][]string, error) {
-	const sep = "\xff"
-
 	res := [][]string{}
-
 	for s := range r.labels {
-		res = append(res, strings.Split(s, sep))
+		res = append(res, strings.Split(s, labelNameSeperator))
 	}
 	return res, nil
 }
@@ -935,6 +934,30 @@ func (r *Reader) SortedPostings(p Postings) Postings {
 	return p
 }
 
+// LabelNames returns the sorted, de-duplicated label names that occur in the
+// keys of r.labels.
+func (r *Reader) LabelNames() ([]string, error) {
+	seen := make(map[string]struct{}, len(r.labels))
+	for joined := range r.labels {
+		// Each key is one or more label names joined by labelNameSeperator.
+		for _, part := range strings.Split(joined, labelNameSeperator) {
+			// allPostingsKey's name is not a label of any metric (it is
+			// basically an empty label name), so leave it out.
+			if part != allPostingsKey.Name {
+				seen[part] = struct{}{}
+			}
+		}
+	}
+
+	// Flatten the set into a slice so that it can be sorted.
+	sorted := make([]string, 0, len(seen))
+	for name := range seen {
+		sorted = append(sorted, name)
+	}
+	sort.Strings(sorted)
+	return sorted, nil
+}
+
 type stringTuples struct {
 	length  int      // tuple length
 	entries []string // flattened tuple entries
diff --git a/index/index_test.go b/index/index_test.go
index f50cda47a..d5122402e 100644
--- a/index/index_test.go
+++ b/index/index_test.go
@@ -140,11 +140,9 @@ func (m mockIndex) Series(ref uint64, lset *labels.Labels, chks *[]chunks.Meta)
 
 func (m mockIndex) LabelIndices() ([][]string, error) {
 	res := make([][]string, 0, len(m.labelIndex))
-
 	for k := range m.labelIndex {
 		res = append(res, []string{k})
 	}
-
 	return res, nil
 }
 
diff --git a/querier_test.go b/querier_test.go
index 9a30534c3..3abcb847b 100644
--- a/querier_test.go
+++ b/querier_test.go
@@ -1507,10 +1507,17 @@ func (m mockIndex) Series(ref uint64, lset *labels.Labels, chks *[]chunks.Meta)
 
 func (m mockIndex) LabelIndices() ([][]string, error) {
 	res := make([][]string, 0, len(m.labelIndex))
-
 	for k := range m.labelIndex {
 		res = append(res, []string{k})
 	}
-
 	return res, nil
 }
+
+func (m mockIndex) LabelNames() ([]string, error) {
+	labelNames := make([]string, 0, len(m.labelIndex))
+	for name := range m.labelIndex {
+		labelNames = append(labelNames, name)
+	}
+	sort.Strings(labelNames)
+	return labelNames, nil
+}