From 492414542ec8502e8885c8fefc3402ec51167457 Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Wed, 5 Feb 2020 05:53:12 -0500 Subject: [PATCH] fix matcher for regex (#6540) Signed-off-by: yeya24 --- pkg/labels/matcher.go | 8 ++++ tsdb/querier.go | 2 +- tsdb/querier_test.go | 92 ++++++++++++++++++++++++++++++++++++------- 3 files changed, 86 insertions(+), 16 deletions(-) diff --git a/pkg/labels/matcher.go b/pkg/labels/matcher.go index 2702d9ddb..df8dfae69 100644 --- a/pkg/labels/matcher.go +++ b/pkg/labels/matcher.go @@ -110,3 +110,11 @@ func (m *Matcher) Inverse() (*Matcher, error) { } panic("labels.Matcher.Matches: invalid match type") } + +// GetRegexString returns the regex string. +func (m *Matcher) GetRegexString() string { + if m.re == nil { + return "" + } + return m.re.String() +} diff --git a/tsdb/querier.go b/tsdb/querier.go index b7323abbf..dbde6ef76 100644 --- a/tsdb/querier.go +++ b/tsdb/querier.go @@ -424,7 +424,7 @@ func postingsForMatcher(ix IndexReader, m *labels.Matcher) (index.Postings, erro // Fast-path for set matching. if m.Type == labels.MatchRegexp { - setMatches := findSetMatches(m.Value) + setMatches := findSetMatches(m.GetRegexString()) if len(setMatches) > 0 { sort.Strings(setMatches) return ix.Postings(m.Name, setMatches...) diff --git a/tsdb/querier_test.go b/tsdb/querier_test.go index f391dc791..f1fe53b73 100644 --- a/tsdb/querier_test.go +++ b/tsdb/querier_test.go @@ -1641,21 +1641,21 @@ func BenchmarkSetMatcher(b *testing.B) { numSeries: 1, numSamplesPerSeriesPerBlock: 10, cardinality: 100, - pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", + pattern: "1|2|3|4|5|6|7|8|9|10", }, { numBlocks: 1, numSeries: 15, numSamplesPerSeriesPerBlock: 10, cardinality: 100, - pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", + pattern: "1|2|3|4|5|6|7|8|9|10", }, { numBlocks: 1, numSeries: 15, numSamplesPerSeriesPerBlock: 10, cardinality: 100, - pattern: "^(?:1|2|3)$", + pattern: "1|2|3", }, // Big data sizes benchmarks. { @@ -1663,14 +1663,14 @@ func BenchmarkSetMatcher(b *testing.B) { numSeries: 1000, numSamplesPerSeriesPerBlock: 10, cardinality: 100, - pattern: "^(?:1|2|3)$", + pattern: "1|2|3", }, { numBlocks: 20, numSeries: 1000, numSamplesPerSeriesPerBlock: 10, cardinality: 100, - pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", + pattern: "1|2|3|4|5|6|7|8|9|10", }, // Increase cardinality. { @@ -1678,28 +1678,28 @@ func BenchmarkSetMatcher(b *testing.B) { numSeries: 100000, numSamplesPerSeriesPerBlock: 10, cardinality: 100000, - pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", + pattern: "1|2|3|4|5|6|7|8|9|10", }, { numBlocks: 1, numSeries: 500000, numSamplesPerSeriesPerBlock: 10, cardinality: 500000, - pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", + pattern: "1|2|3|4|5|6|7|8|9|10", }, { numBlocks: 10, numSeries: 500000, numSamplesPerSeriesPerBlock: 10, cardinality: 500000, - pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", + pattern: "1|2|3|4|5|6|7|8|9|10", }, { numBlocks: 1, numSeries: 1000000, numSamplesPerSeriesPerBlock: 10, cardinality: 1000000, - pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", + pattern: "1|2|3|4|5|6|7|8|9|10", }, } @@ -1749,7 +1749,6 @@ func BenchmarkSetMatcher(b *testing.B) { for n := 0; n < b.N; n++ { _, err := que.Select(labels.MustNewMatcher(labels.MatchRegexp, "test", c.pattern)) testutil.Ok(b, err) - } }) } @@ -1997,7 +1996,7 @@ func TestPostingsForMatchers(t *testing.T) { // Set optimization for Regex. // Refer to https://github.com/prometheus/prometheus/issues/2651. { - matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "n", "^(?:1|2)$")}, + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "n", "1|2")}, exp: []labels.Labels{ labels.FromStrings("n", "1"), labels.FromStrings("n", "1", "i", "a"), @@ -2006,20 +2005,20 @@ func TestPostingsForMatchers(t *testing.T) { }, }, { - matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "i", "^(?:a|b)$")}, + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "i", "a|b")}, exp: []labels.Labels{ labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), }, }, { - matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "n", "^(?:x1|2)$")}, + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "n", "x1|2")}, exp: []labels.Labels{ labels.FromStrings("n", "2"), }, }, { - matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "n", "^(?:2|2\\.5)$")}, + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "n", "2|2\\.5")}, exp: []labels.Labels{ labels.FromStrings("n", "2"), labels.FromStrings("n", "2.5"), @@ -2027,7 +2026,7 @@ func TestPostingsForMatchers(t *testing.T) { }, // Empty value. { - matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "i", "^(?:c||d)$")}, + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "i", "c||d")}, exp: []labels.Labels{ labels.FromStrings("n", "1"), labels.FromStrings("n", "2"), @@ -2211,3 +2210,66 @@ func benchQuery(b *testing.B, expExpansions int, q Querier, selectors labels.Sel testutil.Ok(b, ss.Err()) } } + +// mockMatcherIndex is used to check if the regex matcher works as expected. +type mockMatcherIndex struct{} + +func (m mockMatcherIndex) Symbols() index.StringIter { return nil } + +func (m mockMatcherIndex) Close() error { return nil } + +// LabelValues will return error if it is called. +func (m mockMatcherIndex) LabelValues(name string) ([]string, error) { + return []string{}, errors.New("label values called") +} + +func (m mockMatcherIndex) Postings(name string, values ...string) (index.Postings, error) { + return index.EmptyPostings(), nil +} + +func (m mockMatcherIndex) SortedPostings(p index.Postings) index.Postings { + return index.EmptyPostings() +} + +func (m mockMatcherIndex) Series(ref uint64, lset *labels.Labels, chks *[]chunks.Meta) error { + return nil +} + +func (m mockMatcherIndex) LabelNames() ([]string, error) { return []string{}, nil } + +func TestPostingsForMatcher(t *testing.T) { + cases := []struct { + matcher *labels.Matcher + hasError bool + }{ + { + // Equal label matcher will just return. + matcher: labels.MustNewMatcher(labels.MatchEqual, "test", "test"), + hasError: false, + }, + { + // Regex matcher which doesn't have '|' will call Labelvalues() + matcher: labels.MustNewMatcher(labels.MatchRegexp, "test", ".*"), + hasError: true, + }, + { + matcher: labels.MustNewMatcher(labels.MatchRegexp, "test", "a|b"), + hasError: false, + }, + { + // Test case for double quoted regex matcher + matcher: labels.MustNewMatcher(labels.MatchRegexp, "test", "^(?:a|b)$"), + hasError: true, + }, + } + + for _, tc := range cases { + ir := &mockMatcherIndex{} + _, err := postingsForMatcher(ir, tc.matcher) + if tc.hasError { + testutil.NotOk(t, err) + } else { + testutil.Ok(t, err) + } + } +}