From 2f6bf7de4c41015a9adeeb3e544766d61ec865eb Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Tue, 7 Jul 2020 10:38:04 +0200 Subject: [PATCH] Optimise labels regex matchers containing a literal within the pattern (#7503) * Added labels matchers regex fast path for literals within the regex Signed-off-by: Marco Pracucci --- pkg/labels/regexp.go | 24 +++++++++++++++++++----- pkg/labels/regexp_test.go | 31 +++++++++++++++++++++---------- tsdb/querier_bench_test.go | 4 ++++ 3 files changed, 44 insertions(+), 15 deletions(-) diff --git a/pkg/labels/regexp.go b/pkg/labels/regexp.go index 39e75b0e9..317e39435 100644 --- a/pkg/labels/regexp.go +++ b/pkg/labels/regexp.go @@ -20,9 +20,10 @@ import ( ) type FastRegexMatcher struct { - re *regexp.Regexp - prefix string - suffix string + re *regexp.Regexp + prefix string + suffix string + contains string } func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) { @@ -41,7 +42,7 @@ func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) { } if parsed.Op == syntax.OpConcat { - m.prefix, m.suffix = optimizeConcatRegex(parsed) + m.prefix, m.suffix, m.contains = optimizeConcatRegex(parsed) } return m, nil @@ -54,6 +55,9 @@ func (m *FastRegexMatcher) MatchString(s string) bool { if m.suffix != "" && !strings.HasSuffix(s, m.suffix) { return false } + if m.contains != "" && !strings.Contains(s, m.contains) { + return false + } return m.re.MatchString(s) } @@ -63,7 +67,7 @@ func (m *FastRegexMatcher) GetRegexString() string { // optimizeConcatRegex returns literal prefix/suffix text that can be safely // checked against the label value before running the regexp matcher. -func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix string) { +func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix, contains string) { sub := r.Sub // We can safely remove begin and end text matchers respectively @@ -89,5 +93,15 @@ func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix string) { suffix = string(sub[last].Rune) } + // If contains any literal which is not a prefix/suffix, we keep the + // 1st one. We do not keep the whole list of literals to simplify the + // fast path. + for i := 1; i < len(sub)-1; i++ { + if sub[i].Op == syntax.OpLiteral { + contains = string(sub[i].Rune) + break + } + } + return } diff --git a/pkg/labels/regexp_test.go b/pkg/labels/regexp_test.go index 0b38d44d2..c12b893a9 100644 --- a/pkg/labels/regexp_test.go +++ b/pkg/labels/regexp_test.go @@ -42,6 +42,11 @@ func TestNewFastRegexMatcher(t *testing.T) { {regex: ".*foo", value: "\nfoo", expected: false}, {regex: "foo.*", value: "foo\n", expected: false}, {regex: "foo\n.*", value: "foo\n", expected: true}, + {regex: ".*foo.*", value: "foo", expected: true}, + {regex: ".*foo.*", value: "foo bar", expected: true}, + {regex: ".*foo.*", value: "hello foo world", expected: true}, + {regex: ".*foo.*", value: "hello foo\n world", expected: false}, + {regex: ".*foo\n.*", value: "hello foo\n world", expected: true}, {regex: ".*", value: "foo", expected: true}, {regex: "", value: "foo", expected: false}, {regex: "", value: "", expected: true}, @@ -56,24 +61,30 @@ func TestNewFastRegexMatcher(t *testing.T) { func TestOptimizeConcatRegex(t *testing.T) { cases := []struct { - regex string - prefix string - suffix string + regex string + prefix string + suffix string + contains string }{ - {regex: "foo(hello|bar)", prefix: "foo", suffix: ""}, - {regex: "foo(hello|bar)world", prefix: "foo", suffix: "world"}, - {regex: "foo.*", prefix: "foo", suffix: ""}, - {regex: "foo.*hello.*bar", prefix: "foo", suffix: "bar"}, - {regex: ".*foo", prefix: "", suffix: "foo"}, - {regex: "^.*foo$", prefix: "", suffix: "foo"}, + {regex: "foo(hello|bar)", prefix: "foo", suffix: "", contains: ""}, + {regex: "foo(hello|bar)world", prefix: "foo", suffix: "world", contains: ""}, + {regex: "foo.*", prefix: "foo", suffix: "", contains: ""}, + {regex: "foo.*hello.*bar", prefix: "foo", suffix: "bar", contains: "hello"}, + {regex: ".*foo", prefix: "", suffix: "foo", contains: ""}, + {regex: "^.*foo$", prefix: "", suffix: "foo", contains: ""}, + {regex: ".*foo.*", prefix: "", suffix: "", contains: "foo"}, + {regex: ".*foo.*bar.*", prefix: "", suffix: "", contains: "foo"}, + {regex: ".*(foo|bar).*", prefix: "", suffix: "", contains: ""}, + {regex: ".*[abc].*", prefix: "", suffix: "", contains: ""}, } for _, c := range cases { parsed, err := syntax.Parse(c.regex, syntax.Perl) testutil.Ok(t, err) - prefix, suffix := optimizeConcatRegex(parsed) + prefix, suffix, contains := optimizeConcatRegex(parsed) testutil.Equals(t, c.prefix, prefix) testutil.Equals(t, c.suffix, suffix) + testutil.Equals(t, c.contains, contains) } } diff --git a/tsdb/querier_bench_test.go b/tsdb/querier_bench_test.go index fe31d0eee..e4eca1c55 100644 --- a/tsdb/querier_bench_test.go +++ b/tsdb/querier_bench_test.go @@ -93,12 +93,14 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) { iStar := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*$") i1Star := labels.MustNewMatcher(labels.MatchRegexp, "i", "^1.*$") iStar1 := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*1$") + iStar1Star := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*1.*$") iPlus := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.+$") i1Plus := labels.MustNewMatcher(labels.MatchRegexp, "i", "^1.+$") iEmptyRe := labels.MustNewMatcher(labels.MatchRegexp, "i", "^$") iNotEmpty := labels.MustNewMatcher(labels.MatchNotEqual, "i", "") iNot2 := labels.MustNewMatcher(labels.MatchNotEqual, "n", "2"+postingsBenchSuffix) iNot2Star := labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^2.*$") + iNotStar2Star := labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^.*2.*$") cases := []struct { name string @@ -120,8 +122,10 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) { {`n="1",i!="",j="foo"`, []*labels.Matcher{n1, iNotEmpty, jFoo}}, {`n="1",i=~".+",j="foo"`, []*labels.Matcher{n1, iPlus, jFoo}}, {`n="1",i=~"1.+",j="foo"`, []*labels.Matcher{n1, i1Plus, jFoo}}, + {`n="1",i=~".*1.*",j="foo"`, []*labels.Matcher{n1, iStar1Star, jFoo}}, {`n="1",i=~".+",i!="2",j="foo"`, []*labels.Matcher{n1, iPlus, iNot2, jFoo}}, {`n="1",i=~".+",i!~"2.*",j="foo"`, []*labels.Matcher{n1, iPlus, iNot2Star, jFoo}}, + {`n="1",i=~".+",i!~".*2.*",j="foo"`, []*labels.Matcher{n1, iPlus, iNotStar2Star, jFoo}}, } for _, c := range cases {