From cef4dd6fff06198c61007211be460fb83cd708c1 Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Fri, 26 Jun 2020 11:49:09 +0200 Subject: [PATCH] Optimized label regex matcher with literal prefix and/or suffix (#7453) * Optimized label regex matcher with literal prefix and/or suffix Signed-off-by: Marco Pracucci * Added license Signed-off-by: Marco Pracucci * Added more tests cases with newlines Signed-off-by: Marco Pracucci * Restored deleted test Signed-off-by: Marco Pracucci --- pkg/labels/matcher.go | 7 ++- pkg/labels/regexp.go | 93 ++++++++++++++++++++++++++++++++++++++ pkg/labels/regexp_test.go | 79 ++++++++++++++++++++++++++++++++ tsdb/querier_bench_test.go | 4 ++ 4 files changed, 179 insertions(+), 4 deletions(-) create mode 100644 pkg/labels/regexp.go create mode 100644 pkg/labels/regexp_test.go diff --git a/pkg/labels/matcher.go b/pkg/labels/matcher.go index df8dfae69..88d463233 100644 --- a/pkg/labels/matcher.go +++ b/pkg/labels/matcher.go @@ -15,7 +15,6 @@ package labels import ( "fmt" - "regexp" ) // MatchType is an enum for label matching types. @@ -48,7 +47,7 @@ type Matcher struct { Name string Value string - re *regexp.Regexp + re *FastRegexMatcher } // NewMatcher returns a matcher object. @@ -59,7 +58,7 @@ func NewMatcher(t MatchType, n, v string) (*Matcher, error) { Value: v, } if t == MatchRegexp || t == MatchNotRegexp { - re, err := regexp.Compile("^(?:" + v + ")$") + re, err := NewFastRegexMatcher(v) if err != nil { return nil, err } @@ -116,5 +115,5 @@ func (m *Matcher) GetRegexString() string { if m.re == nil { return "" } - return m.re.String() + return m.re.GetRegexString() } diff --git a/pkg/labels/regexp.go b/pkg/labels/regexp.go new file mode 100644 index 000000000..39e75b0e9 --- /dev/null +++ b/pkg/labels/regexp.go @@ -0,0 +1,93 @@ +// Copyright 2020 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
// FastRegexMatcher matches strings against a regular expression that is
// anchored at both ends. When the expression begins and/or ends with a
// case-sensitive literal, that literal is kept so MatchString can reject
// non-matching input with a plain string comparison before running the
// regexp engine.
type FastRegexMatcher struct {
	re *regexp.Regexp

	// prefix and suffix are literal strings every matching input must start
	// and end with. An empty string means no such literal is known.
	prefix string
	suffix string
}

// NewFastRegexMatcher compiles v wrapped as "^(?:v)$" and extracts the literal
// prefix and suffix, if any, used by MatchString as a fast pre-check.
//
// It returns an error if the wrapped expression does not compile, or if v on
// its own cannot be parsed with Perl syntax.
func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
	re, err := regexp.Compile("^(?:" + v + ")$")
	if err != nil {
		return nil, err
	}

	parsed, err := syntax.Parse(v, syntax.Perl)
	if err != nil {
		return nil, err
	}

	m := &FastRegexMatcher{re: re}
	m.prefix, m.suffix = optimizeConcatRegex(parsed)

	return m, nil
}

// MatchString reports whether s matches the anchored regular expression.
func (m *FastRegexMatcher) MatchString(s string) bool {
	// The literal checks are necessary conditions only: they can rule a value
	// out cheaply, but the regexp always has the final say.
	if m.prefix != "" && !strings.HasPrefix(s, m.prefix) {
		return false
	}
	if m.suffix != "" && !strings.HasSuffix(s, m.suffix) {
		return false
	}
	return m.re.MatchString(s)
}

// GetRegexString returns the source text of the compiled, anchored regexp.
func (m *FastRegexMatcher) GetRegexString() string {
	return m.re.String()
}

// optimizeConcatRegex returns literal prefix/suffix text that can be safely
// checked against the label value before running the regexp matcher.
//
// Only a concatenation can yield a prefix or suffix; for any other top-level
// operation both results are empty.
func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix string) {
	if r.Op != syntax.OpConcat {
		return "", ""
	}

	sub := r.Sub

	// We can safely remove begin and end text matchers respectively
	// at the beginning and end of the regexp.
	if len(sub) > 0 && sub[0].Op == syntax.OpBeginText {
		sub = sub[1:]
	}
	if len(sub) > 0 && sub[len(sub)-1].Op == syntax.OpEndText {
		sub = sub[:len(sub)-1]
	}

	if len(sub) == 0 {
		return "", ""
	}

	// Given the matcher is always anchored to the begin/end of the text, if
	// the first/last operations are case-sensitive literals, we can safely
	// treat them as prefix/suffix.
	if first := sub[0]; isCaseSensitiveLiteral(first) {
		prefix = string(first.Rune)
	}
	if last := sub[len(sub)-1]; isCaseSensitiveLiteral(last) {
		suffix = string(last.Rune)
	}

	return prefix, suffix
}

// isCaseSensitiveLiteral reports whether r is a literal that must appear
// verbatim in the input. Literals carrying the FoldCase flag (for example
// under "(?i)") match several spellings, so an exact string comparison
// against their runes would wrongly reject valid input.
func isCaseSensitiveLiteral(r *syntax.Regexp) bool {
	return r.Op == syntax.OpLiteral && r.Flags&syntax.FoldCase == 0
}
+ +package labels + +import ( + "regexp/syntax" + "testing" + + "github.com/prometheus/prometheus/util/testutil" +) + +func TestNewFastRegexMatcher(t *testing.T) { + cases := []struct { + regex string + value string + expected bool + }{ + {regex: "(foo|bar)", value: "foo", expected: true}, + {regex: "(foo|bar)", value: "foo bar", expected: false}, + {regex: "(foo|bar)", value: "bar", expected: true}, + {regex: "foo.*", value: "foo bar", expected: true}, + {regex: "foo.*", value: "bar foo", expected: false}, + {regex: ".*foo", value: "foo bar", expected: false}, + {regex: ".*foo", value: "bar foo", expected: true}, + {regex: ".*foo", value: "foo", expected: true}, + {regex: "^.*foo$", value: "foo", expected: true}, + {regex: "^.+foo$", value: "foo", expected: false}, + {regex: "^.+foo$", value: "bfoo", expected: true}, + {regex: ".*", value: "\n", expected: false}, + {regex: ".*", value: "\nfoo", expected: false}, + {regex: ".*foo", value: "\nfoo", expected: false}, + {regex: "foo.*", value: "foo\n", expected: false}, + {regex: "foo\n.*", value: "foo\n", expected: true}, + {regex: ".*", value: "foo", expected: true}, + {regex: "", value: "foo", expected: false}, + {regex: "", value: "", expected: true}, + } + + for _, c := range cases { + m, err := NewFastRegexMatcher(c.regex) + testutil.Ok(t, err) + testutil.Equals(t, c.expected, m.MatchString(c.value)) + } +} + +func TestOptimizeConcatRegex(t *testing.T) { + cases := []struct { + regex string + prefix string + suffix string + }{ + {regex: "foo(hello|bar)", prefix: "foo", suffix: ""}, + {regex: "foo(hello|bar)world", prefix: "foo", suffix: "world"}, + {regex: "foo.*", prefix: "foo", suffix: ""}, + {regex: "foo.*hello.*bar", prefix: "foo", suffix: "bar"}, + {regex: ".*foo", prefix: "", suffix: "foo"}, + {regex: "^.*foo$", prefix: "", suffix: "foo"}, + } + + for _, c := range cases { + parsed, err := syntax.Parse(c.regex, syntax.Perl) + testutil.Ok(t, err) + + prefix, suffix := optimizeConcatRegex(parsed) + 
testutil.Equals(t, c.prefix, prefix) + testutil.Equals(t, c.suffix, suffix) + } +} diff --git a/tsdb/querier_bench_test.go b/tsdb/querier_bench_test.go index 1f6755190..fe31d0eee 100644 --- a/tsdb/querier_bench_test.go +++ b/tsdb/querier_bench_test.go @@ -91,6 +91,8 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) { jNotFoo := labels.MustNewMatcher(labels.MatchNotEqual, "j", "foo") iStar := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*$") + i1Star := labels.MustNewMatcher(labels.MatchRegexp, "i", "^1.*$") + iStar1 := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*1$") iPlus := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.+$") i1Plus := labels.MustNewMatcher(labels.MatchRegexp, "i", "^1.+$") iEmptyRe := labels.MustNewMatcher(labels.MatchRegexp, "i", "^$") @@ -107,6 +109,8 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) { {`j="foo",n="1"`, []*labels.Matcher{jFoo, n1}}, {`n="1",j!="foo"`, []*labels.Matcher{n1, jNotFoo}}, {`i=~".*"`, []*labels.Matcher{iStar}}, + {`i=~"1.*"`, []*labels.Matcher{i1Star}}, + {`i=~".*1"`, []*labels.Matcher{iStar1}}, {`i=~".+"`, []*labels.Matcher{iPlus}}, {`i=~""`, []*labels.Matcher{iEmptyRe}}, {`i!=""`, []*labels.Matcher{iNotEmpty}},