Optimized label regex matcher with literal prefix and/or suffix (#7453)
* Optimized label regex matcher with literal prefix and/or suffix Signed-off-by: Marco Pracucci <marco@pracucci.com> * Added license Signed-off-by: Marco Pracucci <marco@pracucci.com> * Added more test cases with newlines Signed-off-by: Marco Pracucci <marco@pracucci.com> * Restored deleted test Signed-off-by: Marco Pracucci <marco@pracucci.com>
This commit is contained in:
parent
d78656c244
commit
cef4dd6fff
|
@ -15,7 +15,6 @@ package labels
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
)
|
||||
|
||||
// MatchType is an enum for label matching types.
|
||||
|
@ -48,7 +47,7 @@ type Matcher struct {
|
|||
Name string
|
||||
Value string
|
||||
|
||||
re *regexp.Regexp
|
||||
re *FastRegexMatcher
|
||||
}
|
||||
|
||||
// NewMatcher returns a matcher object.
|
||||
|
@ -59,7 +58,7 @@ func NewMatcher(t MatchType, n, v string) (*Matcher, error) {
|
|||
Value: v,
|
||||
}
|
||||
if t == MatchRegexp || t == MatchNotRegexp {
|
||||
re, err := regexp.Compile("^(?:" + v + ")$")
|
||||
re, err := NewFastRegexMatcher(v)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -116,5 +115,5 @@ func (m *Matcher) GetRegexString() string {
|
|||
if m.re == nil {
|
||||
return ""
|
||||
}
|
||||
return m.re.String()
|
||||
return m.re.GetRegexString()
|
||||
}
|
||||
|
|
|
@ -0,0 +1,93 @@
|
|||
// Copyright 2020 The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package labels
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"regexp/syntax"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// FastRegexMatcher pairs a compiled regular expression with an optional
// literal prefix and suffix. The literals allow cheap string comparisons
// to reject a value before the regular expression is evaluated.
type FastRegexMatcher struct {
	// re is the compiled regular expression used for the final decision.
	re *regexp.Regexp

	// prefix and suffix are literals a value must start/end with in order
	// to match. An empty string means no such literal is known.
	prefix, suffix string
}
|
||||
|
||||
func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
|
||||
re, err := regexp.Compile("^(?:" + v + ")$")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
parsed, err := syntax.Parse(v, syntax.Perl)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
m := &FastRegexMatcher{
|
||||
re: re,
|
||||
}
|
||||
|
||||
if parsed.Op == syntax.OpConcat {
|
||||
m.prefix, m.suffix = optimizeConcatRegex(parsed)
|
||||
}
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func (m *FastRegexMatcher) MatchString(s string) bool {
|
||||
if m.prefix != "" && !strings.HasPrefix(s, m.prefix) {
|
||||
return false
|
||||
}
|
||||
if m.suffix != "" && !strings.HasSuffix(s, m.suffix) {
|
||||
return false
|
||||
}
|
||||
return m.re.MatchString(s)
|
||||
}
|
||||
|
||||
func (m *FastRegexMatcher) GetRegexString() string {
|
||||
return m.re.String()
|
||||
}
|
||||
|
||||
// optimizeConcatRegex returns literal prefix/suffix text that can be safely
// checked against the label value before running the regexp matcher.
//
// r is expected to be a concatenation (syntax.OpConcat); for any other
// operation no prefix or suffix is reported. An empty string means that no
// literal could be extracted for that side.
//
// Only case-sensitive literals are returned. A literal parsed under the
// case-insensitive flag (e.g. "(?i)foo.*") carries syntax.FoldCase and matches
// several spellings, so comparing it byte-for-byte would wrongly reject values
// the regexp accepts.
func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix string) {
	if r == nil || r.Op != syntax.OpConcat {
		return "", ""
	}

	sub := r.Sub

	// We can safely remove begin and end text matchers respectively
	// at the beginning and end of the regexp.
	if len(sub) > 0 && sub[0].Op == syntax.OpBeginText {
		sub = sub[1:]
	}
	if len(sub) > 0 && sub[len(sub)-1].Op == syntax.OpEndText {
		sub = sub[:len(sub)-1]
	}

	if len(sub) == 0 {
		return "", ""
	}

	// Given Prometheus regex matchers are always anchored to the begin/end
	// of the text, if the first/last operations are case-sensitive literals,
	// we can safely treat them as prefix/suffix.
	if first := sub[0]; isCaseSensitiveLiteral(first) {
		prefix = string(first.Rune)
	}
	if last := sub[len(sub)-1]; isCaseSensitiveLiteral(last) {
		suffix = string(last.Rune)
	}

	return prefix, suffix
}

// isCaseSensitiveLiteral reports whether re is a literal that matches exactly
// its own runes, i.e. a literal without the case-folding flag.
func isCaseSensitiveLiteral(re *syntax.Regexp) bool {
	return re.Op == syntax.OpLiteral && re.Flags&syntax.FoldCase == 0
}
|
|
@ -0,0 +1,79 @@
|
|||
// Copyright 2020 The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package labels
|
||||
|
||||
import (
|
||||
"regexp/syntax"
|
||||
"testing"
|
||||
|
||||
"github.com/prometheus/prometheus/util/testutil"
|
||||
)
|
||||
|
||||
func TestNewFastRegexMatcher(t *testing.T) {
|
||||
cases := []struct {
|
||||
regex string
|
||||
value string
|
||||
expected bool
|
||||
}{
|
||||
{regex: "(foo|bar)", value: "foo", expected: true},
|
||||
{regex: "(foo|bar)", value: "foo bar", expected: false},
|
||||
{regex: "(foo|bar)", value: "bar", expected: true},
|
||||
{regex: "foo.*", value: "foo bar", expected: true},
|
||||
{regex: "foo.*", value: "bar foo", expected: false},
|
||||
{regex: ".*foo", value: "foo bar", expected: false},
|
||||
{regex: ".*foo", value: "bar foo", expected: true},
|
||||
{regex: ".*foo", value: "foo", expected: true},
|
||||
{regex: "^.*foo$", value: "foo", expected: true},
|
||||
{regex: "^.+foo$", value: "foo", expected: false},
|
||||
{regex: "^.+foo$", value: "bfoo", expected: true},
|
||||
{regex: ".*", value: "\n", expected: false},
|
||||
{regex: ".*", value: "\nfoo", expected: false},
|
||||
{regex: ".*foo", value: "\nfoo", expected: false},
|
||||
{regex: "foo.*", value: "foo\n", expected: false},
|
||||
{regex: "foo\n.*", value: "foo\n", expected: true},
|
||||
{regex: ".*", value: "foo", expected: true},
|
||||
{regex: "", value: "foo", expected: false},
|
||||
{regex: "", value: "", expected: true},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
m, err := NewFastRegexMatcher(c.regex)
|
||||
testutil.Ok(t, err)
|
||||
testutil.Equals(t, c.expected, m.MatchString(c.value))
|
||||
}
|
||||
}
|
||||
|
||||
func TestOptimizeConcatRegex(t *testing.T) {
|
||||
cases := []struct {
|
||||
regex string
|
||||
prefix string
|
||||
suffix string
|
||||
}{
|
||||
{regex: "foo(hello|bar)", prefix: "foo", suffix: ""},
|
||||
{regex: "foo(hello|bar)world", prefix: "foo", suffix: "world"},
|
||||
{regex: "foo.*", prefix: "foo", suffix: ""},
|
||||
{regex: "foo.*hello.*bar", prefix: "foo", suffix: "bar"},
|
||||
{regex: ".*foo", prefix: "", suffix: "foo"},
|
||||
{regex: "^.*foo$", prefix: "", suffix: "foo"},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
parsed, err := syntax.Parse(c.regex, syntax.Perl)
|
||||
testutil.Ok(t, err)
|
||||
|
||||
prefix, suffix := optimizeConcatRegex(parsed)
|
||||
testutil.Equals(t, c.prefix, prefix)
|
||||
testutil.Equals(t, c.suffix, suffix)
|
||||
}
|
||||
}
|
|
@ -91,6 +91,8 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) {
|
|||
jNotFoo := labels.MustNewMatcher(labels.MatchNotEqual, "j", "foo")
|
||||
|
||||
iStar := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*$")
|
||||
i1Star := labels.MustNewMatcher(labels.MatchRegexp, "i", "^1.*$")
|
||||
iStar1 := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*1$")
|
||||
iPlus := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.+$")
|
||||
i1Plus := labels.MustNewMatcher(labels.MatchRegexp, "i", "^1.+$")
|
||||
iEmptyRe := labels.MustNewMatcher(labels.MatchRegexp, "i", "^$")
|
||||
|
@ -107,6 +109,8 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) {
|
|||
{`j="foo",n="1"`, []*labels.Matcher{jFoo, n1}},
|
||||
{`n="1",j!="foo"`, []*labels.Matcher{n1, jNotFoo}},
|
||||
{`i=~".*"`, []*labels.Matcher{iStar}},
|
||||
{`i=~"1.*"`, []*labels.Matcher{i1Star}},
|
||||
{`i=~".*1"`, []*labels.Matcher{iStar1}},
|
||||
{`i=~".+"`, []*labels.Matcher{iPlus}},
|
||||
{`i=~""`, []*labels.Matcher{iEmptyRe}},
|
||||
{`i!=""`, []*labels.Matcher{iNotEmpty}},
|
||||
|
|
Loading…
Reference in New Issue