Opmizing Group Regex (#12375)

Signed-off-by: Alan Protasio <alanprot@gmail.com>
This commit is contained in:
Alan Protasio 2023-05-30 04:49:22 -07:00 committed by GitHub
parent a16b876a05
commit 73078bf738
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 33 additions and 1 deletions

View File

@ -190,7 +190,14 @@ func findSetMatches(pattern string) []string {
}
escaped := false
sets := []*strings.Builder{{}}
for i := 4; i < len(pattern)-2; i++ {
init := 4
end := len(pattern) - 2
// If the regex is wrapped in a group we can remove the first and last parentheses
if pattern[init] == '(' && pattern[end-1] == ')' {
init++
end--
}
for i := init; i < end; i++ {
if escaped {
switch {
case isRegexMetaCharacter(pattern[i]):

View File

@ -113,6 +113,7 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) {
jXplus := labels.MustNewMatcher(labels.MatchRegexp, "j", "X.+")
iCharSet := labels.MustNewMatcher(labels.MatchRegexp, "i", "1[0-9]")
iAlternate := labels.MustNewMatcher(labels.MatchRegexp, "i", "(1|2|3|4|5|6|20|55)")
iNotAlternate := labels.MustNewMatcher(labels.MatchNotRegexp, "i", "(1|2|3|4|5|6|20|55)")
iXYZ := labels.MustNewMatcher(labels.MatchRegexp, "i", "X|Y|Z")
iNotXYZ := labels.MustNewMatcher(labels.MatchNotRegexp, "i", "X|Y|Z")
cases := []struct {
@ -132,6 +133,7 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) {
{`j=~"XXX|YYY"`, []*labels.Matcher{jXXXYYY}},
{`j=~"X.+"`, []*labels.Matcher{jXplus}},
{`i=~"(1|2|3|4|5|6|20|55)"`, []*labels.Matcher{iAlternate}},
{`i!~"(1|2|3|4|5|6|20|55)"`, []*labels.Matcher{iNotAlternate}},
{`i=~"X|Y|Z"`, []*labels.Matcher{iXYZ}},
{`i!~"X|Y|Z"`, []*labels.Matcher{iNotXYZ}},
{`i=~".*"`, []*labels.Matcher{iStar}},
@ -161,6 +163,8 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) {
for _, c := range cases {
b.Run(c.name, func(b *testing.B) {
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err := PostingsForMatchers(ir, c.matchers...)
require.NoError(b, err)

View File

@ -2051,6 +2051,12 @@ func TestPostingsForMatchers(t *testing.T) {
labels.FromStrings("n", "2"),
},
},
{
matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchNotRegexp, "n", "(1|2.5)")},
exp: []labels.Labels{
labels.FromStrings("n", "2"),
},
},
{
matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "n", "1"), labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^a$")},
exp: []labels.Labels{
@ -2112,6 +2118,13 @@ func TestPostingsForMatchers(t *testing.T) {
labels.FromStrings("n", "1", "i", "b"),
},
},
{
matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "i", "(a|b)")},
exp: []labels.Labels{
labels.FromStrings("n", "1", "i", "a"),
labels.FromStrings("n", "1", "i", "b"),
},
},
{
matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "n", "x1|2")},
exp: []labels.Labels{
@ -2134,6 +2147,14 @@ func TestPostingsForMatchers(t *testing.T) {
labels.FromStrings("n", "2.5"),
},
},
{
matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "i", "(c||d)")},
exp: []labels.Labels{
labels.FromStrings("n", "1"),
labels.FromStrings("n", "2"),
labels.FromStrings("n", "2.5"),
},
},
}
ir, err := h.Index()