Optimize queries using regex matchers for set lookups (#602)
* Original version of the set optimization Signed-off-by: naivewong <867245430@qq.com> * simple set matcher Signed-off-by: naivewong <867245430@qq.com> * simple set matcher Signed-off-by: naivewong <867245430@qq.com> * update Signed-off-by: naivewong <867245430@qq.com> * update Signed-off-by: naivewong <867245430@qq.com> * add benchmark Signed-off-by: naivewong <867245430@qq.com> * update Signed-off-by: naivewong <867245430@qq.com> * update Signed-off-by: naivewong <867245430@qq.com> * update benchmark Signed-off-by: naivewong <867245430@qq.com> * update Signed-off-by: naivewong <867245430@qq.com> * update benchmark Signed-off-by: naivewong <867245430@qq.com> * update benchmark Signed-off-by: naivewong <867245430@qq.com> * update benchmark Signed-off-by: naivewong <867245430@qq.com> * update Signed-off-by: naivewong <867245430@qq.com> * update Signed-off-by: naivewong <867245430@qq.com> * update Signed-off-by: naivewong <867245430@qq.com> * update Signed-off-by: naivewong <867245430@qq.com> * use genSeries from #467 Signed-off-by: naivewong <867245430@qq.com> * update Signed-off-by: naivewong <867245430@qq.com>
This commit is contained in:
parent
562e93e8e6
commit
13c80a5979
|
@ -21,6 +21,7 @@ import (
|
|||
"math/rand"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/go-kit/kit/log"
|
||||
|
@ -184,6 +185,11 @@ func createBlock(tb testing.TB, dir string, series []Series) string {
|
|||
return filepath.Join(dir, ulid.String())
|
||||
}
|
||||
|
||||
// defaultLabelName is the label name genSeries stores the series index under;
// it is also the prefix of the numbered filler label names.
const defaultLabelName = "labelName"

// defaultLabelValue is the prefix of the numbered filler label values
// produced by genSeries.
const defaultLabelValue = "labelValue"
|
||||
|
||||
// genSeries generates series with a given number of labels and values.
|
||||
func genSeries(totalSeries, labelCount int, mint, maxt int64) []Series {
|
||||
if totalSeries == 0 || labelCount == 0 {
|
||||
|
@ -193,8 +199,9 @@ func genSeries(totalSeries, labelCount int, mint, maxt int64) []Series {
|
|||
series := make([]Series, totalSeries)
|
||||
for i := 0; i < totalSeries; i++ {
|
||||
lbls := make(map[string]string, labelCount)
|
||||
for len(lbls) < labelCount {
|
||||
lbls[randString()] = randString()
|
||||
lbls[defaultLabelName] = strconv.Itoa(i)
|
||||
for j := 1; len(lbls) < labelCount; j++ {
|
||||
lbls[defaultLabelName+strconv.Itoa(j)] = defaultLabelValue + strconv.Itoa(j)
|
||||
}
|
||||
samples := make([]tsdbutil.Sample, 0, maxt-mint+1)
|
||||
for t := mint; t <= maxt; t++ {
|
||||
|
@ -224,31 +231,3 @@ func populateSeries(lbls []map[string]string, mint, maxt int64) []Series {
|
|||
}
|
||||
return series
|
||||
}
|
||||
|
||||
// Alphabet and bit-packing parameters used by randString.
const (
	// letterBytes is the alphabet random strings are drawn from.
	letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

	// letterIdxBits is the number of bits consumed per candidate letter index.
	letterIdxBits = 6

	// letterIdxMask selects the lowest letterIdxBits bits.
	letterIdxMask = 1<<letterIdxBits - 1

	// letterIdxMax is how many letter indices fit into the 63 random bits
	// returned by rand.Int63.
	letterIdxMax = 63 / letterIdxBits
)

// randString returns a random string of 1 to 50 characters taken from
// letterBytes.
func randString() string {
	const maxLength = int32(50)

	// last is the index of the final byte, so the string has last+1 bytes.
	last := rand.Int31n(maxLength)
	out := make([]byte, last+1)

	// One rand.Int63 call yields letterIdxMax candidate indices; bits holds
	// the not yet consumed ones and left counts how many remain.
	bits, left := rand.Int63(), letterIdxMax
	for pos := last; pos >= 0; bits, left = bits>>letterIdxBits, left-1 {
		if left == 0 {
			bits, left = rand.Int63(), letterIdxMax
		}
		idx := int(bits & letterIdxMask)
		if idx >= len(letterBytes) {
			// Candidate falls outside the alphabet: discard it and try the
			// next group of bits.
			continue
		}
		out[pos] = letterBytes[idx]
		pos--
	}

	return string(out)
}
|
||||
|
|
|
@ -63,14 +63,15 @@ func NewEqualMatcher(name, value string) Matcher {
|
|||
return &EqualMatcher{name: name, value: value}
|
||||
}
|
||||
|
||||
type regexpMatcher struct {
|
||||
type RegexpMatcher struct {
|
||||
name string
|
||||
re *regexp.Regexp
|
||||
}
|
||||
|
||||
func (m regexpMatcher) Name() string { return m.name }
|
||||
func (m regexpMatcher) Matches(v string) bool { return m.re.MatchString(v) }
|
||||
func (m regexpMatcher) String() string { return fmt.Sprintf("%s=~%q", m.name, m.re.String()) }
|
||||
func (m RegexpMatcher) Name() string { return m.name }
|
||||
func (m RegexpMatcher) Matches(v string) bool { return m.re.MatchString(v) }
|
||||
func (m RegexpMatcher) String() string { return fmt.Sprintf("%s=~%q", m.name, m.re.String()) }
|
||||
func (m RegexpMatcher) Value() string { return m.re.String() }
|
||||
|
||||
// NewRegexpMatcher returns a new matcher verifying that a value matches
|
||||
// the regular expression pattern.
|
||||
|
@ -79,7 +80,7 @@ func NewRegexpMatcher(name, pattern string) (Matcher, error) {
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &regexpMatcher{name: name, re: re}, nil
|
||||
return &RegexpMatcher{name: name, re: re}, nil
|
||||
}
|
||||
|
||||
// NewMustRegexpMatcher returns a new matcher verifying that a value matches
|
||||
|
@ -90,7 +91,7 @@ func NewMustRegexpMatcher(name, pattern string) Matcher {
|
|||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return &regexpMatcher{name: name, re: re}
|
||||
return &RegexpMatcher{name: name, re: re}
|
||||
|
||||
}
|
||||
|
||||
|
|
77
querier.go
77
querier.go
|
@ -17,6 +17,7 @@ import (
|
|||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"github.com/prometheus/tsdb/chunkenc"
|
||||
|
@ -266,6 +267,62 @@ func (q *blockQuerier) Close() error {
|
|||
return merr.Err()
|
||||
}
|
||||
|
||||
// regexMetaCharacterBytes is a bitmap over the ASCII range used by
// isRegexMetaCharacter: bit b/16 of element b%16 is set when byte b is one of
// the metacharacters registered in init.
var regexMetaCharacterBytes [16]byte

// isRegexMetaCharacter reports whether byte b needs to be escaped.
// The backslash itself is not part of the set; findSetMatches handles it
// separately.
func isRegexMetaCharacter(b byte) bool {
	return b < utf8.RuneSelf && regexMetaCharacterBytes[b%16]&(1<<(b/16)) != 0
}

func init() {
	for _, b := range []byte(`.+*?()|[]{}^$`) {
		regexMetaCharacterBytes[b%16] |= 1 << (b / 16)
	}
}

// findSetMatches extracts the literal alternatives from a pattern of the form
// ^(?:a|b|c)$ so that the regex can be answered with exact lookups.
//
// It returns nil when the pattern is not wrapped in "^(?:" and ")$", when it
// contains an unescaped metacharacter other than '|', when a backslash escapes
// anything but a metacharacter or another backslash, or when the pattern body
// ends in a dangling backslash. Empty alternatives are dropped, so the result
// can be empty (but non-nil) for a pattern such as ^(?:)$.
func findSetMatches(pattern string) []string {
	const (
		prefix = "^(?:"
		suffix = ")$"
	)
	// Return empty matches if the wrapper from Prometheus is missing.
	if len(pattern) < len(prefix)+len(suffix) ||
		!strings.HasPrefix(pattern, prefix) ||
		!strings.HasSuffix(pattern, suffix) {
		return nil
	}
	body := pattern[len(prefix) : len(pattern)-len(suffix)]

	var (
		// The number of '|' bytes bounds the number of alternatives.
		matches = make([]string, 0, strings.Count(body, "|")+1)
		cur     strings.Builder
		escaped bool
	)
	// flush finishes the current alternative, skipping empty ones.
	flush := func() {
		if cur.Len() > 0 {
			matches = append(matches, cur.String())
			cur.Reset()
		}
	}

	for i := 0; i < len(body); i++ {
		c := body[i]
		switch {
		case escaped:
			// Only escaped metacharacters and backslashes are plain
			// literals; anything else is not treated as a literal set.
			if !isRegexMetaCharacter(c) && c != '\\' {
				return nil
			}
			cur.WriteByte(c)
			escaped = false
		case c == '\\':
			escaped = true
		case c == '|':
			flush()
		case isRegexMetaCharacter(c):
			return nil
		default:
			cur.WriteByte(c)
		}
	}
	// A trailing backslash escapes nothing inside the wrapper, so the body
	// is not a plain set of literals.
	if escaped {
		return nil
	}
	flush()
	return matches
}
|
||||
|
||||
// PostingsForMatchers assembles a single postings iterator against the index reader
|
||||
// based on the given matchers.
|
||||
func PostingsForMatchers(ix IndexReader, ms ...labels.Matcher) (index.Postings, error) {
|
||||
|
@ -346,6 +403,14 @@ func postingsForMatcher(ix IndexReader, m labels.Matcher) (index.Postings, error
|
|||
return ix.Postings(em.Name(), em.Value())
|
||||
}
|
||||
|
||||
// Fast-path for set matching.
|
||||
if em, ok := m.(*labels.RegexpMatcher); ok {
|
||||
setMatches := findSetMatches(em.Value())
|
||||
if len(setMatches) > 0 {
|
||||
return postingsForSetMatcher(ix, em.Name(), setMatches)
|
||||
}
|
||||
}
|
||||
|
||||
tpls, err := ix.LabelValues(m.Name())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -411,6 +476,18 @@ func inversePostingsForMatcher(ix IndexReader, m labels.Matcher) (index.Postings
|
|||
return index.Merge(rit...), nil
|
||||
}
|
||||
|
||||
func postingsForSetMatcher(ix IndexReader, name string, matches []string) (index.Postings, error) {
|
||||
var its []index.Postings
|
||||
for _, match := range matches {
|
||||
if it, err := ix.Postings(name, match); err == nil {
|
||||
its = append(its, it)
|
||||
} else {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return index.Merge(its...), nil
|
||||
}
|
||||
|
||||
func mergeStrings(a, b []string) []string {
|
||||
maxl := len(a)
|
||||
if len(b) > len(a) {
|
||||
|
|
231
querier_test.go
231
querier_test.go
|
@ -1691,6 +1691,192 @@ func BenchmarkQuerySeek(b *testing.B) {
|
|||
}
|
||||
}
|
||||
|
||||
// Refer to https://github.com/prometheus/prometheus/issues/2651.
|
||||
func BenchmarkSetMatcher(b *testing.B) {
|
||||
cases := []struct {
|
||||
numBlocks int
|
||||
numSeries int
|
||||
numSamplesPerSeriesPerBlock int
|
||||
cardinality int
|
||||
pattern string
|
||||
}{
|
||||
// The first three cases are to find out whether the set
|
||||
// matcher is always faster than regex matcher.
|
||||
{
|
||||
numBlocks: 1,
|
||||
numSeries: 1,
|
||||
numSamplesPerSeriesPerBlock: 10,
|
||||
cardinality: 100,
|
||||
pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$",
|
||||
},
|
||||
{
|
||||
numBlocks: 1,
|
||||
numSeries: 15,
|
||||
numSamplesPerSeriesPerBlock: 10,
|
||||
cardinality: 100,
|
||||
pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$",
|
||||
},
|
||||
{
|
||||
numBlocks: 1,
|
||||
numSeries: 15,
|
||||
numSamplesPerSeriesPerBlock: 10,
|
||||
cardinality: 100,
|
||||
pattern: "^(?:1|2|3)$",
|
||||
},
|
||||
// Big data sizes benchmarks.
|
||||
{
|
||||
numBlocks: 20,
|
||||
numSeries: 1000,
|
||||
numSamplesPerSeriesPerBlock: 10,
|
||||
cardinality: 100,
|
||||
pattern: "^(?:1|2|3)$",
|
||||
},
|
||||
{
|
||||
numBlocks: 20,
|
||||
numSeries: 1000,
|
||||
numSamplesPerSeriesPerBlock: 10,
|
||||
cardinality: 100,
|
||||
pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$",
|
||||
},
|
||||
// Increase cardinality.
|
||||
{
|
||||
numBlocks: 1,
|
||||
numSeries: 100000,
|
||||
numSamplesPerSeriesPerBlock: 10,
|
||||
cardinality: 100000,
|
||||
pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$",
|
||||
},
|
||||
{
|
||||
numBlocks: 1,
|
||||
numSeries: 500000,
|
||||
numSamplesPerSeriesPerBlock: 10,
|
||||
cardinality: 500000,
|
||||
pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$",
|
||||
},
|
||||
{
|
||||
numBlocks: 10,
|
||||
numSeries: 500000,
|
||||
numSamplesPerSeriesPerBlock: 10,
|
||||
cardinality: 500000,
|
||||
pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$",
|
||||
},
|
||||
{
|
||||
numBlocks: 1,
|
||||
numSeries: 1000000,
|
||||
numSamplesPerSeriesPerBlock: 10,
|
||||
cardinality: 1000000,
|
||||
pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$",
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
dir, err := ioutil.TempDir("", "bench_postings_for_matchers")
|
||||
testutil.Ok(b, err)
|
||||
defer func() {
|
||||
testutil.Ok(b, os.RemoveAll(dir))
|
||||
}()
|
||||
|
||||
var (
|
||||
blocks []*Block
|
||||
prefilledLabels []map[string]string
|
||||
generatedSeries []Series
|
||||
)
|
||||
for i := int64(0); i < int64(c.numBlocks); i++ {
|
||||
mint := i * int64(c.numSamplesPerSeriesPerBlock)
|
||||
maxt := mint + int64(c.numSamplesPerSeriesPerBlock) - 1
|
||||
if len(prefilledLabels) == 0 {
|
||||
generatedSeries = genSeries(c.numSeries, 10, mint, maxt)
|
||||
for _, s := range generatedSeries {
|
||||
prefilledLabels = append(prefilledLabels, s.Labels().Map())
|
||||
}
|
||||
} else {
|
||||
generatedSeries = populateSeries(prefilledLabels, mint, maxt)
|
||||
}
|
||||
block, err := OpenBlock(nil, createBlock(b, dir, generatedSeries), nil)
|
||||
testutil.Ok(b, err)
|
||||
blocks = append(blocks, block)
|
||||
defer block.Close()
|
||||
}
|
||||
|
||||
que := &querier{
|
||||
blocks: make([]Querier, 0, len(blocks)),
|
||||
}
|
||||
for _, blk := range blocks {
|
||||
q, err := NewBlockQuerier(blk, math.MinInt64, math.MaxInt64)
|
||||
testutil.Ok(b, err)
|
||||
que.blocks = append(que.blocks, q)
|
||||
}
|
||||
defer que.Close()
|
||||
|
||||
benchMsg := fmt.Sprintf("nSeries=%d,nBlocks=%d,cardinality=%d,pattern=\"%s\"", c.numSeries, c.numBlocks, c.cardinality, c.pattern)
|
||||
b.Run(benchMsg, func(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
for n := 0; n < b.N; n++ {
|
||||
_, err := que.Select(labels.NewMustRegexpMatcher("test", c.pattern))
|
||||
testutil.Ok(b, err)
|
||||
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Refer to https://github.com/prometheus/prometheus/issues/2651.
|
||||
func TestFindSetMatches(t *testing.T) {
|
||||
cases := []struct {
|
||||
pattern string
|
||||
exp []string
|
||||
}{
|
||||
// Simple sets.
|
||||
{
|
||||
pattern: "^(?:foo|bar|baz)$",
|
||||
exp: []string{
|
||||
"foo",
|
||||
"bar",
|
||||
"baz",
|
||||
},
|
||||
},
|
||||
// Simple sets containing escaped characters.
|
||||
{
|
||||
pattern: "^(?:fo\\.o|bar\\?|\\^baz)$",
|
||||
exp: []string{
|
||||
"fo.o",
|
||||
"bar?",
|
||||
"^baz",
|
||||
},
|
||||
},
|
||||
// Simple sets containing special characters without escaping.
|
||||
{
|
||||
pattern: "^(?:fo.o|bar?|^baz)$",
|
||||
exp: nil,
|
||||
},
|
||||
// Missing wrapper.
|
||||
{
|
||||
pattern: "foo|bar|baz",
|
||||
exp: nil,
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
matches := findSetMatches(c.pattern)
|
||||
if len(c.exp) == 0 {
|
||||
if len(matches) != 0 {
|
||||
t.Errorf("Evaluating %s, unexpected result %v", c.pattern, matches)
|
||||
}
|
||||
} else {
|
||||
if len(matches) != len(c.exp) {
|
||||
t.Errorf("Evaluating %s, length of result not equal to exp", c.pattern)
|
||||
} else {
|
||||
for i := 0; i < len(c.exp); i++ {
|
||||
if c.exp[i] != matches[i] {
|
||||
t.Errorf("Evaluating %s, unexpected result %s", c.pattern, matches[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestPostingsForMatchers(t *testing.T) {
|
||||
h, err := NewHead(nil, nil, nil, 1000)
|
||||
testutil.Ok(t, err)
|
||||
|
@ -1703,6 +1889,7 @@ func TestPostingsForMatchers(t *testing.T) {
|
|||
app.Add(labels.FromStrings("n", "1", "i", "a"), 0, 0)
|
||||
app.Add(labels.FromStrings("n", "1", "i", "b"), 0, 0)
|
||||
app.Add(labels.FromStrings("n", "2"), 0, 0)
|
||||
app.Add(labels.FromStrings("n", "2.5"), 0, 0)
|
||||
testutil.Ok(t, app.Commit())
|
||||
|
||||
cases := []struct {
|
||||
|
@ -1735,6 +1922,7 @@ func TestPostingsForMatchers(t *testing.T) {
|
|||
labels.FromStrings("n", "1", "i", "a"),
|
||||
labels.FromStrings("n", "1", "i", "b"),
|
||||
labels.FromStrings("n", "2"),
|
||||
labels.FromStrings("n", "2.5"),
|
||||
},
|
||||
},
|
||||
// Not equals.
|
||||
|
@ -1742,6 +1930,7 @@ func TestPostingsForMatchers(t *testing.T) {
|
|||
matchers: []labels.Matcher{labels.Not(labels.NewEqualMatcher("n", "1"))},
|
||||
exp: []labels.Labels{
|
||||
labels.FromStrings("n", "2"),
|
||||
labels.FromStrings("n", "2.5"),
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -1796,6 +1985,7 @@ func TestPostingsForMatchers(t *testing.T) {
|
|||
exp: []labels.Labels{
|
||||
labels.FromStrings("n", "1"),
|
||||
labels.FromStrings("n", "2"),
|
||||
labels.FromStrings("n", "2.5"),
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -1824,6 +2014,7 @@ func TestPostingsForMatchers(t *testing.T) {
|
|||
matchers: []labels.Matcher{labels.Not(labels.NewMustRegexpMatcher("n", "^1$"))},
|
||||
exp: []labels.Labels{
|
||||
labels.FromStrings("n", "2"),
|
||||
labels.FromStrings("n", "2.5"),
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -1869,6 +2060,46 @@ func TestPostingsForMatchers(t *testing.T) {
|
|||
labels.FromStrings("n", "1", "i", "a"),
|
||||
},
|
||||
},
|
||||
// Set optimization for Regex.
|
||||
// Refer to https://github.com/prometheus/prometheus/issues/2651.
|
||||
{
|
||||
matchers: []labels.Matcher{labels.NewMustRegexpMatcher("n", "^(?:1|2)$")},
|
||||
exp: []labels.Labels{
|
||||
labels.FromStrings("n", "1"),
|
||||
labels.FromStrings("n", "1", "i", "a"),
|
||||
labels.FromStrings("n", "1", "i", "b"),
|
||||
labels.FromStrings("n", "2"),
|
||||
},
|
||||
},
|
||||
{
|
||||
matchers: []labels.Matcher{labels.NewMustRegexpMatcher("i", "^(?:a|b)$")},
|
||||
exp: []labels.Labels{
|
||||
labels.FromStrings("n", "1", "i", "a"),
|
||||
labels.FromStrings("n", "1", "i", "b"),
|
||||
},
|
||||
},
|
||||
{
|
||||
matchers: []labels.Matcher{labels.NewMustRegexpMatcher("n", "^(?:x1|2)$")},
|
||||
exp: []labels.Labels{
|
||||
labels.FromStrings("n", "2"),
|
||||
},
|
||||
},
|
||||
{
|
||||
matchers: []labels.Matcher{labels.NewMustRegexpMatcher("n", "^(?:2|2\\.5)$")},
|
||||
exp: []labels.Labels{
|
||||
labels.FromStrings("n", "2"),
|
||||
labels.FromStrings("n", "2.5"),
|
||||
},
|
||||
},
|
||||
// Empty value.
|
||||
{
|
||||
matchers: []labels.Matcher{labels.NewMustRegexpMatcher("i", "^(?:c||d)$")},
|
||||
exp: []labels.Labels{
|
||||
labels.FromStrings("n", "1"),
|
||||
labels.FromStrings("n", "2"),
|
||||
labels.FromStrings("n", "2.5"),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
ir, err := h.Index()
|
||||
|
|
Loading…
Reference in New Issue