Swap rules lexer for a much faster one.

This swaps github.com/kivikakk/golex for github.com/cznic/golex.

The old lexer would have taken 3.5 years to load a set of 5000 test rules
(its time complexity was quadratic in the input length), whereas the new one
takes only 32ms.
Furthermore, since the new lexer is embedded differently, this gets rid of the
global parser variables and makes the rule loader fully reentrant without a
lock.
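
To make the reentrancy claim concrete, here is a minimal sketch (not part of
the commit) of loading rules from several goroutines at once through the
LoadFromReader function shown in the load.go diff below; the import path
matches the package in this commit, while the rule text, worker count, and
error handling are illustrative only.

package main

import (
	"fmt"
	"strings"
	"sync"

	"github.com/prometheus/prometheus/rules"
)

func main() {
	// Illustrative recording rule in the syntax this grammar accepts.
	ruleText := `foo = sum(http_requests) by (job)`
	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			// Each call constructs its own RulesLexer, so no parseMutex
			// (or any other shared state) is needed anymore.
			if _, err := rules.LoadFromReader(strings.NewReader(ruleText), false); err != nil {
				fmt.Printf("worker %d: %v\n", id, err)
			}
		}(i)
	}
	wg.Wait()
}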
Julius Volz 2013-07-11 18:38:44 +02:00
parent 2d538bf55a
commit 64b0ade171
7 changed files with 1450 additions and 808 deletions

Makefile

@@ -17,7 +17,8 @@ parser.y.go: parser.y
go tool yacc -o parser.y.go -v "" parser.y
lexer.l.go: parser.y.go lexer.l
golex lexer.l
# This is golex from https://github.com/cznic/golex.
golex -o="lexer.l.go" lexer.l
clean:
rm lexer.l.go parser.y.go

lexer.l

@@ -1,25 +1,53 @@
/* Copyright 2013 Prometheus Team
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. */
// Copyright 2013 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
%{
package rules
import (
"fmt"
"strconv"
"strings"
clientmodel "github.com/prometheus/client_golang/model"
)
// Lex is called by the parser generated by "go tool yacc" to obtain each
// token. The method is opened before the matching rules block and closed at
// the end of the file.
func (lexer *RulesLexer) Lex(lval *yySymType) int {
// Internal lexer states.
const (
S_INITIAL = iota
S_COMMENTS
)
// We simulate multiple start symbols for closely-related grammars via dummy tokens. See
// http://www.gnu.org/software/bison/manual/html_node/Multiple-start_002dsymbols.html
// Reason: we want to be able to parse lists of named rules as well as single expressions.
if lexer.startToken != 0 {
startToken := lexer.startToken
lexer.startToken = 0
return startToken
}
c := lexer.current
currentState := 0
if lexer.empty {
c, lexer.empty = lexer.getChar(), false
}
%}
D [0-9]
@@ -28,44 +56,50 @@ U [smhdwy]
%x S_COMMENTS
%yyc c
%yyn c = lexer.getChar()
%yyt currentState
%%
. { yypos++; REJECT }
\n { yyline++; yypos = 1; REJECT }
lexer.buf = lexer.buf[:0] // Code before the first rule is executed before every scan cycle (rule #0 / state 0 action).
"/*" { BEGIN(S_COMMENTS) }
<S_COMMENTS>"*/" { BEGIN(0) }
<S_COMMENTS>. { /* ignore chars within multi-line comments */ }
"/*" currentState = S_COMMENTS
<S_COMMENTS>"*/" currentState = S_INITIAL
<S_COMMENTS>.|\n /* ignore chars within multi-line comments */
\/\/[^\r\n]*\n { /* gobble up one-line comments */ }
\/\/[^\r\n]*\n /* gobble up one-line comments */
ALERT|alert { return ALERT }
IF|if { return IF }
FOR|for { return FOR }
WITH|with { return WITH }
ALERT|alert return ALERT
IF|if return IF
FOR|for return FOR
WITH|with return WITH
PERMANENT|permanent { return PERMANENT }
BY|by { return GROUP_OP }
AVG|SUM|MAX|MIN|COUNT { yylval.str = yytext; return AGGR_OP }
avg|sum|max|min|count { yylval.str = strings.ToUpper(yytext); return AGGR_OP }
\<|>|AND|OR|and|or { yylval.str = strings.ToUpper(yytext); return CMP_OP }
==|!=|>=|<= { yylval.str = yytext; return CMP_OP }
[+\-] { yylval.str = yytext; return ADDITIVE_OP }
[*/%] { yylval.str = yytext; return MULT_OP }
PERMANENT|permanent return PERMANENT
BY|by return GROUP_OP
AVG|SUM|MAX|MIN|COUNT lval.str = lexer.token(); return AGGR_OP
avg|sum|max|min|count lval.str = strings.ToUpper(lexer.token()); return AGGR_OP
\<|>|AND|OR|and|or lval.str = strings.ToUpper(lexer.token()); return CMP_OP
==|!=|>=|<= lval.str = lexer.token(); return CMP_OP
[+\-] lval.str = lexer.token(); return ADDITIVE_OP
[*/%] lval.str = lexer.token(); return MULT_OP
{D}+{U} { yylval.str = yytext; return DURATION }
{L}({L}|{D})* { yylval.str = yytext; return IDENTIFIER }
{D}+{U} lval.str = lexer.token(); return DURATION
{L}({L}|{D})* lval.str = lexer.token(); return IDENTIFIER
\-?{D}+(\.{D}*)? { num, err := strconv.ParseFloat(yytext, 64);
if (err != nil && err.(*strconv.NumError).Err == strconv.ErrSyntax) {
panic("Invalid float")
}
yylval.num = clientmodel.SampleValue(num)
return NUMBER }
\-?{D}+(\.{D}*)? num, err := strconv.ParseFloat(lexer.token(), 64);
if (err != nil && err.(*strconv.NumError).Err == strconv.ErrSyntax) {
panic("Invalid float")
}
lval.num = clientmodel.SampleValue(num)
return NUMBER
\"(\\.|[^\\"])*\" { yylval.str = yytext[1:len(yytext) - 1]; return STRING }
\'(\\.|[^\\'])*\' { yylval.str = yytext[1:len(yytext) - 1]; return STRING }
\"(\\.|[^\\"])*\" lval.str = lexer.token()[1:len(lexer.token()) - 1]; return STRING
\'(\\.|[^\\'])*\' lval.str = lexer.token()[1:len(lexer.token()) - 1]; return STRING
[{}\[\]()=,] { return int(yytext[0]) }
. { /* don't print any remaining chars (whitespace) */ }
\n { /* don't print any remaining chars (whitespace) */ }
[{}\[\]()=,] return int(lexer.buf[0])
[\t\n\r ] /* gobble up any whitespace */
%%
lexer.empty = true
return int(c)
}
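
The dummy start tokens referenced in the comment above surface in the loader's
API as the singleExpr flag: newRulesLexer seeds startToken with START_RULES or
START_EXPRESSION, and Lex hands that token to yyParse before any real input.
Below is a minimal sketch (not part of the commit) of driving both entry
points; the expressions are illustrative only.

package main

import (
	"fmt"
	"strings"

	"github.com/prometheus/prometheus/rules"
)

func main() {
	// singleExpr=true: Lex emits START_EXPRESSION first, steering yyParse
	// into the single-expression sub-grammar.
	expr, err := rules.LoadFromReader(strings.NewReader(`sum(http_requests) by (job)`), true)
	fmt.Println(expr, err)

	// singleExpr=false: Lex emits START_RULES and a list of named rules is parsed.
	ruleSet, err := rules.LoadFromReader(strings.NewReader(`foo = sum(http_requests) by (job)`), false)
	fmt.Println(ruleSet, err)
}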

lexer.l.go

File diff suppressed because it is too large

load.go

@@ -14,69 +14,87 @@
package rules
import (
"bufio"
"errors"
"fmt"
"github.com/prometheus/prometheus/rules/ast"
"io"
"log"
"os"
"strings"
"sync"
)
// GoLex sadly needs these global variables for storing temporary token/parsing information.
var (
yylval *yySymType // For storing extra token information, like the contents of a string.
yyline int // Line number within the current file or buffer.
yypos int // Character position within the current line.
parseMutex sync.Mutex // Mutex protecting the parsing-related global state defined above.
"github.com/prometheus/prometheus/rules/ast"
)
type RulesLexer struct {
errors []string // Errors encountered during parsing.
startToken int // Dummy token to simulate multiple start symbols (see below).
parsedRules []Rule // Parsed full rules.
parsedExpr ast.Node // Parsed single expression.
}
// Errors encountered during parsing.
errors []string
// Dummy token to simulate multiple start symbols (see below).
startToken int
// Parsed full rules.
parsedRules []Rule
// Parsed single expression.
parsedExpr ast.Node
func (lexer *RulesLexer) Lex(lval *yySymType) int {
yylval = lval
// Current character.
current byte
// Current token buffer.
buf []byte
// Input text.
src *bufio.Reader
// Whether we have a current char.
empty bool
// We simulate multiple start symbols for closely-related grammars via dummy tokens. See
// http://www.gnu.org/software/bison/manual/html_node/Multiple-start_002dsymbols.html
// Reason: we want to be able to parse lists of named rules as well as single expressions.
if lexer.startToken != 0 {
startToken := lexer.startToken
lexer.startToken = 0
return startToken
}
tokenType := yylex()
return tokenType
// Current input line.
line int
// Current character position within the current input line.
pos int
}
func (lexer *RulesLexer) Error(errorStr string) {
err := fmt.Sprintf("Error parsing rules at line %v, char %v: %v", yyline, yypos, errorStr)
err := fmt.Sprintf("Error parsing rules at line %v, char %v: %v", lexer.line, lexer.pos, errorStr)
lexer.errors = append(lexer.errors, err)
}
func LoadFromReader(rulesReader io.Reader, singleExpr bool) (interface{}, error) {
parseMutex.Lock()
defer parseMutex.Unlock()
func (lexer *RulesLexer) getChar() byte {
if lexer.current != 0 {
lexer.buf = append(lexer.buf, lexer.current)
}
lexer.current = 0
if b, err := lexer.src.ReadByte(); err == nil {
if b == '\n' {
lexer.line++
lexer.pos = 0
} else {
lexer.pos++
}
lexer.current = b
} else if err != io.EOF {
log.Fatal(err)
}
return lexer.current
}
yyin = rulesReader
yypos = 1
yyline = 1
yydata = ""
yytext = ""
func (lexer *RulesLexer) token() string {
return string(lexer.buf)
}
func newRulesLexer(src io.Reader, singleExpr bool) *RulesLexer {
lexer := &RulesLexer{
startToken: START_RULES,
src: bufio.NewReader(src),
pos: 1,
line: 1,
}
if singleExpr {
lexer.startToken = START_EXPRESSION
}
lexer.getChar()
return lexer
}
func LoadFromReader(rulesReader io.Reader, singleExpr bool) (interface{}, error) {
lexer := newRulesLexer(rulesReader, singleExpr)
ret := yyParse(lexer)
if ret != 0 && len(lexer.errors) == 0 {
lexer.Error("Unknown parser error")
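
The buffer discipline above is where the linear-time behavior comes from:
getChar pulls one byte at a time through a bufio.Reader and appends the
previous character to lexer.buf, while the per-scan reset lexer.buf =
lexer.buf[:0] reuses the buffer's capacity instead of allocating per token.
Here is a standalone sketch of the same pattern as a toy whitespace
tokenizer; all names are illustrative and not part of the commit.

package main

import (
	"bufio"
	"fmt"
	"io"
	"strings"
)

// words accumulates each token into a buffer that is truncated rather than
// reallocated, mirroring how the generated lexer reuses lexer.buf.
func words(r io.Reader) []string {
	br := bufio.NewReader(r)
	var buf []byte
	var out []string
	flush := func() {
		if len(buf) > 0 {
			out = append(out, string(buf))
			buf = buf[:0] // keep capacity; amortized O(1) per input byte
		}
	}
	for {
		b, err := br.ReadByte()
		if err != nil { // io.EOF (or a read error) ends the scan
			flush()
			return out
		}
		switch b {
		case ' ', '\t', '\n', '\r':
			flush()
		default:
			buf = append(buf, b)
		}
	}
}

func main() {
	fmt.Println(words(strings.NewReader("ALERT foo IF bar FOR 5m")))
}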

parser.y

@@ -14,10 +14,10 @@
%{
package rules
import (
clientmodel "github.com/prometheus/client_golang/model"
import (
clientmodel "github.com/prometheus/client_golang/model"
import "github.com/prometheus/prometheus/rules/ast"
"github.com/prometheus/prometheus/rules/ast"
)
%}

parser.y.go

@@ -1,22 +1,25 @@
//line parser.y:15
package rules
//line parser.y:15
package rules
import __yyfmt__ "fmt"
//line parser.y:15
import clientmodel "github.com/prometheus/client_golang/model"
import "github.com/prometheus/prometheus/rules/ast"
import (
clientmodel "github.com/prometheus/client_golang/model"
//line parser.y:21
"github.com/prometheus/prometheus/rules/ast"
)
//line parser.y:24
type yySymType struct {
yys int
num clientmodel.SampleValue
str string
ruleNode ast.Node
ruleNodeSlice []ast.Node
boolean bool
labelNameSlice clientmodel.LabelNames
labelSet clientmodel.LabelSet
yys int
num clientmodel.SampleValue
str string
ruleNode ast.Node
ruleNodeSlice []ast.Node
boolean bool
labelNameSlice clientmodel.LabelNames
labelSet clientmodel.LabelSet
}
const START_RULES = 57346
@@ -61,7 +64,8 @@ const yyEofCode = 1
const yyErrCode = 2
const yyMaxDepth = 200
//line parser.y:188
//line parser.y:191
//line yacctab:1
var yyExca = []int{
@@ -394,208 +398,134 @@ yydefault:
switch yynt {
case 5:
//line parser.y:66
{
yylex.(*RulesLexer).parsedExpr = yyS[yypt-0].ruleNode
}
//line parser.y:69
{ yylex.(*RulesLexer).parsedExpr = yyS[yypt-0].ruleNode }
case 6:
//line parser.y:70
//line parser.y:73
{
rule, err := CreateRecordingRule(yyS[yypt-3].str, yyS[yypt-2].labelSet, yyS[yypt-0].ruleNode, yyS[yypt-4].boolean)
if err != nil {
yylex.Error(err.Error())
return 1
}
yylex.(*RulesLexer).parsedRules = append(yylex.(*RulesLexer).parsedRules, rule)
}
rule, err := CreateRecordingRule(yyS[yypt-3].str, yyS[yypt-2].labelSet, yyS[yypt-0].ruleNode, yyS[yypt-4].boolean)
if err != nil { yylex.Error(err.Error()); return 1 }
yylex.(*RulesLexer).parsedRules = append(yylex.(*RulesLexer).parsedRules, rule)
}
case 7:
//line parser.y:76
//line parser.y:79
{
rule, err := CreateAlertingRule(yyS[yypt-5].str, yyS[yypt-3].ruleNode, yyS[yypt-2].str, yyS[yypt-0].labelSet)
if err != nil {
yylex.Error(err.Error())
return 1
}
yylex.(*RulesLexer).parsedRules = append(yylex.(*RulesLexer).parsedRules, rule)
}
rule, err := CreateAlertingRule(yyS[yypt-5].str, yyS[yypt-3].ruleNode, yyS[yypt-2].str, yyS[yypt-0].labelSet)
if err != nil { yylex.Error(err.Error()); return 1 }
yylex.(*RulesLexer).parsedRules = append(yylex.(*RulesLexer).parsedRules, rule)
}
case 8:
//line parser.y:84
{
yyVAL.str = "0s"
}
//line parser.y:87
{ yyVAL.str = "0s" }
case 9:
//line parser.y:86
{
yyVAL.str = yyS[yypt-0].str
}
//line parser.y:89
{ yyVAL.str = yyS[yypt-0].str }
case 10:
//line parser.y:90
{
yyVAL.boolean = false
}
//line parser.y:93
{ yyVAL.boolean = false }
case 11:
//line parser.y:92
{
yyVAL.boolean = true
}
//line parser.y:95
{ yyVAL.boolean = true }
case 12:
//line parser.y:96
{
yyVAL.labelSet = clientmodel.LabelSet{}
}
//line parser.y:99
{ yyVAL.labelSet = clientmodel.LabelSet{} }
case 13:
//line parser.y:98
{
yyVAL.labelSet = yyS[yypt-1].labelSet
}
//line parser.y:101
{ yyVAL.labelSet = yyS[yypt-1].labelSet }
case 14:
//line parser.y:100
{
yyVAL.labelSet = clientmodel.LabelSet{}
}
case 15:
//line parser.y:103
{
yyVAL.labelSet = yyS[yypt-0].labelSet
}
{ yyVAL.labelSet = clientmodel.LabelSet{} }
case 15:
//line parser.y:106
{ yyVAL.labelSet = yyS[yypt-0].labelSet }
case 16:
//line parser.y:105
{
for k, v := range yyS[yypt-0].labelSet {
yyVAL.labelSet[k] = v
}
}
//line parser.y:108
{ for k, v := range yyS[yypt-0].labelSet { yyVAL.labelSet[k] = v } }
case 17:
//line parser.y:109
{
yyVAL.labelSet = clientmodel.LabelSet{clientmodel.LabelName(yyS[yypt-2].str): clientmodel.LabelValue(yyS[yypt-0].str)}
}
//line parser.y:112
{ yyVAL.labelSet = clientmodel.LabelSet{ clientmodel.LabelName(yyS[yypt-2].str): clientmodel.LabelValue(yyS[yypt-0].str) } }
case 18:
//line parser.y:114
{
yyVAL.ruleNode = yyS[yypt-1].ruleNode
}
//line parser.y:117
{ yyVAL.ruleNode = yyS[yypt-1].ruleNode }
case 19:
//line parser.y:116
{
yyS[yypt-0].labelSet[clientmodel.MetricNameLabel] = clientmodel.LabelValue(yyS[yypt-1].str)
yyVAL.ruleNode = ast.NewVectorLiteral(yyS[yypt-0].labelSet)
}
//line parser.y:119
{ yyS[yypt-0].labelSet[clientmodel.MetricNameLabel] = clientmodel.LabelValue(yyS[yypt-1].str); yyVAL.ruleNode = ast.NewVectorLiteral(yyS[yypt-0].labelSet) }
case 20:
//line parser.y:118
//line parser.y:121
{
var err error
yyVAL.ruleNode, err = NewFunctionCall(yyS[yypt-3].str, yyS[yypt-1].ruleNodeSlice)
if err != nil {
yylex.Error(err.Error())
return 1
}
}
var err error
yyVAL.ruleNode, err = NewFunctionCall(yyS[yypt-3].str, yyS[yypt-1].ruleNodeSlice)
if err != nil { yylex.Error(err.Error()); return 1 }
}
case 21:
//line parser.y:124
//line parser.y:127
{
var err error
yyVAL.ruleNode, err = NewFunctionCall(yyS[yypt-2].str, []ast.Node{})
if err != nil {
yylex.Error(err.Error())
return 1
}
}
var err error
yyVAL.ruleNode, err = NewFunctionCall(yyS[yypt-2].str, []ast.Node{})
if err != nil { yylex.Error(err.Error()); return 1 }
}
case 22:
//line parser.y:130
//line parser.y:133
{
var err error
yyVAL.ruleNode, err = NewMatrix(yyS[yypt-3].ruleNode, yyS[yypt-1].str)
if err != nil {
yylex.Error(err.Error())
return 1
}
}
var err error
yyVAL.ruleNode, err = NewMatrix(yyS[yypt-3].ruleNode, yyS[yypt-1].str)
if err != nil { yylex.Error(err.Error()); return 1 }
}
case 23:
//line parser.y:136
//line parser.y:139
{
var err error
yyVAL.ruleNode, err = NewVectorAggregation(yyS[yypt-4].str, yyS[yypt-2].ruleNode, yyS[yypt-0].labelNameSlice)
if err != nil {
yylex.Error(err.Error())
return 1
}
}
var err error
yyVAL.ruleNode, err = NewVectorAggregation(yyS[yypt-4].str, yyS[yypt-2].ruleNode, yyS[yypt-0].labelNameSlice)
if err != nil { yylex.Error(err.Error()); return 1 }
}
case 24:
//line parser.y:144
//line parser.y:147
{
var err error
yyVAL.ruleNode, err = NewArithExpr(yyS[yypt-1].str, yyS[yypt-2].ruleNode, yyS[yypt-0].ruleNode)
if err != nil {
yylex.Error(err.Error())
return 1
}
}
var err error
yyVAL.ruleNode, err = NewArithExpr(yyS[yypt-1].str, yyS[yypt-2].ruleNode, yyS[yypt-0].ruleNode)
if err != nil { yylex.Error(err.Error()); return 1 }
}
case 25:
//line parser.y:150
//line parser.y:153
{
var err error
yyVAL.ruleNode, err = NewArithExpr(yyS[yypt-1].str, yyS[yypt-2].ruleNode, yyS[yypt-0].ruleNode)
if err != nil {
yylex.Error(err.Error())
return 1
}
}
var err error
yyVAL.ruleNode, err = NewArithExpr(yyS[yypt-1].str, yyS[yypt-2].ruleNode, yyS[yypt-0].ruleNode)
if err != nil { yylex.Error(err.Error()); return 1 }
}
case 26:
//line parser.y:156
//line parser.y:159
{
var err error
yyVAL.ruleNode, err = NewArithExpr(yyS[yypt-1].str, yyS[yypt-2].ruleNode, yyS[yypt-0].ruleNode)
if err != nil {
yylex.Error(err.Error())
return 1
}
}
var err error
yyVAL.ruleNode, err = NewArithExpr(yyS[yypt-1].str, yyS[yypt-2].ruleNode, yyS[yypt-0].ruleNode)
if err != nil { yylex.Error(err.Error()); return 1 }
}
case 27:
//line parser.y:162
{
yyVAL.ruleNode = ast.NewScalarLiteral(yyS[yypt-0].num)
}
//line parser.y:165
{ yyVAL.ruleNode = ast.NewScalarLiteral(yyS[yypt-0].num)}
case 28:
//line parser.y:166
{
yyVAL.labelNameSlice = clientmodel.LabelNames{}
}
//line parser.y:169
{ yyVAL.labelNameSlice = clientmodel.LabelNames{} }
case 29:
//line parser.y:168
{
yyVAL.labelNameSlice = yyS[yypt-1].labelNameSlice
}
//line parser.y:171
{ yyVAL.labelNameSlice = yyS[yypt-1].labelNameSlice }
case 30:
//line parser.y:172
{
yyVAL.labelNameSlice = clientmodel.LabelNames{clientmodel.LabelName(yyS[yypt-0].str)}
}
//line parser.y:175
{ yyVAL.labelNameSlice = clientmodel.LabelNames{clientmodel.LabelName(yyS[yypt-0].str)} }
case 31:
//line parser.y:174
{
yyVAL.labelNameSlice = append(yyVAL.labelNameSlice, clientmodel.LabelName(yyS[yypt-0].str))
}
//line parser.y:177
{ yyVAL.labelNameSlice = append(yyVAL.labelNameSlice, clientmodel.LabelName(yyS[yypt-0].str)) }
case 32:
//line parser.y:178
{
yyVAL.ruleNodeSlice = []ast.Node{yyS[yypt-0].ruleNode}
}
//line parser.y:181
{ yyVAL.ruleNodeSlice = []ast.Node{yyS[yypt-0].ruleNode} }
case 33:
//line parser.y:180
{
yyVAL.ruleNodeSlice = append(yyVAL.ruleNodeSlice, yyS[yypt-0].ruleNode)
}
//line parser.y:183
{ yyVAL.ruleNodeSlice = append(yyVAL.ruleNodeSlice, yyS[yypt-0].ruleNode) }
case 34:
//line parser.y:184
{
yyVAL.ruleNode = yyS[yypt-0].ruleNode
}
//line parser.y:187
{ yyVAL.ruleNode = yyS[yypt-0].ruleNode }
case 35:
//line parser.y:186
{
yyVAL.ruleNode = ast.NewStringLiteral(yyS[yypt-0].str)
}
//line parser.y:189
{ yyVAL.ruleNode = ast.NewStringLiteral(yyS[yypt-0].str) }
}
goto yystack /* stack new state and value */
}

rules_test.go

@@ -123,6 +123,17 @@ func TestExpressions(t *testing.T) {
},
fullRanges: 0,
intervalRanges: 8,
}, {
expr: `
// Test comment.
SUM(http_requests) BY /* comments shouldn't
have any effect */ (job) // another comment`,
output: []string{
`http_requests{job="api-server"} => 1000 @[%v]`,
`http_requests{job="app-server"} => 2600 @[%v]`,
},
fullRanges: 0,
intervalRanges: 8,
}, {
expr: `COUNT(http_requests) BY (job)`,
output: []string{
@@ -448,7 +459,7 @@ var ruleTests = []struct {
{
inputFile: "syntax_error.rules",
shouldFail: true,
errContains: "Error parsing rules at line 3",
errContains: "Error parsing rules at line 5",
},
{
inputFile: "non_vector.rules",