2023-09-05 10:32:58 +00:00
|
|
|
// Copyright 2023 The Prometheus Authors
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package parse
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"strings"
|
|
|
|
"unicode"
|
|
|
|
"unicode/utf8"
|
|
|
|
)
|
|
|
|
|
|
|
|
// eof is the sentinel rune that next returns when there is no more input to
// decode. It is negative so that it cannot be confused with a decoded rune.
const eof rune = -1
|
|
|
|
|
|
|
|
// isReserved reports whether r has special meaning in the grammar and
// therefore cannot be part of unquoted text. Reserved runes are the
// punctuation listed below and any Unicode white space.
func isReserved(r rune) bool {
	const punctuation = "{}!=~,\\\"'`"
	if strings.ContainsRune(punctuation, r) {
		return true
	}
	return unicode.IsSpace(r)
}
|
|
|
|
|
|
|
|
// expectedError is returned when the next rune does not match what is expected.
|
|
|
|
type expectedError struct {
|
|
|
|
position
|
|
|
|
input string
|
|
|
|
expected string
|
|
|
|
}
|
|
|
|
|
|
|
|
func (e expectedError) Error() string {
|
|
|
|
if e.offsetEnd >= len(e.input) {
|
|
|
|
return fmt.Sprintf("%d:%d: unexpected end of input, expected one of '%s'",
|
|
|
|
e.columnStart,
|
|
|
|
e.columnEnd,
|
|
|
|
e.expected,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
return fmt.Sprintf("%d:%d: %s: expected one of '%s'",
|
|
|
|
e.columnStart,
|
|
|
|
e.columnEnd,
|
|
|
|
e.input[e.offsetStart:e.offsetEnd],
|
|
|
|
e.expected,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
// invalidInputError is returned when the next rune in the input does not match
|
|
|
|
// the grammar of Prometheus-like matchers.
|
|
|
|
type invalidInputError struct {
|
|
|
|
position
|
|
|
|
input string
|
|
|
|
}
|
|
|
|
|
|
|
|
func (e invalidInputError) Error() string {
|
|
|
|
return fmt.Sprintf("%d:%d: %s: invalid input",
|
|
|
|
e.columnStart,
|
|
|
|
e.columnEnd,
|
|
|
|
e.input[e.offsetStart:e.offsetEnd],
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
// unterminatedError is returned when text in quotes does not have a closing quote.
|
|
|
|
type unterminatedError struct {
|
|
|
|
position
|
|
|
|
input string
|
|
|
|
quote rune
|
|
|
|
}
|
|
|
|
|
|
|
|
func (e unterminatedError) Error() string {
|
|
|
|
return fmt.Sprintf("%d:%d: %s: missing end %c",
|
|
|
|
e.columnStart,
|
|
|
|
e.columnEnd,
|
|
|
|
e.input[e.offsetStart:e.offsetEnd],
|
|
|
|
e.quote,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
// lexer turns input written in the grammar of Prometheus-like matchers into
// a sequence of tokens. Each call to scan() returns the next token, or an
// error if the input does not conform to the grammar. A token has a kind and
// corresponds to a subslice of the input. Once the input has been consumed,
// successive calls to scan() return a tokenEOF token.
type lexer struct {
	input string // The text being scanned.
	err   error  // The error from scanning, if any; once set, scan keeps returning it.

	// Byte offsets into input.
	start int // The offset of the current token.
	pos   int // The position of the cursor in the input.
	width int // The width of the last rune; zero when the last read hit eof.

	// Columns, which count runes rather than bytes.
	column int // The column offset of the current token.
	cols   int // The number of columns (runes) decoded from the input.
}
|
|
|
|
|
|
|
|
// Scans the next token in the input or an error if the input does not
|
|
|
|
// conform to the grammar. Once the input has been consumed successive
|
|
|
|
// calls scan() return a tokenEOF token.
|
|
|
|
func (l *lexer) scan() (token, error) {
|
|
|
|
t := token{}
|
|
|
|
// Do not attempt to emit more tokens if the input is invalid.
|
|
|
|
if l.err != nil {
|
|
|
|
return t, l.err
|
|
|
|
}
|
|
|
|
// Iterate over each rune in the input and either emit a token or an error.
|
|
|
|
for r := l.next(); r != eof; r = l.next() {
|
|
|
|
switch {
|
|
|
|
case r == '{':
|
|
|
|
t = l.emit(tokenOpenBrace)
|
|
|
|
return t, l.err
|
|
|
|
case r == '}':
|
|
|
|
t = l.emit(tokenCloseBrace)
|
|
|
|
return t, l.err
|
|
|
|
case r == ',':
|
|
|
|
t = l.emit(tokenComma)
|
|
|
|
return t, l.err
|
|
|
|
case r == '=' || r == '!':
|
|
|
|
l.rewind()
|
|
|
|
t, l.err = l.scanOperator()
|
|
|
|
return t, l.err
|
|
|
|
case r == '"':
|
|
|
|
l.rewind()
|
|
|
|
t, l.err = l.scanQuoted()
|
|
|
|
return t, l.err
|
|
|
|
case !isReserved(r):
|
|
|
|
l.rewind()
|
|
|
|
t, l.err = l.scanUnquoted()
|
|
|
|
return t, l.err
|
|
|
|
case unicode.IsSpace(r):
|
|
|
|
l.skip()
|
|
|
|
default:
|
|
|
|
l.err = invalidInputError{
|
|
|
|
position: l.position(),
|
|
|
|
input: l.input,
|
|
|
|
}
|
|
|
|
return t, l.err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return t, l.err
|
|
|
|
}
|
|
|
|
|
|
|
|
func (l *lexer) scanOperator() (token, error) {
|
|
|
|
// If the first rune is an '!' then it must be followed with either an
|
|
|
|
// '=' or '~' to not match a string or regex.
|
|
|
|
if l.accept("!") {
|
|
|
|
if l.accept("=") {
|
|
|
|
return l.emit(tokenNotEquals), nil
|
|
|
|
}
|
|
|
|
if l.accept("~") {
|
|
|
|
return l.emit(tokenNotMatches), nil
|
|
|
|
}
|
|
|
|
return token{}, expectedError{
|
|
|
|
position: l.position(),
|
|
|
|
input: l.input,
|
|
|
|
expected: "=~",
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// If the first rune is an '=' then it can be followed with an optional
|
|
|
|
// '~' to match a regex.
|
|
|
|
if l.accept("=") {
|
|
|
|
if l.accept("~") {
|
|
|
|
return l.emit(tokenMatches), nil
|
|
|
|
}
|
|
|
|
return l.emit(tokenEquals), nil
|
|
|
|
}
|
|
|
|
return token{}, expectedError{
|
|
|
|
position: l.position(),
|
|
|
|
input: l.input,
|
|
|
|
expected: "!=",
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (l *lexer) scanQuoted() (token, error) {
|
|
|
|
if err := l.expect("\""); err != nil {
|
|
|
|
return token{}, err
|
|
|
|
}
|
|
|
|
var isEscaped bool
|
|
|
|
for r := l.next(); r != eof; r = l.next() {
|
|
|
|
if isEscaped {
|
|
|
|
isEscaped = false
|
|
|
|
} else if r == '\\' {
|
|
|
|
isEscaped = true
|
|
|
|
} else if r == '"' {
|
|
|
|
l.rewind()
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if err := l.expect("\""); err != nil {
|
|
|
|
return token{}, unterminatedError{
|
|
|
|
position: l.position(),
|
|
|
|
input: l.input,
|
|
|
|
quote: '"',
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return l.emit(tokenQuoted), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (l *lexer) scanUnquoted() (token, error) {
|
|
|
|
for r := l.next(); r != eof; r = l.next() {
|
|
|
|
if isReserved(r) {
|
|
|
|
l.rewind()
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return l.emit(tokenUnquoted), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// peek the next token in the input or an error if the input does not
|
|
|
|
// conform to the grammar. Once the input has been consumed successive
|
|
|
|
// calls peek() return a tokenEOF token.
|
|
|
|
func (l *lexer) peek() (token, error) {
|
|
|
|
start := l.start
|
|
|
|
pos := l.pos
|
|
|
|
width := l.width
|
|
|
|
column := l.column
|
|
|
|
cols := l.cols
|
|
|
|
// Do not reset l.err because we can return it on the next call to scan().
|
|
|
|
defer func() {
|
|
|
|
l.start = start
|
|
|
|
l.pos = pos
|
|
|
|
l.width = width
|
|
|
|
l.column = column
|
|
|
|
l.cols = cols
|
|
|
|
}()
|
|
|
|
return l.scan()
|
|
|
|
}
|
|
|
|
|
|
|
|
// position returns the position of the last emitted token.
|
|
|
|
func (l *lexer) position() position {
|
|
|
|
return position{
|
|
|
|
offsetStart: l.start,
|
|
|
|
offsetEnd: l.pos,
|
|
|
|
columnStart: l.column,
|
|
|
|
columnEnd: l.cols,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// accept consumes the next if its one of the valid runes.
|
|
|
|
// It returns true if the next rune was accepted, otherwise false.
|
|
|
|
func (l *lexer) accept(valid string) bool {
|
|
|
|
if strings.ContainsRune(valid, l.next()) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
l.rewind()
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
// expect consumes the next rune if its one of the valid runes.
|
2024-03-21 11:26:46 +00:00
|
|
|
// It returns nil if the next rune is valid, otherwise an expectedError
|
2023-09-05 10:32:58 +00:00
|
|
|
// error.
|
|
|
|
func (l *lexer) expect(valid string) error {
|
|
|
|
if strings.ContainsRune(valid, l.next()) {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
l.rewind()
|
|
|
|
return expectedError{
|
|
|
|
position: l.position(),
|
|
|
|
input: l.input,
|
|
|
|
expected: valid,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// emits returns the scanned input as a token.
|
|
|
|
func (l *lexer) emit(kind tokenKind) token {
|
|
|
|
t := token{
|
|
|
|
kind: kind,
|
|
|
|
value: l.input[l.start:l.pos],
|
|
|
|
position: l.position(),
|
|
|
|
}
|
|
|
|
l.start = l.pos
|
|
|
|
l.column = l.cols
|
|
|
|
return t
|
|
|
|
}
|
|
|
|
|
|
|
|
// next returns the next rune in the input or eof.
|
|
|
|
func (l *lexer) next() rune {
|
|
|
|
if l.pos >= len(l.input) {
|
|
|
|
l.width = 0
|
|
|
|
return eof
|
|
|
|
}
|
|
|
|
r, width := utf8.DecodeRuneInString(l.input[l.pos:])
|
|
|
|
l.width = width
|
|
|
|
l.pos += width
|
|
|
|
l.cols++
|
|
|
|
return r
|
|
|
|
}
|
|
|
|
|
|
|
|
// rewind the last rune in the input. It should not be called more than once
|
|
|
|
// between consecutive calls of next.
|
|
|
|
func (l *lexer) rewind() {
|
|
|
|
l.pos -= l.width
|
|
|
|
// When the next rune in the input is eof the width is zero. This check
|
|
|
|
// prevents cols from being decremented when the next rune being accepted
|
|
|
|
// is instead eof.
|
|
|
|
if l.width > 0 {
|
|
|
|
l.cols--
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// skip the scanned input between start and pos.
|
|
|
|
func (l *lexer) skip() {
|
|
|
|
l.start = l.pos
|
|
|
|
l.column = l.cols
|
|
|
|
}
|