alertmanager/matcher/parse/lexer.go

// Copyright 2023 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package parse

import (
	"fmt"
	"strings"
	"unicode"
	"unicode/utf8"
)

// eof is the sentinel rune returned by the lexer when there is no more input
// to read.
const eof rune = -1

// isReserved reports whether r is a rune that has special meaning to the
// lexer: any Unicode whitespace, or one of the punctuation runes listed in
// reserved below. Reserved runes cannot appear in unquoted text.
func isReserved(r rune) bool {
	const reserved = "{}!=~,\\\"'`"
	if unicode.IsSpace(r) {
		return true
	}
	return strings.ContainsRune(reserved, r)
}

// expectedError is returned when the next rune does not match what is expected.
// It embeds the position of the offending span, keeps the complete input so
// that the error message can quote that span, and records in expected the
// runes that would have been accepted instead.
type expectedError struct {
	position
	input    string
	expected string
}

// Error implements the error interface. The message is prefixed with the
// start and end columns of the offending span. If the span stops short of the
// end of the input the span itself is quoted, otherwise the message reports
// an unexpected end of input. Both forms list the runes that were expected.
func (e expectedError) Error() string {
	if e.offsetEnd < len(e.input) {
		got := e.input[e.offsetStart:e.offsetEnd]
		return fmt.Sprintf("%d:%d: %s: expected one of '%s'",
			e.columnStart, e.columnEnd, got, e.expected)
	}
	return fmt.Sprintf("%d:%d: unexpected end of input, expected one of '%s'",
		e.columnStart, e.columnEnd, e.expected)
}

// invalidInputError is returned when the next rune in the input does not match
// the grammar of Prometheus-like matchers. It embeds the position of the
// offending span and keeps the complete input so that the error message can
// quote that span.
type invalidInputError struct {
	position
	input string
}

// Error implements the error interface. The message contains the start and
// end columns of the offending span followed by the span itself.
func (e invalidInputError) Error() string {
	invalid := e.input[e.offsetStart:e.offsetEnd]
	return fmt.Sprintf("%d:%d: %s: invalid input", e.columnStart, e.columnEnd, invalid)
}

// unterminatedError is returned when text in quotes does not have a closing quote.
// It embeds the position of the unterminated span, keeps the complete input so
// that the error message can quote that span, and records in quote the rune
// that was expected to close it.
type unterminatedError struct {
	position
	input string
	quote rune
}

// Error implements the error interface. The message contains the start and
// end columns of the unterminated span, the span itself and the quote rune
// that is missing.
func (e unterminatedError) Error() string {
	unterminated := e.input[e.offsetStart:e.offsetEnd]
	return fmt.Sprintf("%d:%d: %s: missing end %c", e.columnStart, e.columnEnd, unterminated, e.quote)
}

// lexer scans a sequence of tokens that match the grammar of Prometheus-like
// matchers. A token is emitted for each call to scan() which returns the
// next token in the input or an error if the input does not conform to the
// grammar. A token can be one of a number of kinds and corresponds to a
// subslice of the input. Once the input has been consumed successive calls to
// scan() return a tokenEOF token.
//
// input is the text being scanned. err holds the error returned by the most
// recent sub-scan; once it is non-nil every later call to scan() returns it
// without reading more input. start, pos and width are measured in bytes,
// while column and cols are measured in runes.
type lexer struct {
	input  string
	err    error
	start  int // The byte offset in the input at which the current token starts.
	pos    int // The byte offset of the cursor in the input.
	width  int // The width in bytes of the last rune read by next, or 0 at eof.
	column int // The column (rune index) at which the current token starts.
	cols   int // The number of columns (runes) consumed from the input so far.
}

// scan returns the next token in the input, or an error if the input does
// not conform to the grammar. Whitespace before a token is skipped. Once the
// input has been consumed, successive calls to scan() return the zero token
// (expected to be the tokenEOF token) and a nil error. Once an error has
// occurred it is remembered and returned by every subsequent call.
func (l *lexer) scan() (token, error) {
	// Do not attempt to emit more tokens if the input is invalid.
	if l.err != nil {
		return token{}, l.err
	}
	for {
		r := l.next()
		switch r {
		case eof:
			return token{}, nil
		case '{':
			return l.emit(tokenOpenBrace), nil
		case '}':
			return l.emit(tokenCloseBrace), nil
		case ',':
			return l.emit(tokenComma), nil
		case '=', '!':
			// Give the rune back so the operator is scanned as a whole.
			l.rewind()
			tok, err := l.scanOperator()
			l.err = err
			return tok, err
		case '"':
			// Give the opening quote back so it becomes part of the token.
			l.rewind()
			tok, err := l.scanQuoted()
			l.err = err
			return tok, err
		default:
			switch {
			case unicode.IsSpace(r):
				// Whitespace separates tokens but is not a token itself.
				l.skip()
			case isReserved(r):
				// A reserved rune that cannot start any token.
				l.err = invalidInputError{
					position: l.position(),
					input:    l.input,
				}
				return token{}, l.err
			default:
				// Give the rune back so it becomes part of the token.
				l.rewind()
				tok, err := l.scanUnquoted()
				l.err = err
				return tok, err
			}
		}
	}
}

// scanOperator scans one of the operators '=', '=~', '!=' or '!~' starting at
// the cursor and emits the matching token. It returns an expectedError if the
// input at the cursor is not one of these operators.
func (l *lexer) scanOperator() (token, error) {
	switch l.next() {
	case '!':
		// An '!' must be followed with either an '=' or '~' to not match a
		// string or regex.
		switch l.next() {
		case '=':
			return l.emit(tokenNotEquals), nil
		case '~':
			return l.emit(tokenNotMatches), nil
		}
		// Give back the rune that was neither '=' nor '~' so the error
		// position covers just the '!'.
		l.rewind()
		return token{}, expectedError{
			position: l.position(),
			input:    l.input,
			expected: "=~",
		}
	case '=':
		// An '=' can be followed with an optional '~' to match a regex.
		if l.accept("~") {
			return l.emit(tokenMatches), nil
		}
		return l.emit(tokenEquals), nil
	}
	// The rune at the cursor does not start an operator, give it back.
	l.rewind()
	return token{}, expectedError{
		position: l.position(),
		input:    l.input,
		expected: "!=",
	}
}

// scanQuoted scans text enclosed in double quotes and emits it, including
// both quotes, as a tokenQuoted token. A rune that follows a backslash is
// escaped and so does not terminate the text, even if it is a double quote.
// It returns an expectedError if the cursor is not at an opening quote, and
// an unterminatedError if the input ends before the closing quote.
func (l *lexer) scanQuoted() (token, error) {
	if err := l.expect("\""); err != nil {
		return token{}, err
	}
	escaped := false
	for {
		r := l.next()
		switch {
		case r == eof:
			// Ran out of input before finding the closing quote.
			return token{}, unterminatedError{
				position: l.position(),
				input:    l.input,
				quote:    '"',
			}
		case escaped:
			// Whatever follows a backslash is taken literally.
			escaped = false
		case r == '\\':
			escaped = true
		case r == '"':
			return l.emit(tokenQuoted), nil
		}
	}
}

// scanUnquoted scans runes up to, but not including, the next reserved rune
// or the end of the input, and emits them as a tokenUnquoted token. It never
// returns an error.
func (l *lexer) scanUnquoted() (token, error) {
	r := l.next()
	for r != eof && !isReserved(r) {
		r = l.next()
	}
	if r != eof {
		// Leave the reserved rune in the input for the next scan.
		l.rewind()
	}
	return l.emit(tokenUnquoted), nil
}

// peek returns the same result as the next call to scan() would, without
// consuming any input: the cursor and the token start are restored before
// peek returns.
func (l *lexer) peek() (token, error) {
	saved := *l
	defer func() {
		// Only the cursor state is restored. l.err is deliberately kept so
		// that an error found while peeking is returned again by scan().
		l.start, l.pos, l.width = saved.start, saved.pos, saved.width
		l.column, l.cols = saved.column, saved.cols
	}()
	return l.scan()
}

// position returns the span of input between the start of the current token
// and the cursor, as byte offsets (start and pos) and as columns (column and
// cols).
func (l *lexer) position() position {
	var p position
	p.offsetStart, p.offsetEnd = l.start, l.pos
	p.columnStart, p.columnEnd = l.column, l.cols
	return p
}

// accept consumes the next rune if it is one of the valid runes. It reports
// whether the rune was accepted; a rune that is not accepted is given back
// so that the cursor does not move.
func (l *lexer) accept(valid string) bool {
	accepted := strings.ContainsRune(valid, l.next())
	if !accepted {
		l.rewind()
	}
	return accepted
}

// expect consumes the next rune if it is one of the valid runes. It returns
// nil if the rune was consumed. Otherwise the cursor does not move and an
// expectedError listing the valid runes is returned.
func (l *lexer) expect(valid string) error {
	if l.accept(valid) {
		return nil
	}
	return expectedError{
		position: l.position(),
		input:    l.input,
		expected: valid,
	}
}

// emit returns the input between the start of the current token and the
// cursor as a token of the given kind, and then starts the next token at the
// cursor.
func (l *lexer) emit(kind tokenKind) token {
	value, span := l.input[l.start:l.pos], l.position()
	// Move the token start up to the cursor.
	l.skip()
	return token{
		kind:     kind,
		value:    value,
		position: span,
	}
}

// next consumes and returns the next rune in the input, advancing the cursor
// by the rune's width in bytes and by one column. It returns eof, and leaves
// the cursor where it is, if there is no more input.
func (l *lexer) next() rune {
	// A zero width makes a rewind after eof a no-op.
	l.width = 0
	if l.pos >= len(l.input) {
		return eof
	}
	var r rune
	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
	l.pos += l.width
	l.cols++
	return r
}

// rewind gives back the last rune read by next. It should not be called more
// than once between consecutive calls of next.
func (l *lexer) rewind() {
	// The width is zero when the last call to next returned eof. Nothing was
	// consumed in that case, so neither pos nor cols must change.
	if l.width == 0 {
		return
	}
	l.pos -= l.width
	l.cols--
}

// skip discards the scanned input between start and pos by moving the start
// of the current token, and its column, up to the cursor.
func (l *lexer) skip() {
	l.start, l.column = l.pos, l.cols
}