alertmanager/matcher/parse/lexer_test.go

// Copyright 2023 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package parse

import (
	"testing"

	"github.com/stretchr/testify/require"
)

func TestLexer_Scan(t *testing.T) {
	tests := []struct {
		name     string
		input    string
		expected []token
		err      string
	}{{
		name:  "no input",
		input: "",
	}, {
		name:  "open brace",
		input: "{",
		expected: []token{{
			kind:  tokenOpenBrace,
			value: "{",
			position: position{
				offsetStart: 0,
				offsetEnd:   1,
				columnStart: 0,
				columnEnd:   1,
			},
		}},
	}, {
		name:  "open brace with leading space",
		input: " {",
		expected: []token{{
			kind:  tokenOpenBrace,
			value: "{",
			position: position{
				offsetStart: 1,
				offsetEnd:   2,
				columnStart: 1,
				columnEnd:   2,
			},
		}},
	}, {
		name:  "close brace",
		input: "}",
		expected: []token{{
			kind:  tokenCloseBrace,
			value: "}",
			position: position{
				offsetStart: 0,
				offsetEnd:   1,
				columnStart: 0,
				columnEnd:   1,
			},
		}},
	}, {
		name:  "close brace with leading space",
		input: " }",
		expected: []token{{
			kind:  tokenCloseBrace,
			value: "}",
			position: position{
				offsetStart: 1,
				offsetEnd:   2,
				columnStart: 1,
				columnEnd:   2,
			},
		}},
	}, {
		name:  "open and closing braces",
		input: "{}",
		expected: []token{{
			kind:  tokenOpenBrace,
			value: "{",
			position: position{
				offsetStart: 0,
				offsetEnd:   1,
				columnStart: 0,
				columnEnd:   1,
			},
		}, {
			kind:  tokenCloseBrace,
			value: "}",
			position: position{
				offsetStart: 1,
				offsetEnd:   2,
				columnStart: 1,
				columnEnd:   2,
			},
		}},
	}, {
		name:  "open and closing braces with space",
		input: "{ }",
		expected: []token{{
			kind:  tokenOpenBrace,
			value: "{",
			position: position{
				offsetStart: 0,
				offsetEnd:   1,
				columnStart: 0,
				columnEnd:   1,
			},
		}, {
			kind:  tokenCloseBrace,
			value: "}",
			position: position{
				offsetStart: 2,
				offsetEnd:   3,
				columnStart: 2,
				columnEnd:   3,
			},
		}},
	}, {
		name:  "unquoted",
		input: "hello",
		expected: []token{{
			kind:  tokenUnquoted,
			value: "hello",
			position: position{
				offsetStart: 0,
				offsetEnd:   5,
				columnStart: 0,
				columnEnd:   5,
			},
		}},
	}, {
		name:  "unquoted with underscore",
		input: "hello_world",
		expected: []token{{
			kind:  tokenUnquoted,
			value: "hello_world",
			position: position{
				offsetStart: 0,
				offsetEnd:   11,
				columnStart: 0,
				columnEnd:   11,
			},
		}},
	}, {
		name:  "unquoted with colon",
		input: "hello:world",
		expected: []token{{
			kind:  tokenUnquoted,
			value: "hello:world",
			position: position{
				offsetStart: 0,
				offsetEnd:   11,
				columnStart: 0,
				columnEnd:   11,
			},
		}},
	}, {
		name:  "unquoted with numbers",
		input: "hello0123456789",
		expected: []token{{
			kind:  tokenUnquoted,
			value: "hello0123456789",
			position: position{
				offsetStart: 0,
				offsetEnd:   15,
				columnStart: 0,
				columnEnd:   15,
			},
		}},
	}, {
		name:  "unquoted can start with underscore",
		input: "_hello",
		expected: []token{{
			kind:  tokenUnquoted,
			value: "_hello",
			position: position{
				offsetStart: 0,
				offsetEnd:   6,
				columnStart: 0,
				columnEnd:   6,
			},
		}},
	}, {
		name:  "unquoted separated with space",
		input: "hello world",
		expected: []token{{
			kind:  tokenUnquoted,
			value: "hello",
			position: position{
				offsetStart: 0,
				offsetEnd:   5,
				columnStart: 0,
				columnEnd:   5,
			},
		}, {
			kind:  tokenUnquoted,
			value: "world",
			position: position{
				offsetStart: 6,
				offsetEnd:   11,
				columnStart: 6,
				columnEnd:   11,
			},
		}},
	}, {
		name:  "unquoted $",
		input: "$",
		expected: []token{{
			kind:  tokenUnquoted,
			value: "$",
			position: position{
				offsetStart: 0,
				offsetEnd:   1,
				columnStart: 0,
				columnEnd:   1,
			},
		}},
	}, {
		name:  "unquoted emoji",
		input: "🙂",
		expected: []token{{
			kind:  tokenUnquoted,
			value: "🙂",
			position: position{
				offsetStart: 0,
				offsetEnd:   4,
				columnStart: 0,
				columnEnd:   1,
			},
		}},
	}, {
		name:  "unquoted unicode",
		input: "Σ",
		expected: []token{{
			kind:  tokenUnquoted,
			value: "Σ",
			position: position{
				offsetStart: 0,
				offsetEnd:   2,
				columnStart: 0,
				columnEnd:   1,
			},
		}},
	}, {
		name:  "unquoted unicode sentence",
		input: "hello🙂Σ world",
		expected: []token{{
			kind:  tokenUnquoted,
			value: "hello🙂Σ",
			position: position{
				offsetStart: 0,
				offsetEnd:   11,
				columnStart: 0,
				columnEnd:   7,
			},
		}, {
			kind:  tokenUnquoted,
			value: "world",
			position: position{
				offsetStart: 12,
				offsetEnd:   17,
				columnStart: 8,
				columnEnd:   13,
			},
		}},
	}, {
		name:  "unquoted unicode sentence with unicode space",
		input: "hello🙂Σ\u202fworld",
		expected: []token{{
			kind:  tokenUnquoted,
			value: "hello🙂Σ",
			position: position{
				offsetStart: 0,
				offsetEnd:   11,
				columnStart: 0,
				columnEnd:   7,
			},
		}, {
			kind:  tokenUnquoted,
			value: "world",
			position: position{
				offsetStart: 14,
				offsetEnd:   19,
				columnStart: 8,
				columnEnd:   13,
			},
		}},
	}, {
		name:  "quoted",
		input: "\"hello\"",
		expected: []token{{
			kind:  tokenQuoted,
			value: "\"hello\"",
			position: position{
				offsetStart: 0,
				offsetEnd:   7,
				columnStart: 0,
				columnEnd:   7,
			},
		}},
	}, {
		name:  "quoted with unicode",
		input: "\"hello 🙂\"",
		expected: []token{{
			kind:  tokenQuoted,
			value: "\"hello 🙂\"",
			position: position{
				offsetStart: 0,
				offsetEnd:   12,
				columnStart: 0,
				columnEnd:   9,
			},
		}},
	}, {
		name:  "quoted with space",
		input: "\"hello world\"",
		expected: []token{{
			kind:  tokenQuoted,
			value: "\"hello world\"",
			position: position{
				offsetStart: 0,
				offsetEnd:   13,
				columnStart: 0,
				columnEnd:   13,
			},
		}},
	}, {
		name:  "quoted with unicode space",
		input: "\"hello\u202fworld\"",
		expected: []token{{
			kind:  tokenQuoted,
			value: "\"hello\u202fworld\"",
			position: position{
				offsetStart: 0,
				offsetEnd:   15,
				columnStart: 0,
				columnEnd:   13,
			},
		}},
	}, {
		name:  "quoted with newline",
		input: "\"hello\nworld\"",
		expected: []token{{
			kind:  tokenQuoted,
			value: "\"hello\nworld\"",
			position: position{
				offsetStart: 0,
				offsetEnd:   13,
				columnStart: 0,
				columnEnd:   13,
			},
		}},
	}, {
		name:  "quoted with tab",
		input: "\"hello\tworld\"",
		expected: []token{{
			kind:  tokenQuoted,
			value: "\"hello\tworld\"",
			position: position{
				offsetStart: 0,
				offsetEnd:   13,
				columnStart: 0,
				columnEnd:   13,
			},
		}},
	}, {
		name:  "quoted with escaped quotes",
		input: "\"hello \\\"world\\\"\"",
		expected: []token{{
			kind:  tokenQuoted,
			value: "\"hello \\\"world\\\"\"",
			position: position{
				offsetStart: 0,
				offsetEnd:   17,
				columnStart: 0,
				columnEnd:   17,
			},
		}},
	}, {
		name:  "quoted with escaped backslash",
		input: "\"hello world\\\\\"",
		expected: []token{{
			kind:  tokenQuoted,
			value: "\"hello world\\\\\"",
			position: position{
				offsetStart: 0,
				offsetEnd:   15,
				columnStart: 0,
				columnEnd:   15,
			},
		}},
	}, {
		name:  "equals operator",
		input: "=",
		expected: []token{{
			kind:  tokenEquals,
			value: "=",
			position: position{
				offsetStart: 0,
				offsetEnd:   1,
				columnStart: 0,
				columnEnd:   1,
			},
		}},
	}, {
		name:  "not equals operator",
		input: "!=",
		expected: []token{{
			kind:  tokenNotEquals,
			value: "!=",
			position: position{
				offsetStart: 0,
				offsetEnd:   2,
				columnStart: 0,
				columnEnd:   2,
			},
		}},
	}, {
		name:  "matches regex operator",
		input: "=~",
		expected: []token{{
			kind:  tokenMatches,
			value: "=~",
			position: position{
				offsetStart: 0,
				offsetEnd:   2,
				columnStart: 0,
				columnEnd:   2,
			},
		}},
	}, {
		name:  "not matches regex operator",
		input: "!~",
		expected: []token{{
			kind:  tokenNotMatches,
			value: "!~",
			position: position{
				offsetStart: 0,
				offsetEnd:   2,
				columnStart: 0,
				columnEnd:   2,
			},
		}},
	}, {
		name:  "invalid operator",
		input: "!",
		err:   "0:1: unexpected end of input, expected one of '=~'",
	}, {
		name:  "another invalid operator",
		input: "~",
		err:   "0:1: ~: invalid input",
	}, {
		name:  "unexpected ! after unquoted",
		input: "hello!",
		expected: []token{{
			kind:  tokenUnquoted,
			value: "hello",
			position: position{
				offsetStart: 0,
				offsetEnd:   5,
				columnStart: 0,
				columnEnd:   5,
			},
		}},
		err: "5:6: unexpected end of input, expected one of '=~'",
	}, {
		name:  "unexpected ! after operator",
		input: "=!",
		expected: []token{{
			kind:  tokenEquals,
			value: "=",
			position: position{
				offsetStart: 0,
				offsetEnd:   1,
				columnStart: 0,
				columnEnd:   1,
			},
		}},
		err: "1:2: unexpected end of input, expected one of '=~'",
	}, {
		name:  "unexpected !! after operator",
		input: "!=!!",
		expected: []token{{
			kind:  tokenNotEquals,
			value: "!=",
			position: position{
				offsetStart: 0,
				offsetEnd:   2,
				columnStart: 0,
				columnEnd:   2,
			},
		}},
		err: "2:3: !: expected one of '=~'",
	}, {
		name:  "unterminated quoted",
		input: "\"hello",
		err:   "0:6: \"hello: missing end \"",
	}, {
		name:  "unterminated quoted with escaped quote",
		input: "\"hello\\\"",
		err:   "0:8: \"hello\\\": missing end \"",
	}}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			l := lexer{input: test.input}
			// scan all expected tokens.
			for i := 0; i < len(test.expected); i++ {
				tok, err := l.scan()
				require.NoError(t, err)
				require.Equal(t, test.expected[i], tok)
			}
			if test.err == "" {
				// Check there are no more tokens.
				tok, err := l.scan()
				require.NoError(t, err)
				require.Equal(t, token{}, tok)
			} else {
				// Check if expected error is returned.
				tok, err := l.scan()
				require.Equal(t, token{}, tok)
				require.EqualError(t, err, test.err)
			}
		})
	}
}

// This test asserts that the lexer does not emit more tokens after an
// error has occurred.
func TestLexer_ScanError(t *testing.T) {
	l := lexer{input: "\"hello"}
	for i := 0; i < 10; i++ {
		tok, err := l.scan()
		require.Equal(t, token{}, tok)
		require.EqualError(t, err, "0:6: \"hello: missing end \"")
	}
}

func TestLexer_Peek(t *testing.T) {
	l := lexer{input: "hello world"}
	expected1 := token{
		kind:  tokenUnquoted,
		value: "hello",
		position: position{
			offsetStart: 0,
			offsetEnd:   5,
			columnStart: 0,
			columnEnd:   5,
		},
	}
	expected2 := token{
		kind:  tokenUnquoted,
		value: "world",
		position: position{
			offsetStart: 6,
			offsetEnd:   11,
			columnStart: 6,
			columnEnd:   11,
		},
	}
	// Check that peek() returns the first token.
	tok, err := l.peek()
	require.NoError(t, err)
	require.Equal(t, expected1, tok)
	// Check that scan() returns the peeked token.
	tok, err = l.scan()
	require.NoError(t, err)
	require.Equal(t, expected1, tok)
	// Check that peek() returns the second token until the next scan().
	for i := 0; i < 10; i++ {
		tok, err = l.peek()
		require.NoError(t, err)
		require.Equal(t, expected2, tok)
	}
	// Check that scan() returns the last token.
	tok, err = l.scan()
	require.NoError(t, err)
	require.Equal(t, expected2, tok)
	// Should not be able to peek() further tokens.
	for i := 0; i < 10; i++ {
		tok, err = l.peek()
		require.NoError(t, err)
		require.Equal(t, token{}, tok)
	}
}

// This test asserts that the lexer does not emit more tokens after an
// error has occurred.
func TestLexer_PeekError(t *testing.T) {
	l := lexer{input: "\"hello"}
	for i := 0; i < 10; i++ {
		tok, err := l.peek()
		require.Equal(t, token{}, tok)
		require.EqualError(t, err, "0:6: \"hello: missing end \"")
	}
}

func TestLexer_Pos(t *testing.T) {
	l := lexer{input: "hello🙂"}
	// The start position should be the zero-value.
	require.Equal(t, position{}, l.position())
	_, err := l.scan()
	require.NoError(t, err)
	// The position should contain the offset and column of the end.
	expected := position{
		offsetStart: 9,
		offsetEnd:   9,
		columnStart: 6,
		columnEnd:   6,
	}
	require.Equal(t, expected, l.position())
	// The position should not change once the input has been consumed.
	tok, err := l.scan()
	require.NoError(t, err)
	require.True(t, tok.isEOF())
	require.Equal(t, expected, l.position())
}