alertmanager/pkg/labels/parse.go

// Copyright 2018 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package labels

import (
	"regexp"
	"strings"
	"unicode/utf8"

	"github.com/pkg/errors"
)

var (
	// '=~' has to come before '=' because otherwise only the '='
	// will be consumed, and the '~' will be part of the 3rd token.
	re      = regexp.MustCompile(`^\s*([a-zA-Z_:][a-zA-Z0-9_:]*)\s*(=~|=|!=|!~)\s*((?s).*?)\s*$`)
	typeMap = map[string]MatchType{
		"=":  MatchEqual,
		"!=": MatchNotEqual,
		"=~": MatchRegexp,
		"!~": MatchNotRegexp,
	}
)

// ParseMatchers parses a comma-separated list of Matchers. A leading '{' and/or
// a trailing '}' is optional and will be trimmed before further
// parsing. Individual Matchers are separated by commas outside of quoted parts
// of the input string. Those commas may be surrounded by whitespace. Parts of the
// string inside unescaped double quotes ('"…"') are considered quoted (and
// commas don't act as separators there). If double quotes are escaped with a
// single backslash ('\"'), they are ignored for the purpose of identifying
// quoted parts of the input string. If the input string, after trimming the
// optional trailing '}', ends with a comma, followed by optional whitespace,
// this comma and whitespace will be trimmed.
//
// Examples for valid input strings:
//   {foo = "bar", dings != "bums", }
//   foo=bar,dings!=bums
//   foo=bar, dings!=bums
//   {quote="She said: \"Hi, ladies! That's gender-neutral…\""}
//   statuscode=~"5.."
//
// See ParseMatcher for details on how an individual Matcher is parsed.
func ParseMatchers(s string) ([]*Matcher, error) {
	matchers := []*Matcher{}
	s = strings.TrimPrefix(s, "{")
	s = strings.TrimSuffix(s, "}")

	var (
		insideQuotes bool
		escaped      bool
		token        strings.Builder
		tokens       []string
	)
	for _, r := range s {
		switch r {
		case ',':
			if !insideQuotes {
				tokens = append(tokens, token.String())
				token.Reset()
				continue
			}
		case '"':
			if !escaped {
				insideQuotes = !insideQuotes
			} else {
				escaped = false
			}
		case '\\':
			escaped = !escaped
		default:
			escaped = false
		}
		token.WriteRune(r)
	}
	if s := strings.TrimSpace(token.String()); s != "" {
		tokens = append(tokens, s)
	}
	for _, token := range tokens {
		m, err := ParseMatcher(token)
		if err != nil {
			return nil, err
		}
		matchers = append(matchers, m)
	}

	return matchers, nil
}

// ParseMatcher parses a matcher with a syntax inspired by PromQL and
// OpenMetrics. This syntax is convenient to describe filters and selectors in
// UIs and config files. To support the interactive nature of the use cases, the
// parser is in various aspects fairly tolerant.
//
// The syntax of a matcher consists of three tokens: (1) A valid Prometheus
// label name. (2) One of '=', '!=', '=~', or '!~', with the same meaning as
// known from PromQL selectors. (3) A UTF-8 string, which may be enclosed in
// double quotes. Before or after each token, there may be any amount of
// whitespace, which will be discarded. The 3rd token may be the empty
// string. Within the 3rd token, OpenMetrics escaping rules apply: '\"' for a
// double-quote, '\n' for a line feed, '\\' for a literal backslash. Unescaped
// '"' must not occur inside the 3rd token (only as the 1st or last
// character). However, literal line feed characters are tolerated, as are
// single '\' characters not followed by '\', 'n', or '"'. They act as a literal
// backslash in that case.
func ParseMatcher(s string) (_ *Matcher, err error) {
	ms := re.FindStringSubmatch(s)
	if len(ms) == 0 {
		return nil, errors.Errorf("bad matcher format: %s", s)
	}

	var (
		rawValue            = ms[3]
		value               strings.Builder
		escaped             bool
		expectTrailingQuote bool
	)

	if strings.HasPrefix(rawValue, "\"") {
		rawValue = strings.TrimPrefix(rawValue, "\"")
		expectTrailingQuote = true
	}

	if !utf8.ValidString(rawValue) {
		return nil, errors.Errorf("matcher value not valid UTF-8: %s", ms[3])
	}

	// Unescape the rawValue:
	for i, r := range rawValue {
		if escaped {
			escaped = false
			switch r {
			case 'n':
				value.WriteByte('\n')
			case '"', '\\':
				value.WriteRune(r)
			default:
				// This was a spurious escape, so treat the '\' as literal.
				value.WriteByte('\\')
				value.WriteRune(r)
			}
			continue
		}
		switch r {
		case '\\':
			if i < len(rawValue)-1 {
				escaped = true
				continue
			}
			// '\' encountered as last byte. Treat it as literal.
			value.WriteByte('\\')
		case '"':
			if !expectTrailingQuote || i < len(rawValue)-1 {
				return nil, errors.Errorf("matcher value contains unescaped double quote: %s", ms[3])
			}
			expectTrailingQuote = false
		default:
			value.WriteRune(r)
		}
	}

	if expectTrailingQuote {
		return nil, errors.Errorf("matcher value contains unescaped double quote: %s", ms[3])
	}

	return NewMatcher(typeMap[ms[2]], ms[1], value.String())
}