From 7e86cd1ef4ecb45c6e5e343046f0a67c8de50dc6 Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Wed, 30 Sep 2015 21:27:21 +0200 Subject: [PATCH 1/2] Add Unquote() strutil function. --- util/strutil/quote.go | 221 +++++++++++++++++++++++++++++++++++++ util/strutil/quote_test.go | 125 +++++++++++++++++++++ 2 files changed, 346 insertions(+) create mode 100644 util/strutil/quote.go create mode 100644 util/strutil/quote_test.go diff --git a/util/strutil/quote.go b/util/strutil/quote.go new file mode 100644 index 000000000..81be1c5b6 --- /dev/null +++ b/util/strutil/quote.go @@ -0,0 +1,221 @@ +// Copyright 2015 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package strutil + +import ( + "errors" + "unicode/utf8" +) + +// ErrSyntax indicates that a value does not have the right syntax for the target type. +var ErrSyntax = errors.New("invalid syntax") + +// Unquote interprets s as a single-quoted, double-quoted, or backquoted +// Prometheus query language string literal, returning the string value that s +// quotes. +// +// NOTE: This function as well as the necessary helper functions below +// (unquoteChar, contains, unhex) and associated tests have been adapted from +// the corresponding functions in the "strconv" package of the Go standard +// library to work for Prometheus-style strings. +func Unquote(s string) (t string, err error) { + n := len(s) + if n < 2 { + return "", ErrSyntax + } + quote := s[0] + if quote != s[n-1] { + return "", ErrSyntax + } + s = s[1 : n-1] + + if quote == '`' { + if contains(s, '`') { + return "", ErrSyntax + } + return s, nil + } + if quote != '"' && quote != '\'' { + return "", ErrSyntax + } + if contains(s, '\n') { + return "", ErrSyntax + } + + // Is it trivial? Avoid allocation. + if !contains(s, '\\') && !contains(s, quote) { + return s, nil + } + + var runeTmp [utf8.UTFMax]byte + buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. + for len(s) > 0 { + c, multibyte, ss, err := unquoteChar(s, quote) + if err != nil { + return "", err + } + s = ss + if c < utf8.RuneSelf || !multibyte { + buf = append(buf, byte(c)) + } else { + n := utf8.EncodeRune(runeTmp[:], c) + buf = append(buf, runeTmp[:n]...) + } + } + return string(buf), nil +} + +// unquoteChar decodes the first character or byte in the escaped string +// or character literal represented by the string s. +// It returns four values: +// +// 1) value, the decoded Unicode code point or byte value; +// 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation; +// 3) tail, the remainder of the string after the character; and +// 4) an error that will be nil if the character is syntactically valid. +// +// The second argument, quote, specifies the type of literal being parsed +// and therefore which escaped quote character is permitted. +// If set to a single quote, it permits the sequence \' and disallows unescaped '. +// If set to a double quote, it permits \" and disallows unescaped ". +// If set to zero, it does not permit either escape and allows both quote characters to appear unescaped. +func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) { + // easy cases + switch c := s[0]; { + case c == quote && (quote == '\'' || quote == '"'): + err = ErrSyntax + return + case c >= utf8.RuneSelf: + r, size := utf8.DecodeRuneInString(s) + return r, true, s[size:], nil + case c != '\\': + return rune(s[0]), false, s[1:], nil + } + + // hard case: c is backslash + if len(s) <= 1 { + err = ErrSyntax + return + } + c := s[1] + s = s[2:] + + switch c { + case 'a': + value = '\a' + case 'b': + value = '\b' + case 'f': + value = '\f' + case 'n': + value = '\n' + case 'r': + value = '\r' + case 't': + value = '\t' + case 'v': + value = '\v' + case 'x', 'u', 'U': + n := 0 + switch c { + case 'x': + n = 2 + case 'u': + n = 4 + case 'U': + n = 8 + } + var v rune + if len(s) < n { + err = ErrSyntax + return + } + for j := 0; j < n; j++ { + x, ok := unhex(s[j]) + if !ok { + err = ErrSyntax + return + } + v = v<<4 | x + } + s = s[n:] + if c == 'x' { + // single-byte string, possibly not UTF-8 + value = v + break + } + if v > utf8.MaxRune { + err = ErrSyntax + return + } + value = v + multibyte = true + case '0', '1', '2', '3', '4', '5', '6', '7': + v := rune(c) - '0' + if len(s) < 2 { + err = ErrSyntax + return + } + for j := 0; j < 2; j++ { // one digit already; two more + x := rune(s[j]) - '0' + if x < 0 || x > 7 { + err = ErrSyntax + return + } + v = (v << 3) | x + } + s = s[2:] + if v > 255 { + err = ErrSyntax + return + } + value = v + case '\\': + value = '\\' + case '\'', '"': + if c != quote { + err = ErrSyntax + return + } + value = rune(c) + default: + err = ErrSyntax + return + } + tail = s + return +} + +// contains reports whether the string contains the byte c. +func contains(s string, c byte) bool { + for i := 0; i < len(s); i++ { + if s[i] == c { + return true + } + } + return false +} + +func unhex(b byte) (v rune, ok bool) { + c := rune(b) + switch { + case '0' <= c && c <= '9': + return c - '0', true + case 'a' <= c && c <= 'f': + return c - 'a' + 10, true + case 'A' <= c && c <= 'F': + return c - 'A' + 10, true + } + return +} diff --git a/util/strutil/quote_test.go b/util/strutil/quote_test.go new file mode 100644 index 000000000..35bd6842c --- /dev/null +++ b/util/strutil/quote_test.go @@ -0,0 +1,125 @@ +// Copyright 2015 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package strutil + +import ( + "testing" +) + +type quoteTest struct { + in string + out string + ascii string +} + +var quotetests = []quoteTest{ + {"\a\b\f\r\n\t\v", `"\a\b\f\r\n\t\v"`, `"\a\b\f\r\n\t\v"`}, + {"\\", `"\\"`, `"\\"`}, + {"abc\xffdef", `"abc\xffdef"`, `"abc\xffdef"`}, + {"\u263a", `"☺"`, `"\u263a"`}, + {"\U0010ffff", `"\U0010ffff"`, `"\U0010ffff"`}, + {"\x04", `"\x04"`, `"\x04"`}, +} + +type unQuoteTest struct { + in string + out string +} + +var unquotetests = []unQuoteTest{ + {`""`, ""}, + {`"a"`, "a"}, + {`"abc"`, "abc"}, + {`"☺"`, "☺"}, + {`"hello world"`, "hello world"}, + {`"\xFF"`, "\xFF"}, + {`"\377"`, "\377"}, + {`"\u1234"`, "\u1234"}, + {`"\U00010111"`, "\U00010111"}, + {`"\U0001011111"`, "\U0001011111"}, + {`"\a\b\f\n\r\t\v\\\""`, "\a\b\f\n\r\t\v\\\""}, + {`"'"`, "'"}, + + {`''`, ""}, + {`'a'`, "a"}, + {`'abc'`, "abc"}, + {`'☺'`, "☺"}, + {`'hello world'`, "hello world"}, + {`'\xFF'`, "\xFF"}, + {`'\377'`, "\377"}, + {`'\u1234'`, "\u1234"}, + {`'\U00010111'`, "\U00010111"}, + {`'\U0001011111'`, "\U0001011111"}, + {`'\a\b\f\n\r\t\v\\\''`, "\a\b\f\n\r\t\v\\'"}, + {`'"'`, "\""}, + + {"``", ``}, + {"`a`", `a`}, + {"`abc`", `abc`}, + {"`☺`", `☺`}, + {"`hello world`", `hello world`}, + {"`\\xFF`", `\xFF`}, + {"`\\377`", `\377`}, + {"`\\`", `\`}, + {"`\n`", "\n"}, + {"` `", ` `}, +} + +var misquoted = []string{ + ``, + `"`, + `"a`, + `"'`, + `b"`, + `"\"`, + `"\9"`, + `"\19"`, + `"\129"`, + `'\'`, + `'\9'`, + `'\19'`, + `'\129'`, + `"\x1!"`, + `"\U12345678"`, + `"\z"`, + "`", + "`xxx", + "`\"", + `"\'"`, + `'\"'`, + "\"\n\"", + "\"\\n\n\"", + "'\n'", +} + +func TestUnquote(t *testing.T) { + for _, tt := range unquotetests { + if out, err := Unquote(tt.in); err != nil && out != tt.out { + t.Errorf("Unquote(%#q) = %q, %v want %q, nil", tt.in, out, err, tt.out) + } + } + + // run the quote tests too, backward + for _, tt := range quotetests { + if in, err := Unquote(tt.out); in != tt.in { + t.Errorf("Unquote(%#q) = %q, %v, want %q, nil", tt.out, in, err, tt.in) + } + } + + for _, s := range misquoted { + if out, err := Unquote(s); out != "" || err != ErrSyntax { + t.Errorf("Unquote(%#q) = %q, %v want %q, %v", s, out, err, "", ErrSyntax) + } + } +} From 46c52607611992aeee631a1e19f053d886ca34d4 Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Wed, 30 Sep 2015 21:27:08 +0200 Subject: [PATCH 2/2] Support escape sequences in strings and add raw strings. This adapts some functionality from the Go standard library for string literal lexing and unquoting/unescaping. The following string types are now supported: Double- or single-quoted strings: These support all escape sequences that Go supports in double-quoted string literals. The difference is that Prometheus also has single-quoted strings (instead of single-quoted runes in Go). Raw newlines are not allowed. Backtick-quoted raw strings: Strings quoted in backticks are treated as raw strings just like in Go and may contain raw newlines and other special characters directly. Fixes https://github.com/prometheus/prometheus/issues/1122 Fixes https://github.com/prometheus/prometheus/issues/1121 --- promql/lex.go | 93 ++++++++++++++++++++++++++++++++++++-- promql/parse.go | 34 ++++++-------- promql/parse_test.go | 48 ++++++++++++++++++++ util/strutil/quote.go | 10 ++-- util/strutil/quote_test.go | 2 +- web/api/legacy/api_test.go | 4 +- 6 files changed, 161 insertions(+), 30 deletions(-) diff --git a/promql/lex.go b/promql/lex.go index 9cdccd04b..52957b2d9 100644 --- a/promql/lex.go +++ b/promql/lex.go @@ -16,6 +16,7 @@ package promql import ( "fmt" "strings" + "unicode" "unicode/utf8" ) @@ -465,6 +466,9 @@ func lexStatements(l *lexer) stateFn { case r == '"' || r == '\'': l.stringOpen = r return lexString + case r == '`': + l.stringOpen = r + return lexRawString case isAlpha(r) || r == ':': l.backup() return lexKeywordOrIdentifier @@ -523,6 +527,9 @@ func lexInsideBraces(l *lexer) stateFn { case r == '"' || r == '\'': l.stringOpen = r return lexString + case r == '`': + l.stringOpen = r + return lexRawString case r == '=': if l.next() == '~' { l.emit(itemEQLRegex) @@ -583,16 +590,79 @@ func lexValueSequence(l *lexer) stateFn { return lexValueSequence } +// lexEscape scans a string escape sequence. The initial escaping character (\) +// has already been seen. +// +// NOTE: This function as well as the helper function digitVal() and associated +// tests have been adapted from the corresponding functions in the "go/scanner" +// package of the Go standard library to work for Prometheus-style strings. +// None of the actual escaping/quoting logic was changed in this function - it +// was only modified to integrate with our lexer. +func lexEscape(l *lexer) { + var n int + var base, max uint32 + + ch := l.next() + switch ch { + case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', l.stringOpen: + return + case '0', '1', '2', '3', '4', '5', '6', '7': + n, base, max = 3, 8, 255 + case 'x': + ch = l.next() + n, base, max = 2, 16, 255 + case 'u': + ch = l.next() + n, base, max = 4, 16, unicode.MaxRune + case 'U': + ch = l.next() + n, base, max = 8, 16, unicode.MaxRune + case eof: + l.errorf("escape sequence not terminated") + default: + l.errorf("unknown escape sequence %#U", ch) + } + + var x uint32 + for n > 0 { + d := uint32(digitVal(ch)) + if d >= base { + if ch == eof { + l.errorf("escape sequence not terminated") + } + l.errorf("illegal character %#U in escape sequence", ch) + } + x = x*base + d + ch = l.next() + n-- + } + + if x > max || 0xD800 <= x && x < 0xE000 { + l.errorf("escape sequence is an invalid Unicode code point") + } +} + +// digitVal returns the digit value of a rune or 16 in case the rune does not +// represent a valid digit. +func digitVal(ch rune) int { + switch { + case '0' <= ch && ch <= '9': + return int(ch - '0') + case 'a' <= ch && ch <= 'f': + return int(ch - 'a' + 10) + case 'A' <= ch && ch <= 'F': + return int(ch - 'A' + 10) + } + return 16 // Larger than any legal digit val. +} + // lexString scans a quoted string. The initial quote has already been seen. func lexString(l *lexer) stateFn { Loop: for { switch l.next() { case '\\': - if r := l.next(); r != eof && r != '\n' { - break - } - fallthrough + lexEscape(l) case eof, '\n': return l.errorf("unterminated quoted string") case l.stringOpen: @@ -603,6 +673,21 @@ Loop: return lexStatements } +// lexRawString scans a raw quoted string. The initial quote has already been seen. +func lexRawString(l *lexer) stateFn { +Loop: + for { + switch l.next() { + case eof: + return l.errorf("unterminated raw string") + case l.stringOpen: + break Loop + } + } + l.emit(itemString) + return lexStatements +} + // lexSpace scans a run of space characters. One space has already been seen. func lexSpace(l *lexer) stateFn { for isSpace(l.peek()) { diff --git a/promql/parse.go b/promql/parse.go index 283ecf723..d871adc7a 100644 --- a/promql/parse.go +++ b/promql/parse.go @@ -43,9 +43,9 @@ type ParseErr struct { func (e *ParseErr) Error() string { if e.Line == 0 { - return fmt.Sprintf("Parse error at char %d: %s", e.Pos, e.Err) + return fmt.Sprintf("parse error at char %d: %s", e.Pos, e.Err) } - return fmt.Sprintf("Parse error at line %d, char %d: %s", e.Line, e.Pos, e.Err) + return fmt.Sprintf("parse error at line %d, char %d: %s", e.Line, e.Pos, e.Err) } // ParseStmts parses the input and returns the resulting statements or any ocurring error. @@ -401,21 +401,21 @@ Loop: p.errorf("summary must not be defined twice") } hasSum = true - sum = trimOne(p.expect(itemString, ctx).val) + sum = p.unquoteString(p.expect(itemString, ctx).val) case itemDescription: if hasDesc { p.errorf("description must not be defined twice") } hasDesc = true - desc = trimOne(p.expect(itemString, ctx).val) + desc = p.unquoteString(p.expect(itemString, ctx).val) case itemRunbook: if hasRunbook { p.errorf("runbook must not be defined twice") } hasRunbook = true - runbook = trimOne(p.expect(itemString, ctx).val) + runbook = p.unquoteString(p.expect(itemString, ctx).val) default: p.backup() @@ -654,8 +654,7 @@ func (p *parser) primaryExpr() Expr { return &NumberLiteral{model.SampleValue(f)} case t.typ == itemString: - s := t.val[1 : len(t.val)-1] - return &StringLiteral{s} + return &StringLiteral{p.unquoteString(t.val)} case t.typ == itemLeftBrace: // Metric selector without metric name. @@ -843,7 +842,7 @@ func (p *parser) labelMatchers(operators ...itemType) metric.LabelMatchers { p.errorf("operator must be one of %q, is %q", operators, op) } - val := trimOne(p.expect(itemString, ctx).val) + val := p.unquoteString(p.expect(itemString, ctx).val) // Map the item to the respective match type. var matchType metric.MatchType @@ -1104,6 +1103,14 @@ func (p *parser) checkType(node Node) (typ model.ValueType) { return } +func (p *parser) unquoteString(s string) string { + unquoted, err := strutil.Unquote(s) + if err != nil { + p.errorf("error unquoting string %q: %s", s, err) + } + return unquoted +} + func parseDuration(ds string) (time.Duration, error) { dur, err := strutil.StringToDuration(ds) if err != nil { @@ -1114,14 +1121,3 @@ func parseDuration(ds string) (time.Duration, error) { } return dur, nil } - -// trimOne removes the first and last character from a string. -func trimOne(s string) string { - if len(s) > 0 { - s = s[1:] - } - if len(s) > 0 { - s = s[:len(s)-1] - } - return s -} diff --git a/promql/parse_test.go b/promql/parse_test.go index e270e00e4..ecbc95da8 100644 --- a/promql/parse_test.go +++ b/promql/parse_test.go @@ -1016,6 +1016,54 @@ var testExpr = []struct { fail: true, errMsg: `no valid expression found`, }, + // String quoting and escape sequence interpretation tests. + { + input: `"double-quoted string \" with escaped quote"`, + expected: &StringLiteral{ + Val: "double-quoted string \" with escaped quote", + }, + }, { + input: `'single-quoted string \' with escaped quote'`, + expected: &StringLiteral{ + Val: "single-quoted string ' with escaped quote", + }, + }, { + input: "`backtick-quoted string`", + expected: &StringLiteral{ + Val: "backtick-quoted string", + }, + }, { + input: `"\a\b\f\n\r\t\v\\\" - \xFF\377\u1234\U00010111\U0001011111☺"`, + expected: &StringLiteral{ + Val: "\a\b\f\n\r\t\v\\\" - \xFF\377\u1234\U00010111\U0001011111☺", + }, + }, { + input: `'\a\b\f\n\r\t\v\\\' - \xFF\377\u1234\U00010111\U0001011111☺'`, + expected: &StringLiteral{ + Val: "\a\b\f\n\r\t\v\\' - \xFF\377\u1234\U00010111\U0001011111☺", + }, + }, { + input: "`" + `\a\b\f\n\r\t\v\\\"\' - \xFF\377\u1234\U00010111\U0001011111☺` + "`", + expected: &StringLiteral{ + Val: `\a\b\f\n\r\t\v\\\"\' - \xFF\377\u1234\U00010111\U0001011111☺`, + }, + }, { + input: "`\\``", + fail: true, + errMsg: "could not parse remaining input", + }, { + input: `"\`, + fail: true, + errMsg: "escape sequence not terminated", + }, { + input: `"\c"`, + fail: true, + errMsg: "unknown escape sequence U+0063 'c'", + }, { + input: `"\x."`, + fail: true, + errMsg: "illegal character U+002E '.' in escape sequence", + }, } func TestParseExpressions(t *testing.T) { diff --git a/util/strutil/quote.go b/util/strutil/quote.go index 81be1c5b6..981ad473d 100644 --- a/util/strutil/quote.go +++ b/util/strutil/quote.go @@ -28,7 +28,9 @@ var ErrSyntax = errors.New("invalid syntax") // NOTE: This function as well as the necessary helper functions below // (unquoteChar, contains, unhex) and associated tests have been adapted from // the corresponding functions in the "strconv" package of the Go standard -// library to work for Prometheus-style strings. +// library to work for Prometheus-style strings. Go's special-casing for single +// quotes was removed and single quoted strings are now treated the same as +// double quoted ones. func Unquote(s string) (t string, err error) { n := len(s) if n < 2 { @@ -103,7 +105,7 @@ func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, return rune(s[0]), false, s[1:], nil } - // hard case: c is backslash + // Hard case: c is backslash. if len(s) <= 1 { err = ErrSyntax return @@ -151,7 +153,7 @@ func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, } s = s[n:] if c == 'x' { - // single-byte string, possibly not UTF-8 + // Single-byte string, possibly not UTF-8. value = v break } @@ -167,7 +169,7 @@ func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err = ErrSyntax return } - for j := 0; j < 2; j++ { // one digit already; two more + for j := 0; j < 2; j++ { // One digit already; two more. x := rune(s[j]) - '0' if x < 0 || x > 7 { err = ErrSyntax diff --git a/util/strutil/quote_test.go b/util/strutil/quote_test.go index 35bd6842c..0068ada0d 100644 --- a/util/strutil/quote_test.go +++ b/util/strutil/quote_test.go @@ -110,7 +110,7 @@ func TestUnquote(t *testing.T) { } } - // run the quote tests too, backward + // Run the quote tests too, backward. for _, tt := range quotetests { if in, err := Unquote(tt.out); in != tt.in { t.Errorf("Unquote(%#q) = %q, %v, want %q, nil", tt.out, in, err, tt.in) diff --git a/web/api/legacy/api_test.go b/web/api/legacy/api_test.go index ce0ce91d8..770e0af0e 100644 --- a/web/api/legacy/api_test.go +++ b/web/api/legacy/api_test.go @@ -53,7 +53,7 @@ func TestQuery(t *testing.T) { { queryStr: "", status: http.StatusOK, - bodyRe: `{"type":"error","value":"Parse error at char 1: no expression found in input","version":1}`, + bodyRe: `{"type":"error","value":"parse error at char 1: no expression found in input","version":1}`, }, { queryStr: "expr=1.4", @@ -83,7 +83,7 @@ func TestQuery(t *testing.T) { { queryStr: "expr=(badexpression", status: http.StatusOK, - bodyRe: `{"type":"error","value":"Parse error at char 15: unclosed left parenthesis","version":1}`, + bodyRe: `{"type":"error","value":"parse error at char 15: unclosed left parenthesis","version":1}`, }, }