unquote should check for invalid UTF-8 code points (#3595)
Quoted tokens can contain both UTF-8 byte literals and code point literals, which are interpreted when the token is unquoted. However, we need to check whether the interpreted literals form valid UTF-8. This check now happens in unquote. Signed-off-by: George Robinson <george.robinson@grafana.com>
This commit is contained in:
parent
ce6efba023
commit
f96ba1b58f
|
@ -107,6 +107,14 @@ func TestMatchers(t *testing.T) {
|
|||
name: "equals unicode emoji in quotes",
|
||||
input: "{\"foo\"=\"🙂\"}",
|
||||
expected: labels.Matchers{mustNewMatcher(t, labels.MatchEqual, "foo", "🙂")},
|
||||
}, {
|
||||
name: "equals unicode emoji as bytes in quotes",
|
||||
input: "{\"foo\"=\"\\xf0\\x9f\\x99\\x82\"}",
|
||||
expected: labels.Matchers{mustNewMatcher(t, labels.MatchEqual, "foo", "🙂")},
|
||||
}, {
|
||||
name: "equals unicode emoji as code points in quotes",
|
||||
input: "{\"foo\"=\"\\U0001f642\"}",
|
||||
expected: labels.Matchers{mustNewMatcher(t, labels.MatchEqual, "foo", "🙂")},
|
||||
}, {
|
||||
name: "equals unicode sentence in quotes",
|
||||
input: "{\"foo\"=\"🙂bar\"}",
|
||||
|
@ -199,6 +207,10 @@ func TestMatchers(t *testing.T) {
|
|||
name: "no unquoted escape sequences",
|
||||
input: "{foo=bar\\n}",
|
||||
error: "8:9: \\: invalid input: expected a comma or close brace",
|
||||
}, {
|
||||
name: "invalid unicode",
|
||||
input: "{\"foo\"=\"\\xf0\\x9f\"}",
|
||||
error: "7:17: \"\\xf0\\x9f\": invalid input",
|
||||
}}
|
||||
|
||||
for _, test := range tests {
|
||||
|
@ -244,6 +256,14 @@ func TestMatcher(t *testing.T) {
|
|||
name: "equals unicode emoji",
|
||||
input: "{foo=🙂}",
|
||||
expected: mustNewMatcher(t, labels.MatchEqual, "foo", "🙂"),
|
||||
}, {
|
||||
name: "equals unicode emoji as bytes in quotes",
|
||||
input: "{\"foo\"=\"\\xf0\\x9f\\x99\\x82\"}",
|
||||
expected: mustNewMatcher(t, labels.MatchEqual, "foo", "🙂"),
|
||||
}, {
|
||||
name: "equals unicode emoji as code points in quotes",
|
||||
input: "{\"foo\"=\"\\U0001f642\"}",
|
||||
expected: mustNewMatcher(t, labels.MatchEqual, "foo", "🙂"),
|
||||
}, {
|
||||
name: "equals unicode sentence",
|
||||
input: "{foo=🙂bar}",
|
||||
|
@ -331,6 +351,10 @@ func TestMatcher(t *testing.T) {
|
|||
name: "two or more returns error",
|
||||
input: "foo=bar,bar=baz",
|
||||
error: "expected 1 matcher, found 2",
|
||||
}, {
|
||||
name: "invalid unicode",
|
||||
input: "foo=\"\\xf0\\x9f\"",
|
||||
error: "4:14: \"\\xf0\\x9f\": invalid input",
|
||||
}}
|
||||
|
||||
for _, test := range tests {
|
||||
|
|
|
@ -14,8 +14,10 @@
|
|||
package parse
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
type tokenKind int
|
||||
|
@ -82,7 +84,14 @@ func (t token) isOneOf(kinds ...tokenKind) bool {
|
|||
// unquote the value in token. If unquoted returns it unmodified.
|
||||
func (t token) unquote() (string, error) {
|
||||
if t.kind == tokenQuoted {
|
||||
return strconv.Unquote(t.value)
|
||||
unquoted, err := strconv.Unquote(t.value)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if !utf8.ValidString(unquoted) {
|
||||
return "", errors.New("quoted string contains invalid UTF-8 code points")
|
||||
}
|
||||
return unquoted, nil
|
||||
}
|
||||
return t.value, nil
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue