From f96ba1b58fa3d7f2993f1e6c9e5c52cec2a575bb Mon Sep 17 00:00:00 2001 From: George Robinson Date: Mon, 13 Nov 2023 16:31:15 +0000 Subject: [PATCH] unquote should check for invalid UTF-8 code points (#3595) Quoted tokens can contain both UTF-8 byte and code point literals that should be interpreted when quoted. However, we need to check whether the interpreted literals are valid UTF-8 code points. This now happens in unquote. Signed-off-by: George Robinson --- matchers/parse/parse_test.go | 24 ++++++++++++++++++++++++ matchers/parse/token.go | 11 ++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/matchers/parse/parse_test.go b/matchers/parse/parse_test.go index 9b99bde5..ca859178 100644 --- a/matchers/parse/parse_test.go +++ b/matchers/parse/parse_test.go @@ -107,6 +107,14 @@ func TestMatchers(t *testing.T) { name: "equals unicode emoji in quotes", input: "{\"foo\"=\"🙂\"}", expected: labels.Matchers{mustNewMatcher(t, labels.MatchEqual, "foo", "🙂")}, + }, { + name: "equals unicode emoji as bytes in quotes", + input: "{\"foo\"=\"\\xf0\\x9f\\x99\\x82\"}", + expected: labels.Matchers{mustNewMatcher(t, labels.MatchEqual, "foo", "🙂")}, + }, { + name: "equals unicode emoji as code points in quotes", + input: "{\"foo\"=\"\\U0001f642\"}", + expected: labels.Matchers{mustNewMatcher(t, labels.MatchEqual, "foo", "🙂")}, }, { name: "equals unicode sentence in quotes", input: "{\"foo\"=\"🙂bar\"}", @@ -199,6 +207,10 @@ func TestMatchers(t *testing.T) { name: "no unquoted escape sequences", input: "{foo=bar\\n}", error: "8:9: \\: invalid input: expected a comma or close brace", + }, { + name: "invalid unicode", + input: "{\"foo\"=\"\\xf0\\x9f\"}", + error: "7:17: \"\\xf0\\x9f\": invalid input", }} for _, test := range tests { @@ -244,6 +256,14 @@ func TestMatcher(t *testing.T) { name: "equals unicode emoji", input: "{foo=🙂}", expected: mustNewMatcher(t, labels.MatchEqual, "foo", "🙂"), + }, { + name: "equals unicode emoji as bytes in 
quotes", + input: "{\"foo\"=\"\\xf0\\x9f\\x99\\x82\"}", + expected: mustNewMatcher(t, labels.MatchEqual, "foo", "🙂"), + }, { + name: "equals unicode emoji as code points in quotes", + input: "{\"foo\"=\"\\U0001f642\"}", + expected: mustNewMatcher(t, labels.MatchEqual, "foo", "🙂"), }, { name: "equals unicode sentence", input: "{foo=🙂bar}", @@ -331,6 +351,10 @@ func TestMatcher(t *testing.T) { name: "two or more returns error", input: "foo=bar,bar=baz", error: "expected 1 matcher, found 2", + }, { + name: "invalid unicode", + input: "foo=\"\\xf0\\x9f\"", + error: "4:14: \"\\xf0\\x9f\": invalid input", }} for _, test := range tests { diff --git a/matchers/parse/token.go b/matchers/parse/token.go index 29ab5514..96baeeef 100644 --- a/matchers/parse/token.go +++ b/matchers/parse/token.go @@ -14,8 +14,10 @@ package parse import ( + "errors" "fmt" "strconv" + "unicode/utf8" ) type tokenKind int @@ -82,7 +84,14 @@ func (t token) isOneOf(kinds ...tokenKind) bool { // unquote the value in token. If unquoted returns it unmodified. func (t token) unquote() (string, error) { if t.kind == tokenQuoted { - return strconv.Unquote(t.value) + unquoted, err := strconv.Unquote(t.value) + if err != nil { + return "", err + } + if !utf8.ValidString(unquoted) { + return "", errors.New("quoted string contains invalid UTF-8 code points") + } + return unquoted, nil } return t.value, nil }