unquote should check for invalid UTF-8 code points (#3595)

Quoted tokens can contain both UTF-8 byte and code point literals
that should be interpreted when quoted. However, we need to check
whether the interpreted literals are valid UTF-8 code points.
This now happens in unquote.

Signed-off-by: George Robinson <george.robinson@grafana.com>
This commit is contained in:
George Robinson 2023-11-13 16:31:15 +00:00 committed by GitHub
parent ce6efba023
commit f96ba1b58f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 34 additions and 1 deletions

View File

@ -107,6 +107,14 @@ func TestMatchers(t *testing.T) {
name: "equals unicode emoji in quotes",
input: "{\"foo\"=\"🙂\"}",
expected: labels.Matchers{mustNewMatcher(t, labels.MatchEqual, "foo", "🙂")},
}, {
name: "equals unicode emoji as bytes in quotes",
input: "{\"foo\"=\"\\xf0\\x9f\\x99\\x82\"}",
expected: labels.Matchers{mustNewMatcher(t, labels.MatchEqual, "foo", "🙂")},
}, {
name: "equals unicode emoji as code points in quotes",
input: "{\"foo\"=\"\\U0001f642\"}",
expected: labels.Matchers{mustNewMatcher(t, labels.MatchEqual, "foo", "🙂")},
}, {
name: "equals unicode sentence in quotes",
input: "{\"foo\"=\"🙂bar\"}",
@ -199,6 +207,10 @@ func TestMatchers(t *testing.T) {
name: "no unquoted escape sequences",
input: "{foo=bar\\n}",
error: "8:9: \\: invalid input: expected a comma or close brace",
}, {
name: "invalid unicode",
input: "{\"foo\"=\"\\xf0\\x9f\"}",
error: "7:17: \"\\xf0\\x9f\": invalid input",
}}
for _, test := range tests {
@ -244,6 +256,14 @@ func TestMatcher(t *testing.T) {
name: "equals unicode emoji",
input: "{foo=🙂}",
expected: mustNewMatcher(t, labels.MatchEqual, "foo", "🙂"),
}, {
name: "equals unicode emoji as bytes in quotes",
input: "{\"foo\"=\"\\xf0\\x9f\\x99\\x82\"}",
expected: mustNewMatcher(t, labels.MatchEqual, "foo", "🙂"),
}, {
name: "equals unicode emoji as code points in quotes",
input: "{\"foo\"=\"\\U0001f642\"}",
expected: mustNewMatcher(t, labels.MatchEqual, "foo", "🙂"),
}, {
name: "equals unicode sentence",
input: "{foo=🙂bar}",
@ -331,6 +351,10 @@ func TestMatcher(t *testing.T) {
name: "two or more returns error",
input: "foo=bar,bar=baz",
error: "expected 1 matcher, found 2",
}, {
name: "invalid unicode",
input: "foo=\"\\xf0\\x9f\"",
error: "4:14: \"\\xf0\\x9f\": invalid input",
}}
for _, test := range tests {

View File

@ -14,8 +14,10 @@
package parse
import (
"errors"
"fmt"
"strconv"
"unicode/utf8"
)
type tokenKind int
@ -82,7 +84,14 @@ func (t token) isOneOf(kinds ...tokenKind) bool {
// unquote returns the value of the token with its surrounding quotes removed
// and its escape sequences interpreted. Tokens whose kind is not tokenQuoted
// are returned unmodified.
//
// An error is returned if strconv.Unquote rejects the quoted value, or if the
// interpreted result is not valid UTF-8.
func (t token) unquote() (string, error) {
	if t.kind != tokenQuoted {
		return t.value, nil
	}
	unquoted, err := strconv.Unquote(t.value)
	if err != nil {
		return "", err
	}
	// Byte escapes (for example "\xf0\x9f") are accepted by strconv.Unquote
	// but can produce a string that is not valid UTF-8, so the result has to
	// be validated before it is returned.
	if !utf8.ValidString(unquoted) {
		return "", errors.New("quoted string contains invalid UTF-8 code points")
	}
	return unquoted, nil
}