mirror of https://github.com/mpv-player/mpv
common: simplify and optimize string escape parsing
This code is shared between the input.conf parser and the option parser. Until now, its performance didn't really matter. But I want to use this code for JSON parsing too, and since JSON will have to be parsed a lot, it should probably avoid realloc'ing too much. This commit moves the parsing of C-style escaped strings into a common function, and allows using it in such a way that realloc can be avoided completely if the already allocated buffer is large enough.
This commit is contained in:
parent
097fe8ea6f
commit
066ecfcbfb
|
@ -115,12 +115,22 @@ char *mp_append_utf8_buffer(char *buffer, uint32_t codepoint)
|
||||||
return talloc_strndup_append_buffer(buffer, data, output - data);
|
return talloc_strndup_append_buffer(buffer, data, output - data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Like mp_append_utf8_buffer, but use bstr_xappend().
|
||||||
|
void mp_append_utf8_bstr(void *talloc_ctx, struct bstr *buf, uint32_t codepoint)
|
||||||
|
{
|
||||||
|
char data[8];
|
||||||
|
uint8_t tmp;
|
||||||
|
char *output = data;
|
||||||
|
PUT_UTF8(codepoint, tmp, *output++ = tmp;);
|
||||||
|
bstr_xappend(talloc_ctx, buf, (bstr){data, output - data});
|
||||||
|
}
|
||||||
|
|
||||||
// Parse a C-style escape beginning at code, and append the result to *str
|
// Parse a C-style escape beginning at code, and append the result to *str
|
||||||
// using talloc. The input string (*code) must point to the first character
|
// using talloc. The input string (*code) must point to the first character
|
||||||
// after the initial '\', and after parsing *code is set to the first character
|
// after the initial '\', and after parsing *code is set to the first character
|
||||||
// after the current escape.
|
// after the current escape.
|
||||||
// On error, false is returned, and all input remains unchanged.
|
// On error, false is returned, and all input remains unchanged.
|
||||||
bool mp_parse_escape(bstr *code, char **str)
|
static bool mp_parse_escape(void *talloc_ctx, bstr *dst, bstr *code)
|
||||||
{
|
{
|
||||||
if (code->len < 1)
|
if (code->len < 1)
|
||||||
return false;
|
return false;
|
||||||
|
@ -137,7 +147,7 @@ bool mp_parse_escape(bstr *code, char **str)
|
||||||
case '\'': replace = '\''; break;
|
case '\'': replace = '\''; break;
|
||||||
}
|
}
|
||||||
if (replace) {
|
if (replace) {
|
||||||
*str = talloc_strndup_append_buffer(*str, &replace, 1);
|
bstr_xappend(talloc_ctx, dst, (bstr){&replace, 1});
|
||||||
*code = bstr_cut(*code, 1);
|
*code = bstr_cut(*code, 1);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -146,7 +156,7 @@ bool mp_parse_escape(bstr *code, char **str)
|
||||||
char c = bstrtoll(num, &num, 16);
|
char c = bstrtoll(num, &num, 16);
|
||||||
if (!num.len)
|
if (!num.len)
|
||||||
return false;
|
return false;
|
||||||
*str = talloc_strndup_append_buffer(*str, &c, 1);
|
bstr_xappend(talloc_ctx, dst, (bstr){&c, 1});
|
||||||
*code = bstr_cut(*code, 3);
|
*code = bstr_cut(*code, 3);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -155,9 +165,64 @@ bool mp_parse_escape(bstr *code, char **str)
|
||||||
int c = bstrtoll(num, &num, 16);
|
int c = bstrtoll(num, &num, 16);
|
||||||
if (num.len)
|
if (num.len)
|
||||||
return false;
|
return false;
|
||||||
*str = mp_append_utf8_buffer(*str, c);
|
mp_append_utf8_bstr(talloc_ctx, dst, c);
|
||||||
*code = bstr_cut(*code, 5);
|
*code = bstr_cut(*code, 5);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Like mp_append_escaped_string, but set *dst to sliced *src if no escape
|
||||||
|
// sequences have to be parsed (i.e. no memory allocation is required), and
|
||||||
|
// if dst->start was NULL on function entry.
|
||||||
|
bool mp_append_escaped_string_noalloc(void *talloc_ctx, bstr *dst, bstr *src)
|
||||||
|
{
|
||||||
|
bstr t = *src;
|
||||||
|
int cur = 0;
|
||||||
|
while (1) {
|
||||||
|
if (cur >= t.len || t.start[cur] == '"') {
|
||||||
|
*src = bstr_cut(t, cur);
|
||||||
|
t = bstr_splice(t, 0, cur);
|
||||||
|
if (dst->start == NULL) {
|
||||||
|
*dst = t;
|
||||||
|
} else {
|
||||||
|
bstr_xappend(talloc_ctx, dst, t);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
} else if (t.start[cur] == '\\') {
|
||||||
|
bstr_xappend(talloc_ctx, dst, bstr_splice(t, 0, cur));
|
||||||
|
t = bstr_cut(t, cur + 1);
|
||||||
|
cur = 0;
|
||||||
|
if (!mp_parse_escape(talloc_ctx, dst, &t))
|
||||||
|
goto error;
|
||||||
|
} else {
|
||||||
|
cur++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
error:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// src is expected to point to a C-style string literal, *src pointing to the
|
||||||
|
// first char after the starting '"'. It will append the contents of the literal
|
||||||
|
// to *dst (using talloc_ctx) until the first '"' or the end of *src is found.
|
||||||
|
// See bstr_xappend() for how data is appended to *dst.
|
||||||
|
// On success, *src will either start with '"', or be empty.
|
||||||
|
// On error, return false, and *dst will contain the string until the first
|
||||||
|
// error, *src is not changed.
|
||||||
|
// Note that dst->start will be implicitly \0-terminated on successful return,
|
||||||
|
// and if it was NULL or \0-terminated before calling the function.
|
||||||
|
// As mentioned above, the caller is responsible for skipping the '"' chars.
|
||||||
|
bool mp_append_escaped_string(void *talloc_ctx, bstr *dst, bstr *src)
|
||||||
|
{
|
||||||
|
if (mp_append_escaped_string_noalloc(talloc_ctx, dst, src)) {
|
||||||
|
// Guarantee copy (or allocation).
|
||||||
|
if (!dst->start || dst->start == src->start) {
|
||||||
|
bstr res = *dst;
|
||||||
|
*dst = (bstr){0};
|
||||||
|
bstr_xappend(talloc_ctx, dst, res);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
|
@ -76,6 +76,12 @@ bool mp_rect_intersection(struct mp_rect *rc, const struct mp_rect *rc2);
|
||||||
char *mp_append_utf8_buffer(char *buffer, uint32_t codepoint);
|
char *mp_append_utf8_buffer(char *buffer, uint32_t codepoint);
|
||||||
|
|
||||||
struct bstr;
|
struct bstr;
|
||||||
bool mp_parse_escape(struct bstr *code, char **str);
|
|
||||||
|
void mp_append_utf8_bstr(void *talloc_ctx, struct bstr *buf, uint32_t codepoint);
|
||||||
|
|
||||||
|
bool mp_append_escaped_string_noalloc(void *talloc_ctx, struct bstr *dst,
|
||||||
|
struct bstr *src);
|
||||||
|
bool mp_append_escaped_string(void *talloc_ctx, struct bstr *dst,
|
||||||
|
struct bstr *src);
|
||||||
|
|
||||||
#endif /* MPLAYER_MPCOMMON_H */
|
#endif /* MPLAYER_MPCOMMON_H */
|
||||||
|
|
|
@ -41,31 +41,6 @@ static bool read_token(bstr str, bstr *out_rest, bstr *out_token)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool read_escaped_string(void *talloc_ctx, bstr *str, bstr *literal)
|
|
||||||
{
|
|
||||||
bstr t = *str;
|
|
||||||
char *new = talloc_strdup(talloc_ctx, "");
|
|
||||||
while (t.len) {
|
|
||||||
if (t.start[0] == '"')
|
|
||||||
break;
|
|
||||||
if (t.start[0] == '\\') {
|
|
||||||
t = bstr_cut(t, 1);
|
|
||||||
if (!mp_parse_escape(&t, &new))
|
|
||||||
goto error;
|
|
||||||
} else {
|
|
||||||
new = talloc_strndup_append_buffer(new, t.start, 1);
|
|
||||||
t = bstr_cut(t, 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
int len = str->len - t.len;
|
|
||||||
*literal = new ? bstr0(new) : bstr_splice(*str, 0, len);
|
|
||||||
*str = bstr_cut(*str, len);
|
|
||||||
return true;
|
|
||||||
error:
|
|
||||||
talloc_free(new);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Somewhat awkward; the main purpose is supporting both strings and
|
// Somewhat awkward; the main purpose is supporting both strings and
|
||||||
// pre-split string arrays as input.
|
// pre-split string arrays as input.
|
||||||
struct parse_ctx {
|
struct parse_ctx {
|
||||||
|
@ -92,7 +67,7 @@ static int pctx_read_token(struct parse_ctx *ctx, bstr *out)
|
||||||
ctx->str = bstr_lstrip(ctx->str);
|
ctx->str = bstr_lstrip(ctx->str);
|
||||||
bstr start = ctx->str;
|
bstr start = ctx->str;
|
||||||
if (bstr_eatstart0(&ctx->str, "\"")) {
|
if (bstr_eatstart0(&ctx->str, "\"")) {
|
||||||
if (!read_escaped_string(ctx->tmp, &ctx->str, out)) {
|
if (!mp_append_escaped_string_noalloc(ctx->tmp, out, &ctx->str)) {
|
||||||
MP_ERR(ctx, "Broken string escapes: ...>%.*s<.\n", BSTR_P(start));
|
MP_ERR(ctx, "Broken string escapes: ...>%.*s<.\n", BSTR_P(start));
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
|
@ -746,20 +746,17 @@ const m_option_type_t m_option_type_float = {
|
||||||
|
|
||||||
static char *unescape_string(void *talloc_ctx, bstr str)
|
static char *unescape_string(void *talloc_ctx, bstr str)
|
||||||
{
|
{
|
||||||
char *res = talloc_strdup(talloc_ctx, "");
|
bstr dst = {0};
|
||||||
while (str.len) {
|
while (str.len) {
|
||||||
bstr rest;
|
if (!mp_append_escaped_string(talloc_ctx, &dst, &str)) {
|
||||||
bool esc = bstr_split_tok(str, "\\", &str, &rest);
|
talloc_free(dst.start);
|
||||||
res = talloc_strndup_append_buffer(res, str.start, str.len);
|
return NULL;
|
||||||
if (esc) {
|
|
||||||
if (!mp_parse_escape(&rest, &res)) {
|
|
||||||
talloc_free(res);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
str = rest;
|
if (!bstr_eatstart0(&str, "\""))
|
||||||
|
break;
|
||||||
|
bstr_xappend(talloc_ctx, &dst, bstr0("\""));
|
||||||
}
|
}
|
||||||
return res;
|
return dst.start;
|
||||||
}
|
}
|
||||||
|
|
||||||
static char *escape_string(char *str0)
|
static char *escape_string(char *str0)
|
||||||
|
|
Loading…
Reference in New Issue