mirror of https://github.com/mpv-player/mpv
common: simplify and optimize string escape parsing
This code is shared between input.conf parser and option parser. Until now, the performance didn't really matter. But I want to use this code for JSON parsing too, and since JSON will have to be parsed a lot, it should probably try to avoid realloc'ing too much. This commit moves parsing of C-style escaped strings into a common function, and allows using it in a way realloc can be completely avoided, if the already allocated buffer is large enough.
This commit is contained in:
parent
097fe8ea6f
commit
066ecfcbfb
|
@ -115,12 +115,22 @@ char *mp_append_utf8_buffer(char *buffer, uint32_t codepoint)
|
|||
return talloc_strndup_append_buffer(buffer, data, output - data);
|
||||
}
|
||||
|
||||
// Like mp_append_utf8_buffer, but use bstr_xappend().
|
||||
void mp_append_utf8_bstr(void *talloc_ctx, struct bstr *buf, uint32_t codepoint)
|
||||
{
|
||||
char data[8];
|
||||
uint8_t tmp;
|
||||
char *output = data;
|
||||
PUT_UTF8(codepoint, tmp, *output++ = tmp;);
|
||||
bstr_xappend(talloc_ctx, buf, (bstr){data, output - data});
|
||||
}
|
||||
|
||||
// Parse a C-style escape beginning at code, and append the result to *str
|
||||
// using talloc. The input string (*code) must point to the first character
|
||||
// after the initial '\', and after parsing *code is set to the first character
|
||||
// after the current escape.
|
||||
// On error, false is returned, and all input remains unchanged.
|
||||
bool mp_parse_escape(bstr *code, char **str)
|
||||
static bool mp_parse_escape(void *talloc_ctx, bstr *dst, bstr *code)
|
||||
{
|
||||
if (code->len < 1)
|
||||
return false;
|
||||
|
@ -137,7 +147,7 @@ bool mp_parse_escape(bstr *code, char **str)
|
|||
case '\'': replace = '\''; break;
|
||||
}
|
||||
if (replace) {
|
||||
*str = talloc_strndup_append_buffer(*str, &replace, 1);
|
||||
bstr_xappend(talloc_ctx, dst, (bstr){&replace, 1});
|
||||
*code = bstr_cut(*code, 1);
|
||||
return true;
|
||||
}
|
||||
|
@ -146,7 +156,7 @@ bool mp_parse_escape(bstr *code, char **str)
|
|||
char c = bstrtoll(num, &num, 16);
|
||||
if (!num.len)
|
||||
return false;
|
||||
*str = talloc_strndup_append_buffer(*str, &c, 1);
|
||||
bstr_xappend(talloc_ctx, dst, (bstr){&c, 1});
|
||||
*code = bstr_cut(*code, 3);
|
||||
return true;
|
||||
}
|
||||
|
@ -155,9 +165,64 @@ bool mp_parse_escape(bstr *code, char **str)
|
|||
int c = bstrtoll(num, &num, 16);
|
||||
if (num.len)
|
||||
return false;
|
||||
*str = mp_append_utf8_buffer(*str, c);
|
||||
mp_append_utf8_bstr(talloc_ctx, dst, c);
|
||||
*code = bstr_cut(*code, 5);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Like mp_append_escaped_string, but set *dst to sliced *src if no escape
|
||||
// sequences have to be parsed (i.e. no memory allocation is required), and
|
||||
// if dst->start was NULL on function entry.
|
||||
bool mp_append_escaped_string_noalloc(void *talloc_ctx, bstr *dst, bstr *src)
|
||||
{
|
||||
bstr t = *src;
|
||||
int cur = 0;
|
||||
while (1) {
|
||||
if (cur >= t.len || t.start[cur] == '"') {
|
||||
*src = bstr_cut(t, cur);
|
||||
t = bstr_splice(t, 0, cur);
|
||||
if (dst->start == NULL) {
|
||||
*dst = t;
|
||||
} else {
|
||||
bstr_xappend(talloc_ctx, dst, t);
|
||||
}
|
||||
return true;
|
||||
} else if (t.start[cur] == '\\') {
|
||||
bstr_xappend(talloc_ctx, dst, bstr_splice(t, 0, cur));
|
||||
t = bstr_cut(t, cur + 1);
|
||||
cur = 0;
|
||||
if (!mp_parse_escape(talloc_ctx, dst, &t))
|
||||
goto error;
|
||||
} else {
|
||||
cur++;
|
||||
}
|
||||
}
|
||||
error:
|
||||
return false;
|
||||
}
|
||||
|
||||
// src is expected to point to a C-style string literal, *src pointing to the
|
||||
// first char after the starting '"'. It will append the contents of the literal
|
||||
// to *dst (using talloc_ctx) until the first '"' or the end of *str is found.
|
||||
// See bstr_xappend() how data is appended to *dst.
|
||||
// On success, *src will either start with '"', or be empty.
|
||||
// On error, return false, and *dst will contain the string until the first
|
||||
// error, *src is not changed.
|
||||
// Note that dst->start will be implicitly \0-terminated on successful return,
|
||||
// and if it was NULL or \0-terminated before calling the function.
|
||||
// As mentioned above, the caller is responsible for skipping the '"' chars.
|
||||
bool mp_append_escaped_string(void *talloc_ctx, bstr *dst, bstr *src)
|
||||
{
|
||||
if (mp_append_escaped_string_noalloc(talloc_ctx, dst, src)) {
|
||||
// Guarantee copy (or allocation).
|
||||
if (!dst->start || dst->start == src->start) {
|
||||
bstr res = *dst;
|
||||
*dst = (bstr){0};
|
||||
bstr_xappend(talloc_ctx, dst, res);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -76,6 +76,12 @@ bool mp_rect_intersection(struct mp_rect *rc, const struct mp_rect *rc2);
|
|||
char *mp_append_utf8_buffer(char *buffer, uint32_t codepoint);
|
||||
|
||||
struct bstr;
|
||||
bool mp_parse_escape(struct bstr *code, char **str);
|
||||
|
||||
void mp_append_utf8_bstr(void *talloc_ctx, struct bstr *buf, uint32_t codepoint);
|
||||
|
||||
bool mp_append_escaped_string_noalloc(void *talloc_ctx, struct bstr *dst,
|
||||
struct bstr *src);
|
||||
bool mp_append_escaped_string(void *talloc_ctx, struct bstr *dst,
|
||||
struct bstr *src);
|
||||
|
||||
#endif /* MPLAYER_MPCOMMON_H */
|
||||
|
|
|
@ -41,31 +41,6 @@ static bool read_token(bstr str, bstr *out_rest, bstr *out_token)
|
|||
return true;
|
||||
}
|
||||
|
||||
static bool read_escaped_string(void *talloc_ctx, bstr *str, bstr *literal)
|
||||
{
|
||||
bstr t = *str;
|
||||
char *new = talloc_strdup(talloc_ctx, "");
|
||||
while (t.len) {
|
||||
if (t.start[0] == '"')
|
||||
break;
|
||||
if (t.start[0] == '\\') {
|
||||
t = bstr_cut(t, 1);
|
||||
if (!mp_parse_escape(&t, &new))
|
||||
goto error;
|
||||
} else {
|
||||
new = talloc_strndup_append_buffer(new, t.start, 1);
|
||||
t = bstr_cut(t, 1);
|
||||
}
|
||||
}
|
||||
int len = str->len - t.len;
|
||||
*literal = new ? bstr0(new) : bstr_splice(*str, 0, len);
|
||||
*str = bstr_cut(*str, len);
|
||||
return true;
|
||||
error:
|
||||
talloc_free(new);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Somewhat awkward; the main purpose is supporting both strings and
|
||||
// pre-split string arrays as input.
|
||||
struct parse_ctx {
|
||||
|
@ -92,7 +67,7 @@ static int pctx_read_token(struct parse_ctx *ctx, bstr *out)
|
|||
ctx->str = bstr_lstrip(ctx->str);
|
||||
bstr start = ctx->str;
|
||||
if (bstr_eatstart0(&ctx->str, "\"")) {
|
||||
if (!read_escaped_string(ctx->tmp, &ctx->str, out)) {
|
||||
if (!mp_append_escaped_string_noalloc(ctx->tmp, out, &ctx->str)) {
|
||||
MP_ERR(ctx, "Broken string escapes: ...>%.*s<.\n", BSTR_P(start));
|
||||
return -1;
|
||||
}
|
||||
|
|
|
@ -746,20 +746,17 @@ const m_option_type_t m_option_type_float = {
|
|||
|
||||
static char *unescape_string(void *talloc_ctx, bstr str)
|
||||
{
|
||||
char *res = talloc_strdup(talloc_ctx, "");
|
||||
bstr dst = {0};
|
||||
while (str.len) {
|
||||
bstr rest;
|
||||
bool esc = bstr_split_tok(str, "\\", &str, &rest);
|
||||
res = talloc_strndup_append_buffer(res, str.start, str.len);
|
||||
if (esc) {
|
||||
if (!mp_parse_escape(&rest, &res)) {
|
||||
talloc_free(res);
|
||||
return NULL;
|
||||
}
|
||||
if (!mp_append_escaped_string(talloc_ctx, &dst, &str)) {
|
||||
talloc_free(dst.start);
|
||||
return NULL;
|
||||
}
|
||||
str = rest;
|
||||
if (!bstr_eatstart0(&str, "\""))
|
||||
break;
|
||||
bstr_xappend(talloc_ctx, &dst, bstr0("\""));
|
||||
}
|
||||
return res;
|
||||
return dst.start;
|
||||
}
|
||||
|
||||
static char *escape_string(char *str0)
|
||||
|
|
Loading…
Reference in New Issue