common: simplify and optimize string escape parsing

This code is shared between the input.conf parser and the option parser.
Until now, its performance didn't really matter. But I want to use this code
for JSON parsing too, and since JSON will have to be parsed a lot, it
should probably try to avoid realloc'ing too much.

This commit moves parsing of C-style escaped strings into a common
function, and allows using it in such a way that realloc can be avoided
completely if the already allocated buffer is large enough.
This commit is contained in:
wm4 2013-12-30 20:28:32 +01:00
parent 097fe8ea6f
commit 066ecfcbfb
4 changed files with 85 additions and 42 deletions

View File

@ -115,12 +115,22 @@ char *mp_append_utf8_buffer(char *buffer, uint32_t codepoint)
return talloc_strndup_append_buffer(buffer, data, output - data);
}
// Same purpose as mp_append_utf8_buffer, except that the encoded bytes are
// appended to *buf with bstr_xappend() (allocating against talloc_ctx).
void mp_append_utf8_bstr(void *talloc_ctx, struct bstr *buf, uint32_t codepoint)
{
    char encoded[8];
    char *end = encoded;
    uint8_t byte;
    // Each byte produced for the codepoint is stored at *end, advancing end.
    PUT_UTF8(codepoint, byte, *end++ = byte;);
    // Hand over exactly the bytes that were written: [encoded, end).
    bstr_xappend(talloc_ctx, buf, (bstr){encoded, end - encoded});
}
// Parse a C-style escape beginning at code, and append the result to *dst
// using bstr_xappend() with talloc_ctx. The input string (*code) must point to
// the first character after the initial '\', and after parsing *code is set to
// the first character after the current escape.
// On error, false is returned, and all input remains unchanged.
bool mp_parse_escape(bstr *code, char **str)
static bool mp_parse_escape(void *talloc_ctx, bstr *dst, bstr *code)
{
if (code->len < 1)
return false;
@ -137,7 +147,7 @@ bool mp_parse_escape(bstr *code, char **str)
case '\'': replace = '\''; break;
}
if (replace) {
*str = talloc_strndup_append_buffer(*str, &replace, 1);
bstr_xappend(talloc_ctx, dst, (bstr){&replace, 1});
*code = bstr_cut(*code, 1);
return true;
}
@ -146,7 +156,7 @@ bool mp_parse_escape(bstr *code, char **str)
char c = bstrtoll(num, &num, 16);
if (!num.len)
return false;
*str = talloc_strndup_append_buffer(*str, &c, 1);
bstr_xappend(talloc_ctx, dst, (bstr){&c, 1});
*code = bstr_cut(*code, 3);
return true;
}
@ -155,9 +165,64 @@ bool mp_parse_escape(bstr *code, char **str)
int c = bstrtoll(num, &num, 16);
if (num.len)
return false;
*str = mp_append_utf8_buffer(*str, c);
mp_append_utf8_bstr(talloc_ctx, dst, c);
*code = bstr_cut(*code, 5);
return true;
}
return false;
}
// Variant of mp_append_escaped_string that can get away without allocating:
// if dst->start was NULL on function entry and no escape sequence had to be
// parsed, *dst is simply set to a slice of *src.
// On success, *src is advanced to the terminating '"' (or to the end of the
// input) and true is returned. If an escape can't be parsed, false is
// returned and *src is left as it was.
bool mp_append_escaped_string_noalloc(void *talloc_ctx, bstr *dst, bstr *src)
{
    bstr rest = *src;   // input not yet flushed to *dst
    int pos = 0;        // number of plain characters scanned in rest
    for (;;) {
        bool at_end = pos >= rest.len || rest.start[pos] == '"';
        if (!at_end && rest.start[pos] != '\\') {
            // Ordinary character: just extend the pending plain run.
            pos++;
            continue;
        }

        bstr plain = bstr_splice(rest, 0, pos);
        if (at_end) {
            *src = bstr_cut(rest, pos);
            if (dst->start) {
                bstr_xappend(talloc_ctx, dst, plain);
            } else {
                // Nothing was appended so far: reference the input directly.
                *dst = plain;
            }
            return true;
        }

        // Backslash: flush the plain run, then let mp_parse_escape() consume
        // the escape that follows the '\'.
        bstr_xappend(talloc_ctx, dst, plain);
        rest = bstr_cut(rest, pos + 1);
        pos = 0;
        if (!mp_parse_escape(talloc_ctx, dst, &rest))
            return false;
    }
}
// src is expected to point to a C-style string literal, *src pointing to the
// first char after the starting '"'. It will append the contents of the literal
// to *dst (using talloc_ctx) until the first '"' or the end of *src is found.
// See bstr_xappend() how data is appended to *dst.
// On success, *src will either start with '"', or be empty.
// On error, return false, and *dst will contain the string until the first
// error, *src is not changed.
// Unlike mp_append_escaped_string_noalloc(), a successful call never leaves
// *dst as a slice of the input: such a result is copied via bstr_xappend().
// Note that dst->start will be implicitly \0-terminated on successful return,
// and if it was NULL or \0-terminated before calling the function.
// As mentioned above, the caller is responsible for skipping the '"' chars.
bool mp_append_escaped_string(void *talloc_ctx, bstr *dst, bstr *src)
{
    // The _noalloc variant can only make *dst reference the input if
    // dst->start was NULL on entry, and in that case the slice begins at the
    // original src->start. Record both facts up front: *src is advanced on
    // success, so comparing against src->start afterwards would detect the
    // aliasing for an empty literal only.
    bool dst_was_null = dst->start == NULL;
    const void *src_begin = src->start;

    if (!mp_append_escaped_string_noalloc(talloc_ctx, dst, src))
        return false;

    // Guarantee copy (or allocation).
    if (dst_was_null && (!dst->start || dst->start == src_begin)) {
        bstr res = *dst;
        *dst = (bstr){0};
        bstr_xappend(talloc_ctx, dst, res);
    }
    return true;
}

View File

@ -76,6 +76,12 @@ bool mp_rect_intersection(struct mp_rect *rc, const struct mp_rect *rc2);
char *mp_append_utf8_buffer(char *buffer, uint32_t codepoint);
struct bstr;
bool mp_parse_escape(struct bstr *code, char **str);
// Like mp_append_utf8_buffer, but appends to *buf using bstr_xappend().
void mp_append_utf8_bstr(void *talloc_ctx, struct bstr *buf, uint32_t codepoint);
// Unescape the contents of a C-style string literal from *src into *dst.
// *dst may end up as a slice of *src if dst->start was NULL on entry and no
// escape sequences had to be parsed.
bool mp_append_escaped_string_noalloc(void *talloc_ctx, struct bstr *dst,
struct bstr *src);
// Like mp_append_escaped_string_noalloc(), but meant to guarantee that *dst
// is a copy (or allocation) rather than a slice of *src.
bool mp_append_escaped_string(void *talloc_ctx, struct bstr *dst,
struct bstr *src);
#endif /* MPLAYER_MPCOMMON_H */

View File

@ -41,31 +41,6 @@ static bool read_token(bstr str, bstr *out_rest, bstr *out_token)
return true;
}
// Read the contents of a double-quoted literal from *str, stopping at the
// first '"' or at the end of the input, and resolving '\' escapes with
// mp_parse_escape(). The text is accumulated in a string allocated against
// talloc_ctx. On success, *literal receives the result and *str is advanced
// past the consumed input. On a broken escape, the accumulated string is
// freed and false is returned; *str and *literal are not written.
static bool read_escaped_string(void *talloc_ctx, bstr *str, bstr *literal)
{
    bstr rest = *str;
    char *buf = talloc_strdup(talloc_ctx, "");

    while (rest.len && rest.start[0] != '"') {
        if (rest.start[0] != '\\') {
            // Plain character: copy it over as-is.
            buf = talloc_strndup_append_buffer(buf, rest.start, 1);
            rest = bstr_cut(rest, 1);
            continue;
        }
        // Skip the backslash; the escape parser expects to start after it.
        rest = bstr_cut(rest, 1);
        if (!mp_parse_escape(&rest, &buf)) {
            talloc_free(buf);
            return false;
        }
    }

    int consumed = str->len - rest.len;
    if (buf) {
        *literal = bstr0(buf);
    } else {
        // No buffer available: fall back to a slice of the raw input.
        *literal = bstr_splice(*str, 0, consumed);
    }
    *str = bstr_cut(*str, consumed);
    return true;
}
// Somewhat awkward; the main purpose is supporting both strings and
// pre-split string arrays as input.
struct parse_ctx {
@ -92,7 +67,7 @@ static int pctx_read_token(struct parse_ctx *ctx, bstr *out)
ctx->str = bstr_lstrip(ctx->str);
bstr start = ctx->str;
if (bstr_eatstart0(&ctx->str, "\"")) {
if (!read_escaped_string(ctx->tmp, &ctx->str, out)) {
if (!mp_append_escaped_string_noalloc(ctx->tmp, out, &ctx->str)) {
MP_ERR(ctx, "Broken string escapes: ...>%.*s<.\n", BSTR_P(start));
return -1;
}

View File

@ -746,20 +746,17 @@ const m_option_type_t m_option_type_float = {
static char *unescape_string(void *talloc_ctx, bstr str)
{
char *res = talloc_strdup(talloc_ctx, "");
bstr dst = {0};
while (str.len) {
bstr rest;
bool esc = bstr_split_tok(str, "\\", &str, &rest);
res = talloc_strndup_append_buffer(res, str.start, str.len);
if (esc) {
if (!mp_parse_escape(&rest, &res)) {
talloc_free(res);
if (!mp_append_escaped_string(talloc_ctx, &dst, &str)) {
talloc_free(dst.start);
return NULL;
}
if (!bstr_eatstart0(&str, "\""))
break;
bstr_xappend(talloc_ctx, &dst, bstr0("\""));
}
str = rest;
}
return res;
return dst.start;
}
static char *escape_string(char *str0)