subassconvert: make subrip attribute parsing more robust

Add general code to separate the HTML-like attribute=value syntax used
in srt font tags into attribute and value parts. This simplifies some
of the parsing code, makes detection of malformed input more robust,
and allows warning about unrecognized attributes.
This commit is contained in:
Uoti Urpala 2012-04-17 02:07:55 +03:00
parent f0ce95607f
commit 81eb911763
1 changed file with 34 additions and 33 deletions

View File

@ -106,25 +106,30 @@ static const struct {
#define SUBRIP_MAX_STACKED_FONT_TAGS 16 #define SUBRIP_MAX_STACKED_FONT_TAGS 16
/* Read the attribute value starting at *s, and skip *s past the value. /* Read the HTML-style attribute starting at *s, and skip *s past the value.
* Set out_value to the parsed value, with possible '"' stripped. * Set attr and val to the parsed attribute name and value.
* Return whether the attribute is well formed. */ * Return 0 on success, or -1 if no valid attribute was found.
static bool read_value(char **s, struct bstr *out_value) */
static int read_attr(char **s, struct bstr *attr, struct bstr *val)
{ {
char term = 0; char *eq = strchr(*s, '=');
if (**s == '"') { if (!eq)
term = '"'; return -1;
(*s)++; attr->start = *s;
} attr->len = eq - *s;
out_value->start = *s; for (int i = 0; i < attr->len; i++)
out_value->len = 0; if (!isalnum(attr->start[i]))
unsigned char *start = *s; return -1;
unsigned char *end = term ? strchr(start, term) : strpbrk(start, " >"); val->start = eq + 1;
bool quoted = val->start[0] == '"';
if (quoted)
val->start++;
unsigned char *end = strpbrk(val->start, quoted ? "\"" : " >");
if (!end) if (!end)
return false; return -1;
out_value->len = end - out_value->start; val->len = end - val->start;
*s = end + (term ? 1 : 0); *s = end + quoted;
return true; return 0;
} }
void subassconvert_subrip(const char *orig, char *dest, int dest_buffer_size) void subassconvert_subrip(const char *orig, char *dest, int dest_buffer_size)
@ -195,22 +200,21 @@ void subassconvert_subrip(const char *orig, char *dest, int dest_buffer_size)
line += 6; line += 6;
while (*line && *line != '>') { while (*line && *line != '>') {
if (strncmp(line, "size=", 5) == 0) { if (*line == ' ') {
line += 5; line++;
struct bstr val; continue;
if (!read_value(&line, &val)) }
struct bstr attr, val;
if (read_attr(&line, &attr, &val) < 0)
break; break;
if (!bstrcmp0(attr, "size")) {
tag->size = bstrtoll(val, &val, 10); tag->size = bstrtoll(val, &val, 10);
if (val.len) if (val.len)
break; break;
append_text(&new_line, "{\\fs%d}", tag->size); append_text(&new_line, "{\\fs%d}", tag->size);
tag->has_size = true; tag->has_size = true;
has_valid_attr = true; has_valid_attr = true;
} else if (strncmp(line, "color=", 6) == 0) { } else if (!bstrcmp0(attr, "color")) {
line += 6;
struct bstr val;
if (!read_value(&line, &val))
break;
if (bstr_eatstart(&val, bstr("#"))) { if (bstr_eatstart(&val, bstr("#"))) {
// #RRGGBB format // #RRGGBB format
tag->color = bstrtoll(val, &val, 16) & 0x00ffffff; tag->color = bstrtoll(val, &val, 16) & 0x00ffffff;
@ -240,18 +244,15 @@ void subassconvert_subrip(const char *orig, char *dest, int dest_buffer_size)
append_text(&new_line, "{\\c&H%06X&}", tag->color); append_text(&new_line, "{\\c&H%06X&}", tag->color);
tag->has_color = true; tag->has_color = true;
has_valid_attr = true; has_valid_attr = true;
} else if (strncmp(line, "face=", 5) == 0) { } else if (!bstrcmp0(attr, "face")) {
/* Font face attribute */ /* Font face attribute */
line += 5;
struct bstr val;
if (!read_value(&line, &val))
break;
tag->face = val; tag->face = val;
append_text(&new_line, "{\\fn%.*s}", BSTR_P(tag->face)); append_text(&new_line, "{\\fn%.*s}", BSTR_P(tag->face));
tag->has_face = true; tag->has_face = true;
has_valid_attr = true; has_valid_attr = true;
} else } else
line++; mp_tmsg(MSGT_SUBREADER, MSGL_WARN,"SubRip: unrecognized "
"attribute \"%.*s\" in font tag\n", BSTR_P(attr));
} }
if (!has_valid_attr || *line != '>') { /* Not valid font tag */ if (!has_valid_attr || *line != '>') { /* Not valid font tag */