From aa0a9ce2ec15af780eb8f02636aad4cf4e357d0c Mon Sep 17 00:00:00 2001 From: rcombs Date: Wed, 17 Apr 2024 01:26:56 -0700 Subject: [PATCH] sd_ass: allow get_text to return more than 500 bytes --- sub/filter_jsre.c | 2 +- sub/filter_regex.c | 2 +- sub/sd.h | 8 ++-- sub/sd_ass.c | 102 ++++++++++++++++++++++----------------------- 4 files changed, 56 insertions(+), 58 deletions(-) diff --git a/sub/filter_jsre.c b/sub/filter_jsre.c index f956000d55..fb711814e3 100644 --- a/sub/filter_jsre.c +++ b/sub/filter_jsre.c @@ -115,7 +115,7 @@ static struct demux_packet *jsre_filter(struct sd_filter *ft, bool drop = false; if (ft->opts->rf_plain) - sd_ass_to_plaintext(text, strlen(text), text); + sd_ass_to_plaintext(&text, text); for (int n = 0; n < p->num_regexes; n++) { int found, err = p_regexec(p->J, n, text, &found); diff --git a/sub/filter_regex.c b/sub/filter_regex.c index 8e299918ce..1d29ea1ac1 100644 --- a/sub/filter_regex.c +++ b/sub/filter_regex.c @@ -64,7 +64,7 @@ static struct demux_packet *rf_filter(struct sd_filter *ft, bool drop = false; if (ft->opts->rf_plain) - sd_ass_to_plaintext(text, strlen(text), text); + sd_ass_to_plaintext(&text, text); for (int n = 0; n < p->num_regexes; n++) { int err = regexec(&p->regexes[n], text, 0, NULL, 0); diff --git a/sub/sd.h b/sub/sd.h index 681eaa9b22..0f5b8c7c72 100644 --- a/sub/sd.h +++ b/sub/sd.h @@ -107,8 +107,10 @@ int sd_ass_fmt_offset(const char *event_format); bstr sd_ass_pkt_text(struct sd_filter *ft, struct demux_packet *pkt, int offset); // convert \0-terminated "Text" (ass) content to plaintext, possibly in-place. -// result.start is out, result.len is MIN(out_siz, strlen(in)) or smaller. -// if there's room: out[result.len] is set to \0. out == in is allowed. -bstr sd_ass_to_plaintext(char *out, size_t out_siz, const char *in); +// result.start is *out, result.len is strlen(in) or smaller. +// (*out)[result.len] is always set to \0. *out == in is allowed. +// *out must be a talloc-allocated buffer or NULL, and will be reallocated if needed. +// *out will not be reallocated if *out == in. +bstr sd_ass_to_plaintext(char **out, const char *in); #endif diff --git a/sub/sd_ass.c b/sub/sd_ass.c index 881383e965..07e336f4c2 100644 --- a/sub/sd_ass.c +++ b/sub/sd_ass.c @@ -51,7 +51,7 @@ struct sd_ass_priv { bool clear_once; struct mp_ass_packer *packer; struct sub_bitmap_copy_cache *copy_cache; - char last_text[500]; + bstr last_text; struct mp_image_params video_params; struct mp_image_params last_params; struct mp_osd_res osd; @@ -709,30 +709,23 @@ done: return res; } -struct buf { - char *start; - int size; - int len; -}; +#define MAX_BUF_SIZE 1024 * 1024 +#define MIN_EXPAND_SIZE 4096 -static void append(struct buf *b, char c) +static void append(bstr *b, char c) { - if (b->len < b->size) { - b->start[b->len] = c; - b->len++; - } + bstr_xappend(NULL, b, (bstr){&c, 1}); } -static void ass_to_plaintext(struct buf *b, const char *in) +static void ass_to_plaintext(bstr *b, const char *in) { - bool in_tag = false; const char *open_tag_pos = NULL; bool in_drawing = false; while (*in) { - if (in_tag) { + if (open_tag_pos) { if (in[0] == '}') { in += 1; - in_tag = false; + open_tag_pos = NULL; } else if (in[0] == '\\' && in[1] == 'p' && in[2] != 'o') { in += 2; // Skip text between \pN and \p0 tags. A \p without a number @@ -756,7 +749,6 @@ static void ass_to_plaintext(struct buf *b, const char *in) } else if (in[0] == '{') { open_tag_pos = in; in += 1; - in_tag = true; } else { if (!in_drawing) append(b, in[0]); @@ -765,65 +757,64 @@ static void ass_to_plaintext(struct buf *b, const char *in) } } // A '{' without a closing '}' is always visible. - if (in_tag) { - while (*open_tag_pos) - append(b, *open_tag_pos++); + if (open_tag_pos) { + bstr_xappend(NULL, b, bstr0(open_tag_pos)); } } -// Empty string counts as whitespace. Reads s[len-1] even if there are \0s. -static bool is_whitespace_only(char *s, int len) +// Empty string counts as whitespace. +static bool is_whitespace_only(bstr b) { - for (int n = 0; n < len; n++) { - if (s[n] != ' ' && s[n] != '\t') + for (int n = 0; n < b.len; n++) { + if (b.start[n] != ' ' && b.start[n] != '\t') return false; } return true; } -static char *get_text_buf(struct sd *sd, double pts, enum sd_text_type type) +static bstr get_text_buf(struct sd *sd, double pts, enum sd_text_type type) { struct sd_ass_priv *ctx = sd->priv; ASS_Track *track = ctx->ass_track; if (pts == MP_NOPTS_VALUE) - return NULL; + return (bstr){0}; long long ipts = find_timestamp(sd, pts); - struct buf b = {ctx->last_text, sizeof(ctx->last_text) - 1}; + bstr *b = &ctx->last_text; + + if (!b->start) + b->start = talloc_size(ctx, 4096); + + b->len = 0; for (int i = 0; i < track->n_events; ++i) { ASS_Event *event = track->events + i; if (ipts >= event->Start && ipts < event->Start + event->Duration) { if (event->Text) { - int start = b.len; + int start = b->len; if (type == SD_TEXT_TYPE_PLAIN) { - ass_to_plaintext(&b, event->Text); + ass_to_plaintext(b, event->Text); } else { - char *t = event->Text; - while (*t) - append(&b, *t++); + bstr_xappend(NULL, b, bstr0(event->Text)); } - if (is_whitespace_only(&b.start[start], b.len - start)) { - b.len = start; + if (is_whitespace_only(bstr_cut(*b, start))) { + b->len = start; } else { - append(&b, '\n'); + append(b, '\n'); } } } } - b.start[b.len] = '\0'; + bstr_eatend(b, (bstr)bstr0_lit("\n")); - if (b.len > 0 && b.start[b.len - 1] == '\n') - b.start[b.len - 1] = '\0'; - - return ctx->last_text; + return *b; } static char *get_text(struct sd *sd, double pts, enum sd_text_type type) { - return talloc_strdup(NULL, get_text_buf(sd, pts, type)); + return bstrto0(NULL, get_text_buf(sd, pts, type)); } static struct sd_times get_times(struct sd *sd, double pts) @@ -862,20 +853,26 @@ static void fill_plaintext(struct sd *sd, double pts) ass_flush_events(track); - char *text = get_text_buf(sd, pts, SD_TEXT_TYPE_PLAIN); - if (!text) + bstr text = get_text_buf(sd, pts, SD_TEXT_TYPE_PLAIN); + if (!text.len) return; bstr dst = {0}; - while (*text) { - if (*text == '{') + while (text.len) { + if (*text.start == '{') { + bstr_xappend(NULL, &dst, bstr0("\\{")); + text = bstr_cut(text, 1); + } else if (*text.start == '\\') { bstr_xappend(NULL, &dst, bstr0("\\")); - bstr_xappend(NULL, &dst, (bstr){text, 1}); - // Break ASS escapes with U+2060 WORD JOINER - if (*text == '\\') + // Break ASS escapes with U+2060 WORD JOINER mp_append_utf8_bstr(NULL, &dst, 0x2060); - text++; + text = bstr_cut(text, 1); + } + + int i = bstrcspn(text, "{\\"); + bstr_xappend(NULL, &dst, (bstr){text.start, i}); + text = bstr_cut(text, i); } if (!dst.start) @@ -1103,11 +1100,10 @@ bstr sd_ass_pkt_text(struct sd_filter *ft, struct demux_packet *pkt, int offset) return txt; } -bstr sd_ass_to_plaintext(char *out, size_t out_siz, const char *in) +bstr sd_ass_to_plaintext(char **out, const char *in) { - struct buf b = {out, out_siz, 0}; + bstr b = {*out}; ass_to_plaintext(&b, in); - if (b.len < out_siz) - out[b.len] = 0; - return (bstr){out, b.len}; + *out = b.start; + return b; }