sd_ass: allow get_text to return more than 500 bytes

This commit is contained in:
rcombs 2024-04-17 01:26:56 -07:00 committed by Kacper Michajłow
parent e3fd24496a
commit aa0a9ce2ec
4 changed files with 56 additions and 58 deletions

View File

@ -115,7 +115,7 @@ static struct demux_packet *jsre_filter(struct sd_filter *ft,
bool drop = false; bool drop = false;
if (ft->opts->rf_plain) if (ft->opts->rf_plain)
sd_ass_to_plaintext(text, strlen(text), text); sd_ass_to_plaintext(&text, text);
for (int n = 0; n < p->num_regexes; n++) { for (int n = 0; n < p->num_regexes; n++) {
int found, err = p_regexec(p->J, n, text, &found); int found, err = p_regexec(p->J, n, text, &found);

View File

@ -64,7 +64,7 @@ static struct demux_packet *rf_filter(struct sd_filter *ft,
bool drop = false; bool drop = false;
if (ft->opts->rf_plain) if (ft->opts->rf_plain)
sd_ass_to_plaintext(text, strlen(text), text); sd_ass_to_plaintext(&text, text);
for (int n = 0; n < p->num_regexes; n++) { for (int n = 0; n < p->num_regexes; n++) {
int err = regexec(&p->regexes[n], text, 0, NULL, 0); int err = regexec(&p->regexes[n], text, 0, NULL, 0);

View File

@ -107,8 +107,10 @@ int sd_ass_fmt_offset(const char *event_format);
bstr sd_ass_pkt_text(struct sd_filter *ft, struct demux_packet *pkt, int offset); bstr sd_ass_pkt_text(struct sd_filter *ft, struct demux_packet *pkt, int offset);
// convert \0-terminated "Text" (ass) content to plaintext, possibly in-place. // convert \0-terminated "Text" (ass) content to plaintext, possibly in-place.
// result.start is out, result.len is MIN(out_siz, strlen(in)) or smaller. // result.start is *out, result.len is strlen(in) or smaller.
// if there's room: out[result.len] is set to \0. out == in is allowed. // (*out)[result.len] is always set to \0. *out == in is allowed.
bstr sd_ass_to_plaintext(char *out, size_t out_siz, const char *in); // *out must be a talloc-allocated buffer or NULL, and will be reallocated if needed.
// *out will not be reallocated if *out == in.
bstr sd_ass_to_plaintext(char **out, const char *in);
#endif #endif

View File

@ -51,7 +51,7 @@ struct sd_ass_priv {
bool clear_once; bool clear_once;
struct mp_ass_packer *packer; struct mp_ass_packer *packer;
struct sub_bitmap_copy_cache *copy_cache; struct sub_bitmap_copy_cache *copy_cache;
char last_text[500]; bstr last_text;
struct mp_image_params video_params; struct mp_image_params video_params;
struct mp_image_params last_params; struct mp_image_params last_params;
struct mp_osd_res osd; struct mp_osd_res osd;
@ -709,30 +709,23 @@ done:
return res; return res;
} }
struct buf { #define MAX_BUF_SIZE 1024 * 1024
char *start; #define MIN_EXPAND_SIZE 4096
int size;
int len;
};
static void append(struct buf *b, char c) static void append(bstr *b, char c)
{ {
if (b->len < b->size) { bstr_xappend(NULL, b, (bstr){&c, 1});
b->start[b->len] = c;
b->len++;
}
} }
static void ass_to_plaintext(struct buf *b, const char *in) static void ass_to_plaintext(bstr *b, const char *in)
{ {
bool in_tag = false;
const char *open_tag_pos = NULL; const char *open_tag_pos = NULL;
bool in_drawing = false; bool in_drawing = false;
while (*in) { while (*in) {
if (in_tag) { if (open_tag_pos) {
if (in[0] == '}') { if (in[0] == '}') {
in += 1; in += 1;
in_tag = false; open_tag_pos = NULL;
} else if (in[0] == '\\' && in[1] == 'p' && in[2] != 'o') { } else if (in[0] == '\\' && in[1] == 'p' && in[2] != 'o') {
in += 2; in += 2;
// Skip text between \pN and \p0 tags. A \p without a number // Skip text between \pN and \p0 tags. A \p without a number
@ -756,7 +749,6 @@ static void ass_to_plaintext(struct buf *b, const char *in)
} else if (in[0] == '{') { } else if (in[0] == '{') {
open_tag_pos = in; open_tag_pos = in;
in += 1; in += 1;
in_tag = true;
} else { } else {
if (!in_drawing) if (!in_drawing)
append(b, in[0]); append(b, in[0]);
@ -765,65 +757,64 @@ static void ass_to_plaintext(struct buf *b, const char *in)
} }
} }
// A '{' without a closing '}' is always visible. // A '{' without a closing '}' is always visible.
if (in_tag) { if (open_tag_pos) {
while (*open_tag_pos) bstr_xappend(NULL, b, bstr0(open_tag_pos));
append(b, *open_tag_pos++);
} }
} }
// Empty string counts as whitespace. Reads s[len-1] even if there are \0s. // Empty string counts as whitespace.
static bool is_whitespace_only(char *s, int len) static bool is_whitespace_only(bstr b)
{ {
for (int n = 0; n < len; n++) { for (int n = 0; n < b.len; n++) {
if (s[n] != ' ' && s[n] != '\t') if (b.start[n] != ' ' && b.start[n] != '\t')
return false; return false;
} }
return true; return true;
} }
static char *get_text_buf(struct sd *sd, double pts, enum sd_text_type type) static bstr get_text_buf(struct sd *sd, double pts, enum sd_text_type type)
{ {
struct sd_ass_priv *ctx = sd->priv; struct sd_ass_priv *ctx = sd->priv;
ASS_Track *track = ctx->ass_track; ASS_Track *track = ctx->ass_track;
if (pts == MP_NOPTS_VALUE) if (pts == MP_NOPTS_VALUE)
return NULL; return (bstr){0};
long long ipts = find_timestamp(sd, pts); long long ipts = find_timestamp(sd, pts);
struct buf b = {ctx->last_text, sizeof(ctx->last_text) - 1}; bstr *b = &ctx->last_text;
if (!b->start)
b->start = talloc_size(ctx, 4096);
b->len = 0;
for (int i = 0; i < track->n_events; ++i) { for (int i = 0; i < track->n_events; ++i) {
ASS_Event *event = track->events + i; ASS_Event *event = track->events + i;
if (ipts >= event->Start && ipts < event->Start + event->Duration) { if (ipts >= event->Start && ipts < event->Start + event->Duration) {
if (event->Text) { if (event->Text) {
int start = b.len; int start = b->len;
if (type == SD_TEXT_TYPE_PLAIN) { if (type == SD_TEXT_TYPE_PLAIN) {
ass_to_plaintext(&b, event->Text); ass_to_plaintext(b, event->Text);
} else { } else {
char *t = event->Text; bstr_xappend(NULL, b, bstr0(event->Text));
while (*t)
append(&b, *t++);
} }
if (is_whitespace_only(&b.start[start], b.len - start)) { if (is_whitespace_only(bstr_cut(*b, start))) {
b.len = start; b->len = start;
} else { } else {
append(&b, '\n'); append(b, '\n');
} }
} }
} }
} }
b.start[b.len] = '\0'; bstr_eatend(b, (bstr)bstr0_lit("\n"));
if (b.len > 0 && b.start[b.len - 1] == '\n') return *b;
b.start[b.len - 1] = '\0';
return ctx->last_text;
} }
static char *get_text(struct sd *sd, double pts, enum sd_text_type type) static char *get_text(struct sd *sd, double pts, enum sd_text_type type)
{ {
return talloc_strdup(NULL, get_text_buf(sd, pts, type)); return bstrto0(NULL, get_text_buf(sd, pts, type));
} }
static struct sd_times get_times(struct sd *sd, double pts) static struct sd_times get_times(struct sd *sd, double pts)
@ -862,20 +853,26 @@ static void fill_plaintext(struct sd *sd, double pts)
ass_flush_events(track); ass_flush_events(track);
char *text = get_text_buf(sd, pts, SD_TEXT_TYPE_PLAIN); bstr text = get_text_buf(sd, pts, SD_TEXT_TYPE_PLAIN);
if (!text) if (!text.len)
return; return;
bstr dst = {0}; bstr dst = {0};
while (*text) { while (text.len) {
if (*text == '{') if (*text.start == '{') {
bstr_xappend(NULL, &dst, bstr0("\\{"));
text = bstr_cut(text, 1);
} else if (*text.start == '\\') {
bstr_xappend(NULL, &dst, bstr0("\\")); bstr_xappend(NULL, &dst, bstr0("\\"));
bstr_xappend(NULL, &dst, (bstr){text, 1}); // Break ASS escapes with U+2060 WORD JOINER
// Break ASS escapes with U+2060 WORD JOINER
if (*text == '\\')
mp_append_utf8_bstr(NULL, &dst, 0x2060); mp_append_utf8_bstr(NULL, &dst, 0x2060);
text++; text = bstr_cut(text, 1);
}
int i = bstrcspn(text, "{\\");
bstr_xappend(NULL, &dst, (bstr){text.start, i});
text = bstr_cut(text, i);
} }
if (!dst.start) if (!dst.start)
@ -1103,11 +1100,10 @@ bstr sd_ass_pkt_text(struct sd_filter *ft, struct demux_packet *pkt, int offset)
return txt; return txt;
} }
bstr sd_ass_to_plaintext(char *out, size_t out_siz, const char *in) bstr sd_ass_to_plaintext(char **out, const char *in)
{ {
struct buf b = {out, out_siz, 0}; bstr b = {*out};
ass_to_plaintext(&b, in); ass_to_plaintext(&b, in);
if (b.len < out_siz) *out = b.start;
out[b.len] = 0; return b;
return (bstr){out, b.len};
} }