mirror of https://github.com/mpv-player/mpv
sub: sub-filter-regex and jsre: support ass-to-plaintext
Using --sub-filter-regex-plain (default: no). The ass-to-plaintext functionality already existed in sd_ass.c, but it's internal and uses a private buffer type, so a trivial utility wrapper was added with a standard char*/bstr interface. The plaintext can be multi-line, and the multi-line regexp flag is now always set, but it only affects plaintext (the ASS source is one line).
This commit is contained in:
parent
7c264950c0
commit
41650203c3
|
@ -2804,7 +2804,7 @@ Subtitles
|
|||
|
||||
List items are matched in order. If a regular expression matches, the
|
||||
process is stopped, and the subtitle line is discarded. The text matched
|
||||
against is, currently, always the ``Text`` field of ASS events (if the
|
||||
against is, by default, the ``Text`` field of ASS events (if the
|
||||
subtitle format is different, it is always converted). This may include
|
||||
formatting tags. Matching is case-insensitive, but how this is done depends
|
||||
on the libc, and most likely works in ASCII only. It does not work on
|
||||
|
@ -2831,6 +2831,12 @@ Subtitles
|
|||
Shares/affected-by all ``--sub-filter-regex-*`` control options (see below),
|
||||
and also experimental. Requires only JavaScript support.
|
||||
|
||||
``--sub-filter-regex-plain=<yes|no>``
|
||||
Whether to first convert the ASS "Text" field to plain-text (default: no).
|
||||
This strips ASS tags and applies ASS directives, e.g. ``\N`` becomes a newline.
|
||||
If the result is multi-line then the regexp anchors ``^`` and ``$`` match
|
||||
each line, but any match still discards all lines.
|
||||
|
||||
``--sub-filter-regex-warn=<yes|no>``
|
||||
Log dropped lines with warning log level, instead of verbose (default: no).
|
||||
Helpful for testing.
|
||||
|
|
|
@ -218,6 +218,7 @@ const struct m_sub_options mp_sub_filter_opts = {
|
|||
{"sub-filter-sdh", OPT_FLAG(sub_filter_SDH)},
|
||||
{"sub-filter-sdh-harder", OPT_FLAG(sub_filter_SDH_harder)},
|
||||
{"sub-filter-regex-enable", OPT_FLAG(rf_enable)},
|
||||
{"sub-filter-regex-plain", OPT_FLAG(rf_plain)},
|
||||
{"sub-filter-regex", OPT_STRINGLIST(rf_items)},
|
||||
{"sub-filter-jsre", OPT_STRINGLIST(jsre_items)},
|
||||
{"sub-filter-regex-warn", OPT_FLAG(rf_warn)},
|
||||
|
|
|
@ -114,6 +114,7 @@ struct mp_sub_filter_opts {
|
|||
int sub_filter_SDH;
|
||||
int sub_filter_SDH_harder;
|
||||
int rf_enable;
|
||||
int rf_plain;
|
||||
char **rf_items;
|
||||
char **jsre_items;
|
||||
int rf_warn;
|
||||
|
|
|
@ -87,7 +87,7 @@ static bool jsre_init(struct sd_filter *ft)
|
|||
for (int n = 0; ft->opts->jsre_items && ft->opts->jsre_items[n]; n++) {
|
||||
char *item = ft->opts->jsre_items[n];
|
||||
|
||||
int err = p_regcomp(p->J, p->num_regexes, item, JS_REGEXP_I);
|
||||
int err = p_regcomp(p->J, p->num_regexes, item, JS_REGEXP_I | JS_REGEXP_M);
|
||||
if (err) {
|
||||
MP_ERR(ft, "jsre: %s -- '%s'\n", get_err(p->J), item);
|
||||
js_pop(p->J, 1);
|
||||
|
@ -111,6 +111,9 @@ static struct demux_packet *jsre_filter(struct sd_filter *ft,
|
|||
char *text = bstrto0(NULL, sd_ass_pkt_text(ft, pkt, p->offset));
|
||||
bool drop = false;
|
||||
|
||||
if (ft->opts->rf_plain)
|
||||
sd_ass_to_plaintext(text, strlen(text), text);
|
||||
|
||||
for (int n = 0; n < p->num_regexes; n++) {
|
||||
int found, err = p_regexec(p->J, n, text, &found);
|
||||
if (err == 0 && found) {
|
||||
|
|
|
@ -30,7 +30,7 @@ static bool rf_init(struct sd_filter *ft)
|
|||
MP_TARRAY_GROW(p, p->regexes, p->num_regexes);
|
||||
regex_t *preg = &p->regexes[p->num_regexes];
|
||||
|
||||
int err = regcomp(preg, item, REG_ICASE | REG_EXTENDED | REG_NOSUB);
|
||||
int err = regcomp(preg, item, REG_ICASE | REG_EXTENDED | REG_NOSUB | REG_NEWLINE);
|
||||
if (err) {
|
||||
char errbuf[512];
|
||||
regerror(err, preg, errbuf, sizeof(errbuf));
|
||||
|
@ -63,6 +63,9 @@ static struct demux_packet *rf_filter(struct sd_filter *ft,
|
|||
char *text = bstrto0(NULL, sd_ass_pkt_text(ft, pkt, p->offset));
|
||||
bool drop = false;
|
||||
|
||||
if (ft->opts->rf_plain)
|
||||
sd_ass_to_plaintext(text, strlen(text), text);
|
||||
|
||||
for (int n = 0; n < p->num_regexes; n++) {
|
||||
int err = regexec(&p->regexes[n], text, 0, NULL, 0);
|
||||
if (err == 0) {
|
||||
|
|
5
sub/sd.h
5
sub/sd.h
|
@ -101,4 +101,9 @@ int sd_ass_fmt_offset(const char *event_format);
|
|||
// on malformed event: warns and returns (bstr){NULL,0}
|
||||
bstr sd_ass_pkt_text(struct sd_filter *ft, struct demux_packet *pkt, int offset);
|
||||
|
||||
// convert \0-terminated "Text" (ass) content to plaintext, possibly in-place.
|
||||
// result.start is out, result.len is MIN(out_siz, strlen(in)) or smaller.
|
||||
// if there's room: out[result.len] is set to \0. out == in is allowed.
|
||||
bstr sd_ass_to_plaintext(char *out, size_t out_siz, const char *in);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -973,3 +973,12 @@ bstr sd_ass_pkt_text(struct sd_filter *ft, struct demux_packet *pkt, int offset)
|
|||
}
|
||||
return txt;
|
||||
}
|
||||
|
||||
bstr sd_ass_to_plaintext(char *out, size_t out_siz, const char *in)
|
||||
{
|
||||
struct buf b = {out, out_siz, 0};
|
||||
ass_to_plaintext(&b, in);
|
||||
if (b.len < out_siz)
|
||||
out[b.len] = 0;
|
||||
return (bstr){out, b.len};
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue