diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst index 65626b80c0..c540400c74 100644 --- a/DOCS/man/options.rst +++ b/DOCS/man/options.rst @@ -2826,6 +2826,11 @@ Subtitles include replacing the regexes with a very primitive and small subset of sed, or some method to control case-sensitivity. +``--sub-filter-jsre-...=...`` + Same as ``--sub-filter-regex`` but with JavaScript regular expressions. + Shares/affected-by all ``--sub-filter-regex-*`` control options (see below), + and also experimental. Requires only JavaScript support. + ``--sub-filter-regex-warn=`` Log dropped lines with warning log level, instead of verbose (default: no). Helpful for testing. diff --git a/options/options.c b/options/options.c index fac31a2bb9..465ac9a35f 100644 --- a/options/options.c +++ b/options/options.c @@ -219,6 +219,7 @@ const struct m_sub_options mp_sub_filter_opts = { {"sub-filter-sdh-harder", OPT_FLAG(sub_filter_SDH_harder)}, {"sub-filter-regex-enable", OPT_FLAG(rf_enable)}, {"sub-filter-regex", OPT_STRINGLIST(rf_items)}, + {"sub-filter-jsre", OPT_STRINGLIST(jsre_items)}, {"sub-filter-regex-warn", OPT_FLAG(rf_warn)}, {0} }, diff --git a/options/options.h b/options/options.h index 41aa88abb9..7963d6bd28 100644 --- a/options/options.h +++ b/options/options.h @@ -115,6 +115,7 @@ struct mp_sub_filter_opts { int sub_filter_SDH_harder; int rf_enable; char **rf_items; + char **jsre_items; int rf_warn; }; diff --git a/sub/filter_jsre.c b/sub/filter_jsre.c new file mode 100644 index 0000000000..896382714a --- /dev/null +++ b/sub/filter_jsre.c @@ -0,0 +1,134 @@ +#include +#include + +#include + +#include "common/common.h" +#include "common/msg.h" +#include "misc/bstr.h" +#include "options/options.h" +#include "sd.h" + + +// p_NAME are protected functions (never throw) which interact with the JS VM. +// return 0 on successful interaction, not-0 on (caught) js-error. 
+// on error: stack is the same as on entry + an error value
+
+// js: global[n] = new RegExp(str, flags)
+//
+// Compiles `str` with `flags` and stores the resulting RegExp object in the
+// JS global whose name is the string form of `n`.
+// Returns 0 on success. Returns 1 if the VM raised an error (e.g. the pattern
+// does not compile); the error value is then left on top of the stack for the
+// caller to inspect and pop.
+static int p_regcomp(js_State *J, int n, const char *str, int flags)
+{
+    // js_try() yields non-zero once an error raised below has been caught.
+    if (js_try(J))
+        return 1;
+
+    js_pushnumber(J, n);  // n
+    js_newregexp(J, str, flags);  // n regex
+    // Index -2 is n (the regex sits above it); its string form names the global.
+    js_setglobal(J, js_tostring(J, -2));  // n (and global[n] is the regex)
+    js_pop(J, 1);  // drop n: stack is back to its state on entry
+
+    js_endtry(J);
+    return 0;
+}
+
+// js: found = global[n].test(text)
+//
+// Runs the regex stored under index `n` (see p_regcomp) against `text` and
+// writes the boolean outcome to *found.
+// Returns 0 on success. Returns 1 if the VM raised an error; *found is then
+// not guaranteed to have been written, and the error value is left on top of
+// the stack for the caller to inspect and pop.
+static int p_regexec(js_State *J, int n, const char *text, int *found)
+{
+    // js_try() yields non-zero once an error raised below has been caught.
+    if (js_try(J))
+        return 1;
+
+    js_pushnumber(J, n);  // n
+    js_getglobal(J, js_tostring(J, -1));  // n global[n]
+    js_getproperty(J, -1, "test");  // n global[n] global[n].test
+    // The call below needs the function underneath its `this': swap the top two.
+    js_rot2(J);  // n global[n].test global[n] (n, test(), and its `this')
+    js_pushstring(J, text);  // n global[n].test global[n] text
+    js_call(J, 1);  // n test-result
+    *found = js_toboolean(J, -1);
+    js_pop(J, 2);  // the result and n
+
+    js_endtry(J);
+    return 0;
+}
+
+// protected. caller should pop the error after using the result string.
+// Returns the value on top of the stack as a C string without popping it;
+// "unknown error" is the fallback string handed to js_trystring().
+static const char *get_err(js_State *J)
+{
+    return js_trystring(J, -1, "unknown error");
+}
+
+
+// Per-filter state, allocated with talloc under the owning sd_filter.
+struct priv {
+    js_State *J;      // JS VM; compiled regexes are globals named by index
+    int num_regexes;  // number of regexes that compiled successfully
+    int offset;       // sd_ass_fmt_offset() result, passed to sd_ass_pkt_text()
+};
+
+// talloc destructor for struct priv: releases the JS VM.
+static void destruct_priv(void *p)
+{
+    struct priv *priv = p;
+    js_freestate(priv->J);
+}
+
+// Filter init: compiles every --sub-filter-jsre item into a private JS VM.
+//
+// Returns true only if the codec is "ass", regex filtering is enabled
+// (rf_enable, i.e. --sub-filter-regex-enable), and at least one item
+// compiled. Items that fail to compile are logged and skipped.
+static bool jsre_init(struct sd_filter *ft)
+{
+    if (strcmp(ft->codec, "ass") != 0)
+        return false;
+
+    if (!ft->opts->rf_enable)
+        return false;
+
+    // No patterns configured: bail out before allocating anything. Without
+    // this, merely enabling the regex filters would create (and immediately
+    // discard) a whole JS VM for every subtitle track, and a VM init failure
+    // would be reported for a feature that is not in use.
+    char **items = ft->opts->jsre_items;
+    if (!items || !items[0])
+        return false;
+
+    struct priv *p = talloc_zero(ft, struct priv);
+    ft->priv = p;
+
+    p->J = js_newstate(NULL, NULL, JS_STRICT);
+    if (!p->J) {
+        MP_ERR(ft, "jsre: VM init error\n");
+        return false;
+    }
+    // Registered only once the VM exists, so the destructor never sees NULL.
+    talloc_set_destructor(p, destruct_priv);
+
+    for (int n = 0; items[n]; n++) {
+        char *item = items[n];
+
+        // Regexes are numbered densely by num_regexes, not by option index:
+        // an item that fails to compile does not consume a slot.
+        int err = p_regcomp(p->J, p->num_regexes, item, JS_REGEXP_I);
+        if (err) {
+            MP_ERR(ft, "jsre: %s -- '%s'\n", get_err(p->J), item);
+            js_pop(p->J, 1);  // discard the error value left by p_regcomp
+            continue;
+        }
+
+        p->num_regexes += 1;
+    }
+
+    if (!p->num_regexes)
+        return false;
+
+    p->offset = sd_ass_fmt_offset(ft->event_format);
+    return true;
+}
+
+// Filter callback: tests the packet's text against each compiled regex.
+//
+// Returns NULL as soon as one regex matches (logged as a drop, at warning
+// level if rf_warn is set, else verbose); otherwise returns pkt itself.
+// A regex whose evaluation raises a JS error is logged and skipped.
+static struct demux_packet *jsre_filter(struct sd_filter *ft,
+                                        struct demux_packet *pkt)
+{
+    struct priv *p = ft->priv;
+    // NUL-terminated copy of the event text, as p_regexec takes a C string.
+    char *text = bstrto0(NULL, sd_ass_pkt_text(ft, pkt, p->offset));
+    bool drop = false;
+
+    for (int n = 0; n < p->num_regexes; n++) {
+        int found = 0;
+        if (p_regexec(p->J, n, text, &found)) {
+            MP_WARN(ft, "jsre: test regex %d: %s.\n", n, get_err(p->J));
+            js_pop(p->J, 1);  // discard the error value left by p_regexec
+            continue;
+        }
+
+        if (found) {
+            int level = ft->opts->rf_warn ? MSGL_WARN : MSGL_V;
+            MP_MSG(ft, level, "jsre: regex %d => drop: '%s'\n", n, text);
+            drop = true;
+            break;
+        }
+    }
+
+    talloc_free(text);
+    return drop ? NULL : pkt;
+}
+
+// Entry points of the JS-regex subtitle filter; listed in filters[] in
+// sub/sd_ass.c when HAVE_JAVASCRIPT is set.
+const struct sd_filter_functions sd_filter_jsre = {
+    .init = jsre_init,
+    .filter = jsre_filter,
+};
diff --git a/sub/sd.h b/sub/sd.h
index 2d107d1769..2e8d71ba79 100644
--- a/sub/sd.h
+++ b/sub/sd.h
@@ -88,6 +88,7 @@ struct sd_filter_functions {
 
 extern const struct sd_filter_functions sd_filter_sdh;
 extern const struct sd_filter_functions sd_filter_regex;
+extern const struct sd_filter_functions sd_filter_jsre;
 
 
 // convenience utils for filters with ass codec
diff --git a/sub/sd_ass.c b/sub/sd_ass.c
index 0da6df41c8..e100b5c5e0 100644
--- a/sub/sd_ass.c
+++ b/sub/sd_ass.c
@@ -67,6 +67,9 @@ static const struct sd_filter_functions *const filters[] = {
     &sd_filter_sdh,
 #if HAVE_POSIX
     &sd_filter_regex,
+#endif
+#if HAVE_JAVASCRIPT
+    &sd_filter_jsre,
 #endif
     NULL,
 };
diff --git a/wscript_build.py b/wscript_build.py
index fbec5006f8..384bb50d2e 100644
--- a/wscript_build.py
+++ b/wscript_build.py
@@ -376,6 +376,7 @@ def build(ctx):
         ( "sub/dec_sub.c" ),
         ( "sub/draw_bmp.c" ),
         ( "sub/filter_regex.c", "posix" ),
+        ( "sub/filter_jsre.c", "javascript" ),
         ( "sub/filter_sdh.c" ),
         ( "sub/img_convert.c" ),
         ( "sub/lavc_conv.c" ),