sub: new: --sub-filter-jsre (js regex)

Pretty much identical to filter-regex, but it uses JS regular expressions
and requires only JS support. Shares the filter-regex-* control options.

The target audience is Windows users - where filter-regex doesn't
work due to missing APIs, but mujs builds cleanly on Windows, and JS
is usually enabled in 3rd party Windows mpv builds.

Lua could have been used with similar effort, however, the JS regex
syntax is more extensive and also much more similar to POSIX.
This commit is contained in:
Avi Halachmi (:avih) 2021-07-23 19:11:23 +03:00 committed by avih
parent d82a073069
commit 7c264950c0
7 changed files with 146 additions and 0 deletions

View File

@ -2826,6 +2826,11 @@ Subtitles
include replacing the regexes with a very primitive and small subset of
sed, or some method to control case-sensitivity.
``--sub-filter-jsre-...=...``
Same as ``--sub-filter-regex`` but with JavaScript regular expressions.
It shares, and is affected by, all ``--sub-filter-regex-*`` control options
(see below), and is also experimental. Requires only JavaScript support.
``--sub-filter-regex-warn=<yes|no>``
Log dropped lines with warning log level, instead of verbose (default: no).
Helpful for testing.

View File

@ -219,6 +219,7 @@ const struct m_sub_options mp_sub_filter_opts = {
{"sub-filter-sdh-harder", OPT_FLAG(sub_filter_SDH_harder)},
{"sub-filter-regex-enable", OPT_FLAG(rf_enable)},
{"sub-filter-regex", OPT_STRINGLIST(rf_items)},
{"sub-filter-jsre", OPT_STRINGLIST(jsre_items)},
{"sub-filter-regex-warn", OPT_FLAG(rf_warn)},
{0}
},

View File

@ -115,6 +115,7 @@ struct mp_sub_filter_opts {
int sub_filter_SDH_harder;
int rf_enable;
char **rf_items;
char **jsre_items;
int rf_warn;
};

134
sub/filter_jsre.c Normal file
View File

@ -0,0 +1,134 @@
#include <stdio.h>
#include <sys/types.h>
#include <mujs.h>
#include "common/common.h"
#include "common/msg.h"
#include "misc/bstr.h"
#include "options/options.h"
#include "sd.h"
// p_NAME are protected functions (never throw) which interact with the JS VM.
// return 0 on successful interaction, not-0 on (caught) js-error.
// on error: stack is the same as on entry + an error value
// js: global[n] = new RegExp(str, flags)
//
// Compiles `str` with `flags` into a JS RegExp object and stores it as a
// global whose name is the string form of the number `n`.
// Protected: returns 0 on success, 1 if a JS error was caught (in which case
// the error value is left on top of the stack for the caller to read and pop).
// The trailing comments show the JS stack contents after each call.
static int p_regcomp(js_State *J, int n, const char *str, int flags)
{
if (js_try(J))
return 1;
js_pushnumber(J, n); // n
js_newregexp(J, str, flags); // n regex
// js_tostring converts the number at index -2 to the global's name
js_setglobal(J, js_tostring(J, -2)); // n (and global[n] is the regex)
js_pop(J, 1);
js_endtry(J);
return 0;
}
// js: found = global[n].test(text)
//
// Looks up the regex stored as global `n` (see p_regcomp) and calls its
// test() method on `text`, storing the boolean outcome in `*found`.
// Protected: returns 0 on success, 1 if a JS error was caught (in which case
// the error value is left on top of the stack for the caller to read and pop).
// `*found` is assigned only after test() has returned.
// The trailing comments show the JS stack contents after each call.
static int p_regexec(js_State *J, int n, const char *text, int *found)
{
if (js_try(J))
return 1;
js_pushnumber(J, n); // n
js_getglobal(J, js_tostring(J, -1)); // n global[n]
js_getproperty(J, -1, "test"); // n global[n] global[n].test
// swap the top two values so the function sits below its `this' object
js_rot2(J); // n global[n].test global[n] (n, test(), and its `this')
js_pushstring(J, text); // n global[n].test global[n] text
js_call(J, 1); // n test-result
*found = js_toboolean(J, -1);
js_pop(J, 2); // the result and n
js_endtry(J);
return 0;
}
// Return a printable message for the value on top of the JS stack, or
// "unknown error" if it cannot be converted to a string.
// Protected (does not throw). The value is left on the stack: the caller
// must pop it once it is done with the returned string.
static const char *get_err(js_State *J)
{
    const char *msg = js_trystring(J, -1, "unknown error");
    return msg;
}
// Per-filter private state, allocated in jsre_init and hung off ft->priv.
struct priv {
// JS VM which holds the compiled regexes as numbered globals
js_State *J;
// number of regexes which compiled successfully; they are stored in the VM
// as globals 0 .. num_regexes-1
int num_regexes;
// result of sd_ass_fmt_offset(ft->event_format), passed to sd_ass_pkt_text
// to extract the text of each packet
int offset;
};
// talloc destructor for struct priv: releases the JS VM it owns.
// Installed by jsre_init only after the VM was created successfully.
static void destruct_priv(void *p)
{
    struct priv *self = p;
    js_freestate(self->J);
}
static bool jsre_init(struct sd_filter *ft)
{
if (strcmp(ft->codec, "ass") != 0)
return false;
if (!ft->opts->rf_enable)
return false;
struct priv *p = talloc_zero(ft, struct priv);
ft->priv = p;
p->J = js_newstate(0, 0, JS_STRICT);
if (!p->J) {
MP_ERR(ft, "jsre: VM init error\n");
return false;
}
talloc_set_destructor(p, destruct_priv);
for (int n = 0; ft->opts->jsre_items && ft->opts->jsre_items[n]; n++) {
char *item = ft->opts->jsre_items[n];
int err = p_regcomp(p->J, p->num_regexes, item, JS_REGEXP_I);
if (err) {
MP_ERR(ft, "jsre: %s -- '%s'\n", get_err(p->J), item);
js_pop(p->J, 1);
continue;
}
p->num_regexes += 1;
}
if (!p->num_regexes)
return false;
p->offset = sd_ass_fmt_offset(ft->event_format);
return true;
}
static struct demux_packet *jsre_filter(struct sd_filter *ft,
struct demux_packet *pkt)
{
struct priv *p = ft->priv;
char *text = bstrto0(NULL, sd_ass_pkt_text(ft, pkt, p->offset));
bool drop = false;
for (int n = 0; n < p->num_regexes; n++) {
int found, err = p_regexec(p->J, n, text, &found);
if (err == 0 && found) {
int level = ft->opts->rf_warn ? MSGL_WARN : MSGL_V;
MP_MSG(ft, level, "jsre: regex %d => drop: '%s'\n", n, text);
drop = true;
break;
} else if (err) {
MP_WARN(ft, "jsre: test regex %d: %s.\n", n, get_err(p->J));
js_pop(p->J, 1);
}
}
talloc_free(text);
return drop ? NULL : pkt;
}
// Function table of the JS regex subtitle filter (--sub-filter-jsre):
// jsre_init compiles the configured regexes, jsre_filter tests each packet.
const struct sd_filter_functions sd_filter_jsre = {
.init = jsre_init,
.filter = jsre_filter,
};

View File

@ -88,6 +88,7 @@ struct sd_filter_functions {
extern const struct sd_filter_functions sd_filter_sdh;
extern const struct sd_filter_functions sd_filter_regex;
extern const struct sd_filter_functions sd_filter_jsre;
// convenience utils for filters with ass codec

View File

@ -67,6 +67,9 @@ static const struct sd_filter_functions *const filters[] = {
&sd_filter_sdh,
#if HAVE_POSIX
&sd_filter_regex,
#endif
#if HAVE_JAVASCRIPT
&sd_filter_jsre,
#endif
NULL,
};

View File

@ -376,6 +376,7 @@ def build(ctx):
( "sub/dec_sub.c" ),
( "sub/draw_bmp.c" ),
( "sub/filter_regex.c", "posix" ),
( "sub/filter_jsre.c", "javascript" ),
( "sub/filter_sdh.c" ),
( "sub/img_convert.c" ),
( "sub/lavc_conv.c" ),