1
0
mirror of https://github.com/mpv-player/mpv synced 2025-03-08 15:18:00 +00:00

sub: add an option to filter subtitles by regex

Works as an ad-filter. I had some more plans, for example replacing
matching text with different text, but for now it's dropping matches
only. There's a big warning in the manpage that I might change
semantics. For example, I might turn it into a primitive sed.

In a sane world, you'd probably write a simple script that processes
downloaded subtitles before giving them to mpv, and avoid all this
complexity. But we don't live in a sane world, and the sooner you learn
this, the happier you will be. (But I also want to run this on muxed
subtitles.)

This is pretty straightforward. We use POSIX regexes, which are readily
available without additional pain or dependencies. This also means it's
(apparently) not available on win32 (MinGW). The regex list is because I
hate big monolithic regexes, and this makes it slightly better.

Very superficially tested.
This commit is contained in:
wm4 2020-02-16 02:03:36 +01:00
parent 0b35b4c917
commit a4eb8f75c0
7 changed files with 160 additions and 0 deletions

View File

@ -2571,6 +2571,47 @@ Subtitles
Default: ``no``.
``--sub-filter-regex-...=...``
Set a list of regular expressions to match on text subtitles, and remove any
lines that match (default: empty). This is a string list option. See
`List Options`_ for details. Normally, you should use
``--sub-filter-regex-append=<regex>``, where each option use will append a
new regular expression, without having to fight escaping problems.
List items are matched in order. If a regular expression matches, the
process is stopped, and the subtitle line is discarded. The text matched
against is, currently, always the ``Text`` field of ASS events (if the
subtitle format is different, it is always converted). This may include
formatting tags. Matching is case-insensitive, but how this is done depends
on the libc, and most likely works in ASCII only. It does not work on
bitmap/image subtitles. Unavailable on inferior OSes (requires POSIX regex
support).
.. admonition:: Example
``--sub-filter-regex-append=opensubtitles\.org`` filters some ads.
Technically, using a list for matching is redundant, since you could just
use a single combined regular expression. But it helps with diagnosis,
ease of use, and temporarily disabling or enabling individual filters.
.. warning::
This is experimental. The semantics most likely will change, and if you
use this, you should be prepared to update the option later. Ideas
include replacing the regexes with a very primitive and small subset of
sed, or some method to control case-sensitivity.
``--sub-filter-regex-warn=<yes|no>``
Log dropped lines with warning log level, instead of verbose (default: no).
Helpful for testing.
``--sub-filter-regex-enable=<yes|no>``
Whether to enable regex filtering (default: yes). Note that if no regexes
are added to the ``--sub-filter-regex`` list, setting this option to ``yes``
has no effect. It's meant to easily disable or enable filtering
temporarily.
``--sub-create-cc-track=<yes|no>``
For every video stream, create a closed captions track (default: no). The
only purpose is to make the track available for selection at the start of

View File

@ -201,9 +201,15 @@ const struct m_sub_options mp_sub_filter_opts = {
.opts = (const struct m_option[]){
OPT_FLAG("sub-filter-sdh", sub_filter_SDH, 0),
OPT_FLAG("sub-filter-sdh-harder", sub_filter_SDH_harder, 0),
OPT_FLAG("sub-filter-regex-enable", rf_enable, 0),
OPT_STRINGLIST("sub-filter-regex", rf_items, 0),
OPT_FLAG("sub-filter-regex-warn", rf_warn, 0),
{0}
},
.size = sizeof(OPT_BASE_STRUCT),
.defaults = &(OPT_BASE_STRUCT){
.rf_enable = 1,
},
.change_flags = UPDATE_SUB_FILT,
};

View File

@ -103,6 +103,9 @@ struct mp_subtitle_opts {
// Options for the subtitle filters.
struct mp_sub_filter_opts {
int sub_filter_SDH;
int sub_filter_SDH_harder;
// --sub-filter-regex-enable: flag gating the regex filter.
int rf_enable;
// --sub-filter-regex: string list of regular expressions; the regex filter
// iterates it until a NULL entry and also accepts the list itself being NULL.
char **rf_items;
// --sub-filter-regex-warn: if set, dropped lines are logged at warning level
// instead of verbose level.
int rf_warn;
};
struct mp_osd_render_opts {

105
sub/filter_regex.c Normal file
View File

@ -0,0 +1,105 @@
#include <regex.h>
#include <sys/types.h>
#include "common/common.h"
#include "common/msg.h"
#include "misc/bstr.h"
#include "options/options.h"
#include "sd.h"
// Per-instance state of the regex subtitle filter.
struct priv {
// Derived in rf_init() from the event format header: number of
// comma-separated header fields minus one. rf_filter() skips (offset - 1)
// commas in each event line to reach the text it matches against.
int offset;
// Array of compiled regexes; only the first num_regexes entries are valid.
regex_t *regexes;
// Number of regexes that compiled successfully and are stored in regexes.
int num_regexes;
};
// Set up the regex filter for one subtitle stream.
//
// Returns false (filter not used) if the codec is not "ass", if filtering is
// disabled by option, or if no regex from the option list could be compiled.
// On success, ft->priv holds the compiled regexes and the field offset used
// by rf_filter() to locate the text portion of each event.
static bool rf_init(struct sd_filter *ft)
{
    if (strcmp(ft->codec, "ass") != 0 || !ft->opts->rf_enable)
        return false;

    struct priv *state = talloc_zero(ft, struct priv);
    ft->priv = state;

    // Compile every list entry; entries that fail to compile are reported
    // and skipped, so the array only ever holds usable regexes.
    char **patterns = ft->opts->rf_items;
    for (int i = 0; patterns && patterns[i]; i++) {
        MP_TARRAY_GROW(state, state->regexes, state->num_regexes);
        regex_t *slot = &state->regexes[state->num_regexes];
        int rc = regcomp(slot, patterns[i],
                         REG_ICASE | REG_EXTENDED | REG_NOSUB);
        if (rc == 0) {
            state->num_regexes += 1;
        } else {
            char errbuf[512];
            regerror(rc, slot, errbuf, sizeof(errbuf));
            MP_ERR(ft, "Regular expression error: '%s'\n", errbuf);
        }
    }

    if (state->num_regexes == 0)
        return false;

    // Count the comma-separated fields of the event format header.
    int fields = 0;
    for (char *cur = ft->event_format; cur && cur[0]; ) {
        fields += 1;
        cur = strchr(cur, ',');
        if (cur)
            cur += 1;
    }

    // Packets drop Start/End and gain ReadOrder, i.e. one field fewer.
    state->offset = fields - 1;
    return true;
}
// Release every regex compiled by rf_init(), in array order.
static void rf_uninit(struct sd_filter *ft)
{
    struct priv *state = ft->priv;

    int i = 0;
    while (i < state->num_regexes) {
        regfree(&state->regexes[i]);
        i++;
    }
}
static struct demux_packet *rf_filter(struct sd_filter *ft,
struct demux_packet *pkt)
{
struct priv *p = ft->priv;
char *line = bstrto0(NULL, (bstr){(char *)pkt->buffer, pkt->len});
bool drop = false;
char *text = line;
for (int n = 0; n < p->offset - 1; n++) {
text = strchr(text, ',');
if (!text) {
MP_WARN(ft, "Malformed event: '%s'\n", line);
text = line; // shouldn't happen; random fallback
break;
}
text = text + 1;
}
for (int n = 0; n < p->num_regexes; n++) {
int err = regexec(&p->regexes[n], text, 0, NULL, 0);
if (err == 0) {
int level = ft->opts->rf_warn ? MSGL_WARN : MSGL_V;
MP_MSG(ft, level, "Matching regex %d => drop: '%s'\n", n, text);
drop = true;
break;
} else if (err != REG_NOMATCH) {
MP_WARN(ft, "Error on regexec() on regex %d.\n", n);
}
}
talloc_free(line);
return drop ? NULL : pkt;
}
// Entry points of the regex-based subtitle line filter: rf_init() compiles the
// configured regexes, rf_filter() drops matching events, rf_uninit() frees the
// compiled regexes.
const struct sd_filter_functions sd_filter_regex = {
.init = rf_init,
.uninit = rf_uninit,
.filter = rf_filter,
};

View File

@ -87,5 +87,6 @@ struct sd_filter_functions {
};
extern const struct sd_filter_functions sd_filter_sdh;
extern const struct sd_filter_functions sd_filter_regex;
#endif

View File

@ -65,6 +65,9 @@ static void fill_plaintext(struct sd *sd, double pts);
// NULL-terminated table of the available subtitle filters.
static const struct sd_filter_functions *const filters[] = {
// Note: list order defines filter order.
&sd_filter_sdh,
// The regex filter is listed only when HAVE_POSIX is set; its
// implementation includes the POSIX <regex.h> header.
#if HAVE_POSIX
&sd_filter_regex,
#endif
NULL,
};

View File

@ -388,6 +388,7 @@ def build(ctx):
( "sub/ass_mp.c", "libass"),
( "sub/dec_sub.c" ),
( "sub/draw_bmp.c" ),
( "sub/filter_regex.c", "posix" ),
( "sub/filter_sdh.c" ),
( "sub/img_convert.c" ),
( "sub/lavc_conv.c" ),