1
0
mirror of https://github.com/mpv-player/mpv synced 2025-02-27 02:40:53 +00:00

filter_sdh: combine skip_bracketed and skip_parenthesized

These two functions are almost exactly the same. The parenthesis variant
is essentially just a special case with more conditions to not remove
text. These can easily be combined together into one generic
skip_enclosed function to handle both cases. We also use char * instead
of char for the character comparison here since not everything is
neccesarily 1 byte and can fit into a char. This will be useful for the
following commits where we extend this logic further.
This commit is contained in:
Dudemanguy 2023-11-05 12:16:48 -06:00
parent 2193893be7
commit b7d85f0d4a

View File

@ -86,7 +86,8 @@ static void copy_ass(struct sd_filter *sd, char **rpp, struct buffer *buf)
return; return;
} }
static bool skip_bracketed(struct sd_filter *sd, char **rpp, struct buffer *buf); static bool skip_enclosed(struct sd_filter *sd, char **rpp, struct buffer *buf,
const char *left, const char *right);
// check for speaker label, like MAN: // check for speaker label, like MAN:
// normal subtitles may include mixed case text with : after so // normal subtitles may include mixed case text with : after so
@ -128,7 +129,7 @@ static void skip_speaker_label(struct sd_filter *sd, char **rpp, struct buffer *
copy_ass(sd, &rp, buf); copy_ass(sd, &rp, buf);
} else if (rp[0] == '[') { } else if (rp[0] == '[') {
// not uncommon with [xxxx]: which should also be skipped // not uncommon with [xxxx]: which should also be skipped
if (!skip_bracketed(sd, &rp, buf)) { if (!skip_enclosed(sd, &rp, buf, "[", "]")) {
buf->pos = old_pos; buf->pos = old_pos;
return; return;
} }
@ -174,94 +175,54 @@ static void skip_speaker_label(struct sd_filter *sd, char **rpp, struct buffer *
return; return;
} }
// check for bracketed text, like [SOUND] // Check for text enclosed in symbols, like (SOUND)
// and skip it while preserving ass tags // and skip it while preserving ass tags.
// any characters are allowed, brackets are seldom used in normal text // Parentheses are a special case since normal subtitles may have
// them so only upper case is accepted and lower case l which for
// some looks like upper case I. If sub_filter_SDH_harder is used,
// both upper and lower case is accepted.
//
// For other symbols, all text in between is removed.
// //
// Parameters: // Parameters:
// rpp read pointer pointer to source string, updated on return // rpp read pointer pointer to source string, updated on return
// buf write buffer // buf write buffer
// //
// scan in source string // scan in source string
// the first character in source string must by the starting '[' // the first character in source string must be the starting left symbol
// and copy ass tags to destination string but // and copy ass tags to destination string but
// skipping bracketed text if it looks like SDH // skipping enclosed text if it looks like SDH
// //
// return true if bracketed text was removed. // return true if enclosed text was removed.
// if not valid SDH read pointer and write buffer position will be unchanged // if not valid SDH read pointer and write buffer position will be unchanged
// otherwise they point to next position after text and next write position // otherwise they point to next position after text and next write position
static bool skip_bracketed(struct sd_filter *sd, char **rpp, struct buffer *buf) static bool skip_enclosed(struct sd_filter *sd, char **rpp, struct buffer *buf,
const char *left, const char *right)
{ {
bool filter_harder = sd->opts->sub_filter_SDH_harder;
char *rp = *rpp; char *rp = *rpp;
int old_pos = buf->pos; int old_pos = buf->pos;
rp++; // skip past '[' rp++; // skip past the left character
// skip past valid data searching for ] // skip past valid data searching for the right character
while (*rp && rp[0] != ']') { bool only_digits = strcmp(left, "(") == 0;
while (*rp && rp[0] != right[0]) {
if (rp[0] == '{') { if (rp[0] == '{') {
copy_ass(sd, &rp, buf); copy_ass(sd, &rp, buf);
} else { } else if (strcmp(left, "(") == 0 && ((mp_isalpha(rp[0]) &&
rp++;
}
}
if (!*rp) {
// ] was not found
buf->pos = old_pos;
return false;
}
rp++; // skip ]
// skip trailing spaces
while (rp[0] == ' ') {
rp++;
}
*rpp = rp;
return true;
}
// check for parenthesized text, like (SOUND)
// and skip it while preserving ass tags
// normal subtitles may include mixed case text in parentheses so
// only upper case is accepted and lower case l which for some
// looks like upper case I but if requested harder filtering
// both upper and lower case is accepted
//
// Parameters:
// rpp read pointer pointer to source string, updated on return
// buf write buffer
//
// scan in source string
// the first character in source string must be the starting '('
// and copy ass tags to destination string but
// skipping parenthesized text if it looks like SDH
//
// return true if parenthesized text was removed.
// if not valid SDH read pointer and write buffer position will be unchanged
// otherwise they point to next position after text and next write position
static bool skip_parenthesized(struct sd_filter *sd, char **rpp, struct buffer *buf)
{
int filter_harder = sd->opts->sub_filter_SDH_harder;
char *rp = *rpp;
int old_pos = buf->pos;
rp++; // skip past '('
// skip past valid data searching for )
bool only_digits = true;
while (*rp && rp[0] != ')') {
if (rp[0] == '{') {
copy_ass(sd, &rp, buf);
} else if ((mp_isalpha(rp[0]) &&
(filter_harder || mp_isupper(rp[0]) || rp[0] == 'l')) || (filter_harder || mp_isupper(rp[0]) || rp[0] == 'l')) ||
mp_isdigit(rp[0]) || mp_isdigit(rp[0]) ||
rp[0] == ' ' || rp[0] == '\'' || rp[0] == '#' || rp[0] == ' ' || rp[0] == '\'' || rp[0] == '#' ||
rp[0] == '.' || rp[0] == ',' || rp[0] == '.' || rp[0] == ',' ||
rp[0] == '-' || rp[0] == '"' || rp[0] == '\\') { rp[0] == '-' || rp[0] == '"' || rp[0] == '\\')) {
if (!mp_isdigit(rp[0])) if (!mp_isdigit(rp[0]))
only_digits = false; only_digits = false;
rp++; rp++;
} else { } else if (strcmp(left, "(") == 0) {
buf->pos = old_pos; buf->pos = old_pos;
return false; return false;
} else {
rp++;
} }
} }
if (!*rp) { if (!*rp) {
@ -274,7 +235,7 @@ static bool skip_parenthesized(struct sd_filter *sd, char **rpp, struct buffer *
buf->pos = old_pos; buf->pos = old_pos;
return false; return false;
} }
rp++; // skip ) rp++; // skip right character
// skip trailing spaces // skip trailing spaces
while (rp[0] == ' ') { while (rp[0] == ' ') {
rp++; rp++;
@ -372,13 +333,13 @@ static char *filter_SDH(struct sd_filter *sd, char *data, int length, ptrdiff_t
while (*rp && !(rp[0] == '\\' && rp[1] == 'N')) { while (*rp && !(rp[0] == '\\' && rp[1] == 'N')) {
copy_ass(sd, &rp, buf); copy_ass(sd, &rp, buf);
if (rp[0] == '[') { if (rp[0] == '[') {
if (!skip_bracketed(sd, &rp, buf)) { if (!skip_enclosed(sd, &rp, buf, "[", "]")) {
append(sd, buf, rp[0]); append(sd, buf, rp[0]);
rp++; rp++;
line_with_text = true; line_with_text = true;
} }
} else if (rp[0] == '(') { } else if (rp[0] == '(') {
if (!skip_parenthesized(sd, &rp, buf)) { if (!skip_enclosed(sd, &rp, buf, "(", ")")) {
append(sd, buf, rp[0]); append(sd, buf, rp[0]);
rp++; rp++;
line_with_text = true; line_with_text = true;