From dac2a5881c13784c7820e28c958a3eb7cc85c549 Mon Sep 17 00:00:00 2001 From: Guido Cella Date: Tue, 16 Apr 2024 15:22:36 +0200 Subject: [PATCH 1/2] external_files: rename variables in guess_lang_from_filename() --- player/external_files.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/player/external_files.c b/player/external_files.c index 2e0091261d..f0194a7d77 100644 --- a/player/external_files.c +++ b/player/external_files.c @@ -110,36 +110,36 @@ static int compare_sub_priority(const void *a, const void *b) return strcoll(s1->fname, s2->fname); } -static struct bstr guess_lang_from_filename(struct bstr name, int *fn_start) +static struct bstr guess_lang_from_filename(struct bstr name, int *lang_start) { if (name.len < 2) return (struct bstr){NULL, 0}; - int n = 0; + int lang_length = 0; int i = name.len - 1; - char thing = '.'; + char delimiter = '.'; if (name.start[i] == ')') { - thing = '('; + delimiter = '('; i--; } if (name.start[i] == ']') { - thing = '['; + delimiter = '['; i--; } while (i >= 0 && mp_isalpha(name.start[i])) { - n++; - if (n > 3) + lang_length++; + if (lang_length > 3) return (struct bstr){NULL, 0}; i--; } - if (n < 2 || i == 0 || name.start[i] != thing) + if (lang_length < 2 || i == 0 || name.start[i] != delimiter) return (struct bstr){NULL, 0}; - *fn_start = i; - return (struct bstr){name.start + i + 1, n}; + *lang_start = i; + return (struct bstr){name.start + i + 1, lang_length}; } char *mp_guess_lang_from_filename(void* ctx, const char *filename) From edc0ed902f9fcdda28929fe8c7f2f4c72a40046a Mon Sep 17 00:00:00 2001 From: Guido Cella Date: Thu, 18 Apr 2024 18:08:17 +0200 Subject: [PATCH 2/2] external_files: detect language tags with hyphens This loads subtitle files like foo.en-US.srt with --sub-auto=exact. To preserve the case of these locales and not convert them to e.g. 
en-us, stop lower casing filenames, and instead use case insensitive functions to check if the media filename is contained in the external filenames. Extensions, whitelisted cover art filenames and idx files were already being compared case insensitively. Fixes #12372, fixes #13251. --- player/external_files.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/player/external_files.c b/player/external_files.c index f0194a7d77..d2a8ac098c 100644 --- a/player/external_files.c +++ b/player/external_files.c @@ -117,6 +117,7 @@ static struct bstr guess_lang_from_filename(struct bstr name, int *lang_start) int lang_length = 0; int i = name.len - 1; + int suffixes_length = 0; char delimiter = '.'; if (name.start[i] == ')') { @@ -128,15 +129,31 @@ static struct bstr guess_lang_from_filename(struct bstr name, int *lang_start) i--; } - while (i >= 0 && mp_isalpha(name.start[i])) { - lang_length++; - if (lang_length > 3) - return (struct bstr){NULL, 0}; - i--; + while (true) { + while (i >= 0 && mp_isalpha(name.start[i])) { + lang_length++; + i--; + } + + // According to + // https://en.wikipedia.org/wiki/IETF_language_tag#Syntax_of_language_tags + // subtags after the first are composed of 1 to 8 letters. + if (lang_length < suffixes_length + 1 || lang_length > suffixes_length + 8) + return (struct bstr){0}; + + if (i >= 0 && name.start[i] == '-') { + lang_length++; + i--; + suffixes_length = lang_length; + } else { + break; + } } - if (lang_length < 2 || i == 0 || name.start[i] != delimiter) - return (struct bstr){NULL, 0}; + // The primary subtag can have 2 or 3 letters. 
+ if (lang_length < suffixes_length + 2 || lang_length > suffixes_length + 3 || + i <= 0 || name.start[i] != delimiter) + return (struct bstr){0}; *lang_start = i; return (struct bstr){name.start + i + 1, lang_length}; @@ -162,7 +179,6 @@ static void append_dir_subtitles(struct mpv_global *global, struct MPOpts *opts, struct bstr f_fname = mp_iconv_to_utf8(log, f_fbname, "UTF-8-MAC", MP_NO_LATIN1_FALLBACK); struct bstr f_fname_noext = bstrdup(tmpmem, bstr_strip_ext(f_fname)); - bstr_lower(f_fname_noext); struct bstr f_fname_trim = bstr_strip(f_fname_noext); if (f_fbname.start != f_fname.start) @@ -185,7 +201,6 @@ static void append_dir_subtitles(struct mpv_global *global, struct MPOpts *opts, "UTF-8-MAC", MP_NO_LATIN1_FALLBACK); // retrieve various parts of the filename struct bstr tmp_fname_noext = bstrdup(tmpmem2, bstr_strip_ext(dename)); - bstr_lower(tmp_fname_noext); struct bstr tmp_fname_ext = bstr_get_ext(dename); struct bstr tmp_fname_trim = bstr_strip(tmp_fname_noext); @@ -217,13 +232,13 @@ static void append_dir_subtitles(struct mpv_global *global, struct MPOpts *opts, // higher prio -> auto-selection may prefer it (0 = not loaded) int prio = 0; - if (bstrcmp(tmp_fname_trim, f_fname_trim) == 0) + if (bstrcasecmp(tmp_fname_trim, f_fname_trim) == 0) prio |= 32; // exact movie name match bstr lang = {0}; int start = 0; lang = guess_lang_from_filename(tmp_fname_trim, &start); - if (bstr_startswith(tmp_fname_trim, f_fname_trim)) { + if (bstr_case_startswith(tmp_fname_trim, f_fname_trim)) { if (lang.len && start == f_fname_trim.len) prio |= 16; // exact movie name + followed by lang