mirror of
https://github.com/mpv-player/mpv
synced 2025-04-29 14:50:14 +00:00
charset_conv: mp_msg conversions
This commit is contained in:
parent
0335011f11
commit
33c8fd789d
@ -37,6 +37,7 @@ static int d_check_file(struct demuxer *demuxer, enum demux_check check)
|
|||||||
{
|
{
|
||||||
const char *user_cp = demuxer->opts->sub_cp;
|
const char *user_cp = demuxer->opts->sub_cp;
|
||||||
struct stream *s = demuxer->stream;
|
struct stream *s = demuxer->stream;
|
||||||
|
struct mp_log *log = demuxer->log;
|
||||||
// Older versions of libass will behave strangely if renderer and track
|
// Older versions of libass will behave strangely if renderer and track
|
||||||
// library handles mismatch, so make sure everything uses a global handle.
|
// library handles mismatch, so make sure everything uses a global handle.
|
||||||
ASS_Library *lib = demuxer->params ? demuxer->params->ass_library : NULL;
|
ASS_Library *lib = demuxer->params ? demuxer->params->ass_library : NULL;
|
||||||
@ -56,7 +57,7 @@ static int d_check_file(struct demuxer *demuxer, enum demux_check check)
|
|||||||
memcpy(tmp, buf.start, buf.len);
|
memcpy(tmp, buf.start, buf.len);
|
||||||
buf.start = tmp;
|
buf.start = tmp;
|
||||||
buf.start[buf.len] = '\0';
|
buf.start[buf.len] = '\0';
|
||||||
bstr cbuf = mp_charset_guess_and_conv_to_utf8(buf, user_cp,
|
bstr cbuf = mp_charset_guess_and_conv_to_utf8(log, buf, user_cp,
|
||||||
MP_ICONV_ALLOW_CUTOFF);
|
MP_ICONV_ALLOW_CUTOFF);
|
||||||
if (cbuf.start == NULL)
|
if (cbuf.start == NULL)
|
||||||
cbuf = buf;
|
cbuf = buf;
|
||||||
@ -77,7 +78,7 @@ static int d_check_file(struct demuxer *demuxer, enum demux_check check)
|
|||||||
"larger than 100 MB: %s\n", demuxer->filename);
|
"larger than 100 MB: %s\n", demuxer->filename);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
bstr cbuf = mp_charset_guess_and_conv_to_utf8(buf, user_cp,
|
bstr cbuf = mp_charset_guess_and_conv_to_utf8(log, buf, user_cp,
|
||||||
MP_ICONV_VERBOSE);
|
MP_ICONV_VERBOSE);
|
||||||
if (cbuf.start == NULL)
|
if (cbuf.start == NULL)
|
||||||
cbuf = buf;
|
cbuf = buf;
|
||||||
|
@ -86,7 +86,7 @@ bool mp_charset_requires_guess(const char *user_cp)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if HAVE_ENCA
|
#if HAVE_ENCA
|
||||||
static const char *enca_guess(bstr buf, const char *language)
|
static const char *enca_guess(struct mp_log *log, bstr buf, const char *language)
|
||||||
{
|
{
|
||||||
if (!language || !language[0])
|
if (!language || !language[0])
|
||||||
language = "__"; // neutral language
|
language = "__"; // neutral language
|
||||||
@ -102,14 +102,14 @@ static const char *enca_guess(bstr buf, const char *language)
|
|||||||
detected_cp = tmp;
|
detected_cp = tmp;
|
||||||
enca_analyser_free(analyser);
|
enca_analyser_free(analyser);
|
||||||
} else {
|
} else {
|
||||||
mp_msg(MSGT_SUBREADER, MSGL_ERR, "ENCA doesn't know language '%s'\n",
|
mp_err(log, "ENCA doesn't know language '%s'\n",
|
||||||
language);
|
language);
|
||||||
size_t langcnt;
|
size_t langcnt;
|
||||||
const char **languages = enca_get_languages(&langcnt);
|
const char **languages = enca_get_languages(&langcnt);
|
||||||
mp_msg(MSGT_SUBREADER, MSGL_ERR, "ENCA supported languages:");
|
mp_err(log, "ENCA supported languages:");
|
||||||
for (int i = 0; i < langcnt; i++)
|
for (int i = 0; i < langcnt; i++)
|
||||||
mp_msg(MSGT_SUBREADER, MSGL_ERR, " %s", languages[i]);
|
mp_err(log, " %s", languages[i]);
|
||||||
mp_msg(MSGT_SUBREADER, MSGL_ERR, "\n");
|
mp_err(log, "\n");
|
||||||
free(languages);
|
free(languages);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -118,10 +118,11 @@ static const char *enca_guess(bstr buf, const char *language)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if HAVE_LIBGUESS
|
#if HAVE_LIBGUESS
|
||||||
static const char *libguess_guess(bstr buf, const char *language)
|
static const char *libguess_guess(struct mp_log *log, bstr buf,
|
||||||
|
const char *language)
|
||||||
{
|
{
|
||||||
if (!language || !language[0] || strcmp(language, "help") == 0) {
|
if (!language || !language[0] || strcmp(language, "help") == 0) {
|
||||||
mp_msg(MSGT_SUBREADER, MSGL_ERR, "libguess needs a language: "
|
mp_err(log, "libguess needs a language: "
|
||||||
"japanese taiwanese chinese korean russian arabic turkish "
|
"japanese taiwanese chinese korean russian arabic turkish "
|
||||||
"greek hebrew polish baltic\n");
|
"greek hebrew polish baltic\n");
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -136,7 +137,8 @@ static const char *libguess_guess(bstr buf, const char *language)
|
|||||||
// If user_cp doesn't refer to any known auto-detection (for example because
|
// If user_cp doesn't refer to any known auto-detection (for example because
|
||||||
// it's a real iconv codepage), user_cp is returned without even looking at
|
// it's a real iconv codepage), user_cp is returned without even looking at
|
||||||
// the buf data.
|
// the buf data.
|
||||||
const char *mp_charset_guess(bstr buf, const char *user_cp, int flags)
|
const char *mp_charset_guess(struct mp_log *log, bstr buf, const char *user_cp,
|
||||||
|
int flags)
|
||||||
{
|
{
|
||||||
if (!mp_charset_requires_guess(user_cp))
|
if (!mp_charset_requires_guess(user_cp))
|
||||||
return user_cp;
|
return user_cp;
|
||||||
@ -159,11 +161,11 @@ const char *mp_charset_guess(bstr buf, const char *user_cp, int flags)
|
|||||||
|
|
||||||
#if HAVE_ENCA
|
#if HAVE_ENCA
|
||||||
if (bstrcasecmp0(type, "enca") == 0)
|
if (bstrcasecmp0(type, "enca") == 0)
|
||||||
res = enca_guess(buf, lang);
|
res = enca_guess(log, buf, lang);
|
||||||
#endif
|
#endif
|
||||||
#if HAVE_LIBGUESS
|
#if HAVE_LIBGUESS
|
||||||
if (bstrcasecmp0(type, "guess") == 0)
|
if (bstrcasecmp0(type, "guess") == 0)
|
||||||
res = libguess_guess(buf, lang);
|
res = libguess_guess(log, buf, lang);
|
||||||
#endif
|
#endif
|
||||||
if (bstrcasecmp0(type, "utf8") == 0 || bstrcasecmp0(type, "utf-8") == 0) {
|
if (bstrcasecmp0(type, "utf8") == 0 || bstrcasecmp0(type, "utf-8") == 0) {
|
||||||
if (!fallback)
|
if (!fallback)
|
||||||
@ -171,12 +173,11 @@ const char *mp_charset_guess(bstr buf, const char *user_cp, int flags)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (res) {
|
if (res) {
|
||||||
mp_msg(MSGT_SUBREADER, MSGL_DBG2, "%.*s detected charset: '%s'\n",
|
mp_dbg(log, "%.*s detected charset: '%s'\n",
|
||||||
BSTR_P(type), res);
|
BSTR_P(type), res);
|
||||||
} else {
|
} else {
|
||||||
res = fallback;
|
res = fallback;
|
||||||
mp_msg(MSGT_SUBREADER, MSGL_DBG2,
|
mp_dbg(log, "Detection with %.*s failed: fallback to %s\n",
|
||||||
"Detection with %.*s failed: fallback to %s\n",
|
|
||||||
BSTR_P(type), res && res[0] ? res : "broken UTF-8/Latin1");
|
BSTR_P(type), res && res[0] ? res : "broken UTF-8/Latin1");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -194,9 +195,11 @@ const char *mp_charset_guess(bstr buf, const char *user_cp, int flags)
|
|||||||
// user_cp: iconv codepage, special value, NULL
|
// user_cp: iconv codepage, special value, NULL
|
||||||
// flags: same as mp_iconv_to_utf8()
|
// flags: same as mp_iconv_to_utf8()
|
||||||
// returns: same as mp_iconv_to_utf8()
|
// returns: same as mp_iconv_to_utf8()
|
||||||
bstr mp_charset_guess_and_conv_to_utf8(bstr buf, const char *user_cp, int flags)
|
bstr mp_charset_guess_and_conv_to_utf8(struct mp_log *log, bstr buf,
|
||||||
|
const char *user_cp, int flags)
|
||||||
{
|
{
|
||||||
return mp_iconv_to_utf8(buf, mp_charset_guess(buf, user_cp, flags), flags);
|
return mp_iconv_to_utf8(log, buf, mp_charset_guess(log, buf, user_cp, flags),
|
||||||
|
flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Use iconv to convert buf to UTF-8.
|
// Use iconv to convert buf to UTF-8.
|
||||||
@ -210,7 +213,7 @@ bstr mp_charset_guess_and_conv_to_utf8(bstr buf, const char *user_cp, int flags)
|
|||||||
// cp: iconv codepage (or NULL)
|
// cp: iconv codepage (or NULL)
|
||||||
// flags: combination of MP_ICONV_* flags
|
// flags: combination of MP_ICONV_* flags
|
||||||
// returns: buf (no conversion), .start==NULL (error), or allocated buffer
|
// returns: buf (no conversion), .start==NULL (error), or allocated buffer
|
||||||
bstr mp_iconv_to_utf8(bstr buf, const char *cp, int flags)
|
bstr mp_iconv_to_utf8(struct mp_log *log, bstr buf, const char *cp, int flags)
|
||||||
{
|
{
|
||||||
#if HAVE_ICONV
|
#if HAVE_ICONV
|
||||||
if (!cp || !cp[0] || mp_charset_is_utf8(cp))
|
if (!cp || !cp[0] || mp_charset_is_utf8(cp))
|
||||||
@ -225,8 +228,7 @@ bstr mp_iconv_to_utf8(bstr buf, const char *cp, int flags)
|
|||||||
iconv_t icdsc;
|
iconv_t icdsc;
|
||||||
if ((icdsc = iconv_open("UTF-8", cp)) == (iconv_t) (-1)) {
|
if ((icdsc = iconv_open("UTF-8", cp)) == (iconv_t) (-1)) {
|
||||||
if (flags & MP_ICONV_VERBOSE)
|
if (flags & MP_ICONV_VERBOSE)
|
||||||
mp_msg(MSGT_SUBREADER, MSGL_ERR,
|
mp_err(log, "Error opening iconv with codepage '%s'\n", cp);
|
||||||
"Error opening iconv with codepage '%s'\n", cp);
|
|
||||||
goto failure;
|
goto failure;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -265,8 +267,7 @@ bstr mp_iconv_to_utf8(bstr buf, const char *cp, int flags)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (flags & MP_ICONV_VERBOSE) {
|
if (flags & MP_ICONV_VERBOSE) {
|
||||||
mp_msg(MSGT_SUBREADER, MSGL_ERR,
|
mp_err(log, "Error recoding text with codepage '%s'\n", cp);
|
||||||
"Error recoding text with codepage '%s'\n", cp);
|
|
||||||
}
|
}
|
||||||
talloc_free(outbuf);
|
talloc_free(outbuf);
|
||||||
iconv_close(icdsc);
|
iconv_close(icdsc);
|
||||||
|
@ -4,6 +4,8 @@
|
|||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include "bstr/bstr.h"
|
#include "bstr/bstr.h"
|
||||||
|
|
||||||
|
struct mp_log;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
MP_ICONV_VERBOSE = 1, // print errors instead of failing silently
|
MP_ICONV_VERBOSE = 1, // print errors instead of failing silently
|
||||||
MP_ICONV_ALLOW_CUTOFF = 2, // allow partial input data
|
MP_ICONV_ALLOW_CUTOFF = 2, // allow partial input data
|
||||||
@ -12,8 +14,10 @@ enum {
|
|||||||
|
|
||||||
bool mp_charset_is_utf8(const char *user_cp);
|
bool mp_charset_is_utf8(const char *user_cp);
|
||||||
bool mp_charset_requires_guess(const char *user_cp);
|
bool mp_charset_requires_guess(const char *user_cp);
|
||||||
const char *mp_charset_guess(bstr buf, const char *user_cp, int flags);
|
const char *mp_charset_guess(struct mp_log *log, bstr buf, const char *user_cp,
|
||||||
bstr mp_charset_guess_and_conv_to_utf8(bstr buf, const char *user_cp, int flags);
|
int flags);
|
||||||
bstr mp_iconv_to_utf8(bstr buf, const char *cp, int flags);
|
bstr mp_charset_guess_and_conv_to_utf8(struct mp_log *log, bstr buf,
|
||||||
|
const char *user_cp, int flags);
|
||||||
|
bstr mp_iconv_to_utf8(struct mp_log *log, bstr buf, const char *cp, int flags);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -233,7 +233,7 @@ static struct demux_packet *recode_packet(struct mp_log *log,
|
|||||||
{
|
{
|
||||||
struct demux_packet *pkt = NULL;
|
struct demux_packet *pkt = NULL;
|
||||||
bstr in_buf = {in->buffer, in->len};
|
bstr in_buf = {in->buffer, in->len};
|
||||||
bstr conv = mp_iconv_to_utf8(in_buf, charset, MP_ICONV_VERBOSE);
|
bstr conv = mp_iconv_to_utf8(log, in_buf, charset, MP_ICONV_VERBOSE);
|
||||||
if (conv.start && conv.start != in_buf.start) {
|
if (conv.start && conv.start != in_buf.start) {
|
||||||
pkt = talloc_ptrtype(NULL, pkt);
|
pkt = talloc_ptrtype(NULL, pkt);
|
||||||
talloc_steal(pkt, conv.start);
|
talloc_steal(pkt, conv.start);
|
||||||
@ -292,7 +292,7 @@ static const char *guess_sub_cp(struct mp_log *log, struct packet_list *subs,
|
|||||||
memcpy(text.start + text.len + pkt->len, sep, sep_len);
|
memcpy(text.start + text.len + pkt->len, sep, sep_len);
|
||||||
text.len += pkt->len + sep_len;
|
text.len += pkt->len + sep_len;
|
||||||
}
|
}
|
||||||
const char *guess = mp_charset_guess(text, usercp, 0);
|
const char *guess = mp_charset_guess(log, text, usercp, 0);
|
||||||
talloc_free(text.start);
|
talloc_free(text.start);
|
||||||
return guess;
|
return guess;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user