1
0
mirror of https://github.com/mpv-player/mpv synced 2025-04-29 14:50:14 +00:00

charset_conv: mp_msg conversions

This commit is contained in:
wm4 2013-12-21 20:37:16 +01:00
parent 0335011f11
commit 33c8fd789d
4 changed files with 33 additions and 27 deletions

View File

@ -37,6 +37,7 @@ static int d_check_file(struct demuxer *demuxer, enum demux_check check)
{ {
const char *user_cp = demuxer->opts->sub_cp; const char *user_cp = demuxer->opts->sub_cp;
struct stream *s = demuxer->stream; struct stream *s = demuxer->stream;
struct mp_log *log = demuxer->log;
// Older versions of libass will behave strange if renderer and track // Older versions of libass will behave strange if renderer and track
// library handles mismatch, so make sure everything uses a global handle. // library handles mismatch, so make sure everything uses a global handle.
ASS_Library *lib = demuxer->params ? demuxer->params->ass_library : NULL; ASS_Library *lib = demuxer->params ? demuxer->params->ass_library : NULL;
@ -56,7 +57,7 @@ static int d_check_file(struct demuxer *demuxer, enum demux_check check)
memcpy(tmp, buf.start, buf.len); memcpy(tmp, buf.start, buf.len);
buf.start = tmp; buf.start = tmp;
buf.start[buf.len] = '\0'; buf.start[buf.len] = '\0';
bstr cbuf = mp_charset_guess_and_conv_to_utf8(buf, user_cp, bstr cbuf = mp_charset_guess_and_conv_to_utf8(log, buf, user_cp,
MP_ICONV_ALLOW_CUTOFF); MP_ICONV_ALLOW_CUTOFF);
if (cbuf.start == NULL) if (cbuf.start == NULL)
cbuf = buf; cbuf = buf;
@ -77,7 +78,7 @@ static int d_check_file(struct demuxer *demuxer, enum demux_check check)
"larger than 100 MB: %s\n", demuxer->filename); "larger than 100 MB: %s\n", demuxer->filename);
return -1; return -1;
} }
bstr cbuf = mp_charset_guess_and_conv_to_utf8(buf, user_cp, bstr cbuf = mp_charset_guess_and_conv_to_utf8(log, buf, user_cp,
MP_ICONV_VERBOSE); MP_ICONV_VERBOSE);
if (cbuf.start == NULL) if (cbuf.start == NULL)
cbuf = buf; cbuf = buf;

View File

@ -86,7 +86,7 @@ bool mp_charset_requires_guess(const char *user_cp)
} }
#if HAVE_ENCA #if HAVE_ENCA
static const char *enca_guess(bstr buf, const char *language) static const char *enca_guess(struct mp_log *log, bstr buf, const char *language)
{ {
if (!language || !language[0]) if (!language || !language[0])
language = "__"; // neutral language language = "__"; // neutral language
@ -102,14 +102,14 @@ static const char *enca_guess(bstr buf, const char *language)
detected_cp = tmp; detected_cp = tmp;
enca_analyser_free(analyser); enca_analyser_free(analyser);
} else { } else {
mp_msg(MSGT_SUBREADER, MSGL_ERR, "ENCA doesn't know language '%s'\n", mp_err(log, "ENCA doesn't know language '%s'\n",
language); language);
size_t langcnt; size_t langcnt;
const char **languages = enca_get_languages(&langcnt); const char **languages = enca_get_languages(&langcnt);
mp_msg(MSGT_SUBREADER, MSGL_ERR, "ENCA supported languages:"); mp_err(log, "ENCA supported languages:");
for (int i = 0; i < langcnt; i++) for (int i = 0; i < langcnt; i++)
mp_msg(MSGT_SUBREADER, MSGL_ERR, " %s", languages[i]); mp_err(log, " %s", languages[i]);
mp_msg(MSGT_SUBREADER, MSGL_ERR, "\n"); mp_err(log, "\n");
free(languages); free(languages);
} }
@ -118,10 +118,11 @@ static const char *enca_guess(bstr buf, const char *language)
#endif #endif
#if HAVE_LIBGUESS #if HAVE_LIBGUESS
static const char *libguess_guess(bstr buf, const char *language) static const char *libguess_guess(struct mp_log *log, bstr buf,
const char *language)
{ {
if (!language || !language[0] || strcmp(language, "help") == 0) { if (!language || !language[0] || strcmp(language, "help") == 0) {
mp_msg(MSGT_SUBREADER, MSGL_ERR, "libguess needs a language: " mp_err(log, "libguess needs a language: "
"japanese taiwanese chinese korean russian arabic turkish " "japanese taiwanese chinese korean russian arabic turkish "
"greek hebrew polish baltic\n"); "greek hebrew polish baltic\n");
return NULL; return NULL;
@ -136,7 +137,8 @@ static const char *libguess_guess(bstr buf, const char *language)
// If user_cp doesn't refer to any known auto-detection (for example because // If user_cp doesn't refer to any known auto-detection (for example because
// it's a real iconv codepage), user_cp is returned without even looking at // it's a real iconv codepage), user_cp is returned without even looking at
// the buf data. // the buf data.
const char *mp_charset_guess(bstr buf, const char *user_cp, int flags) const char *mp_charset_guess(struct mp_log *log, bstr buf, const char *user_cp,
int flags)
{ {
if (!mp_charset_requires_guess(user_cp)) if (!mp_charset_requires_guess(user_cp))
return user_cp; return user_cp;
@ -159,11 +161,11 @@ const char *mp_charset_guess(bstr buf, const char *user_cp, int flags)
#if HAVE_ENCA #if HAVE_ENCA
if (bstrcasecmp0(type, "enca") == 0) if (bstrcasecmp0(type, "enca") == 0)
res = enca_guess(buf, lang); res = enca_guess(log, buf, lang);
#endif #endif
#if HAVE_LIBGUESS #if HAVE_LIBGUESS
if (bstrcasecmp0(type, "guess") == 0) if (bstrcasecmp0(type, "guess") == 0)
res = libguess_guess(buf, lang); res = libguess_guess(log, buf, lang);
#endif #endif
if (bstrcasecmp0(type, "utf8") == 0 || bstrcasecmp0(type, "utf-8") == 0) { if (bstrcasecmp0(type, "utf8") == 0 || bstrcasecmp0(type, "utf-8") == 0) {
if (!fallback) if (!fallback)
@ -171,12 +173,11 @@ const char *mp_charset_guess(bstr buf, const char *user_cp, int flags)
} }
if (res) { if (res) {
mp_msg(MSGT_SUBREADER, MSGL_DBG2, "%.*s detected charset: '%s'\n", mp_dbg(log, "%.*s detected charset: '%s'\n",
BSTR_P(type), res); BSTR_P(type), res);
} else { } else {
res = fallback; res = fallback;
mp_msg(MSGT_SUBREADER, MSGL_DBG2, mp_dbg(log, "Detection with %.*s failed: fallback to %s\n",
"Detection with %.*s failed: fallback to %s\n",
BSTR_P(type), res && res[0] ? res : "broken UTF-8/Latin1"); BSTR_P(type), res && res[0] ? res : "broken UTF-8/Latin1");
} }
@ -194,9 +195,11 @@ const char *mp_charset_guess(bstr buf, const char *user_cp, int flags)
// user_cp: iconv codepage, special value, NULL // user_cp: iconv codepage, special value, NULL
// flags: same as mp_iconv_to_utf8() // flags: same as mp_iconv_to_utf8()
// returns: same as mp_iconv_to_utf8() // returns: same as mp_iconv_to_utf8()
bstr mp_charset_guess_and_conv_to_utf8(bstr buf, const char *user_cp, int flags) bstr mp_charset_guess_and_conv_to_utf8(struct mp_log *log, bstr buf,
const char *user_cp, int flags)
{ {
return mp_iconv_to_utf8(buf, mp_charset_guess(buf, user_cp, flags), flags); return mp_iconv_to_utf8(log, buf, mp_charset_guess(log, buf, user_cp, flags),
flags);
} }
// Use iconv to convert buf to UTF-8. // Use iconv to convert buf to UTF-8.
@ -210,7 +213,7 @@ bstr mp_charset_guess_and_conv_to_utf8(bstr buf, const char *user_cp, int flags)
// cp: iconv codepage (or NULL) // cp: iconv codepage (or NULL)
// flags: combination of MP_ICONV_* flags // flags: combination of MP_ICONV_* flags
// returns: buf (no conversion), .start==NULL (error), or allocated buffer // returns: buf (no conversion), .start==NULL (error), or allocated buffer
bstr mp_iconv_to_utf8(bstr buf, const char *cp, int flags) bstr mp_iconv_to_utf8(struct mp_log *log, bstr buf, const char *cp, int flags)
{ {
#if HAVE_ICONV #if HAVE_ICONV
if (!cp || !cp[0] || mp_charset_is_utf8(cp)) if (!cp || !cp[0] || mp_charset_is_utf8(cp))
@ -225,8 +228,7 @@ bstr mp_iconv_to_utf8(bstr buf, const char *cp, int flags)
iconv_t icdsc; iconv_t icdsc;
if ((icdsc = iconv_open("UTF-8", cp)) == (iconv_t) (-1)) { if ((icdsc = iconv_open("UTF-8", cp)) == (iconv_t) (-1)) {
if (flags & MP_ICONV_VERBOSE) if (flags & MP_ICONV_VERBOSE)
mp_msg(MSGT_SUBREADER, MSGL_ERR, mp_err(log, "Error opening iconv with codepage '%s'\n", cp);
"Error opening iconv with codepage '%s'\n", cp);
goto failure; goto failure;
} }
@ -265,8 +267,7 @@ bstr mp_iconv_to_utf8(bstr buf, const char *cp, int flags)
break; break;
} }
if (flags & MP_ICONV_VERBOSE) { if (flags & MP_ICONV_VERBOSE) {
mp_msg(MSGT_SUBREADER, MSGL_ERR, mp_err(log, "Error recoding text with codepage '%s'\n", cp);
"Error recoding text with codepage '%s'\n", cp);
} }
talloc_free(outbuf); talloc_free(outbuf);
iconv_close(icdsc); iconv_close(icdsc);

View File

@ -4,6 +4,8 @@
#include <stdbool.h> #include <stdbool.h>
#include "bstr/bstr.h" #include "bstr/bstr.h"
struct mp_log;
enum { enum {
MP_ICONV_VERBOSE = 1, // print errors instead of failing silently MP_ICONV_VERBOSE = 1, // print errors instead of failing silently
MP_ICONV_ALLOW_CUTOFF = 2, // allow partial input data MP_ICONV_ALLOW_CUTOFF = 2, // allow partial input data
@ -12,8 +14,10 @@ enum {
bool mp_charset_is_utf8(const char *user_cp); bool mp_charset_is_utf8(const char *user_cp);
bool mp_charset_requires_guess(const char *user_cp); bool mp_charset_requires_guess(const char *user_cp);
const char *mp_charset_guess(bstr buf, const char *user_cp, int flags); const char *mp_charset_guess(struct mp_log *log, bstr buf, const char *user_cp,
bstr mp_charset_guess_and_conv_to_utf8(bstr buf, const char *user_cp, int flags); int flags);
bstr mp_iconv_to_utf8(bstr buf, const char *cp, int flags); bstr mp_charset_guess_and_conv_to_utf8(struct mp_log *log, bstr buf,
const char *user_cp, int flags);
bstr mp_iconv_to_utf8(struct mp_log *log, bstr buf, const char *cp, int flags);
#endif #endif

View File

@ -233,7 +233,7 @@ static struct demux_packet *recode_packet(struct mp_log *log,
{ {
struct demux_packet *pkt = NULL; struct demux_packet *pkt = NULL;
bstr in_buf = {in->buffer, in->len}; bstr in_buf = {in->buffer, in->len};
bstr conv = mp_iconv_to_utf8(in_buf, charset, MP_ICONV_VERBOSE); bstr conv = mp_iconv_to_utf8(log, in_buf, charset, MP_ICONV_VERBOSE);
if (conv.start && conv.start != in_buf.start) { if (conv.start && conv.start != in_buf.start) {
pkt = talloc_ptrtype(NULL, pkt); pkt = talloc_ptrtype(NULL, pkt);
talloc_steal(pkt, conv.start); talloc_steal(pkt, conv.start);
@ -292,7 +292,7 @@ static const char *guess_sub_cp(struct mp_log *log, struct packet_list *subs,
memcpy(text.start + text.len + pkt->len, sep, sep_len); memcpy(text.start + text.len + pkt->len, sep, sep_len);
text.len += pkt->len + sep_len; text.len += pkt->len + sep_len;
} }
const char *guess = mp_charset_guess(text, usercp, 0); const char *guess = mp_charset_guess(log, text, usercp, 0);
talloc_free(text.start); talloc_free(text.start);
return guess; return guess;
} }