sub: libguess support for -subcp

Actually this is rather disappointing.
This commit is contained in:
wm4 2013-06-24 23:06:34 +02:00
parent 709389ce65
commit f48829b546
3 changed files with 57 additions and 1 deletions

View File

@ -2044,6 +2044,15 @@
- ``--subcp=enca:pl`` guess the encoding for Polish, fall back on UTF-8.
- ``--subcp=enca`` try universal detection, fall back on UTF-8.
If the player was compiled with libguess support, you can use it with:
``--subcp=guess:<language>:<fallback codepage>``
Note that libguess always needs a language; there is no universal detection
mode. Use ``--subcp=guess:help`` to get a list of languages (as with ENCA,
it will be printed only if the conversion code is actually run, for
example when loading an external subtitle).
--sub-delay=<sec>
Delays subtitles by <sec> seconds. Can be negative.

21
configure vendored
View File

@ -292,6 +292,7 @@ Installation directories:
Optional features:
--disable-encoding disable encoding functionality [enable]
--disable-libguess disable libguess [autodetect]
--enable-termcap use termcap database for key codes [autodetect]
--enable-termios use termios database for key codes [autodetect]
--disable-iconv disable iconv for encoding conversion [autodetect]
@ -463,6 +464,7 @@ networking=yes
_winsock2_h=auto
_smb=auto
_libquvi=auto
_libguess=auto
_joystick=no
_lirc=auto
_lircc=auto
@ -663,6 +665,8 @@ for ac_option do
--disable-smb) _smb=no ;;
--enable-libquvi) _libquvi=yes ;;
--disable-libquvi) _libquvi=no ;;
--enable-libguess) _libguess=yes ;;
--disable-libguess) _libguess=no ;;
--enable-joystick) _joystick=yes ;;
--disable-joystick) _joystick=no ;;
--enable-libav) ffmpeg=yes ;;
@ -1685,6 +1689,21 @@ else
fi
echocheck "libguess support"
# Probe pkg-config unless libguess was explicitly disabled. The probe must
# also run for --enable-libguess: pkg_config_add is the only call in this
# check that can contribute libguess' compiler/linker flags (presumably it
# appends them on success - confirm against its definition). Skipping it for
# a forced "yes" would define CONFIG_LIBGUESS with nothing to link against.
if test "$_libguess" != no ; then
  if pkg_config_add 'libguess >= 1.0' ; then
    _libguess=yes
  elif test "$_libguess" = auto ; then
    # Autodetection failed: build without libguess. A forced "yes" is kept
    # as requested by the user.
    _libguess=no
  fi
fi
# $def_libguess is expanded later in the script, alongside the other $def_*
# feature macros.
if test "$_libguess" = yes; then
  def_libguess="#define CONFIG_LIBGUESS 1"
else
  def_libguess="#undef CONFIG_LIBGUESS"
fi
echores "$_libguess"
echocheck "Samba support (libsmbclient)"
if test "$_smb" = yes; then
libs_mplayer="$libs_mplayer -lsmbclient"
@ -3128,6 +3147,7 @@ VF_LAVFI = $vf_lavfi
AF_LAVFI = $af_lavfi
LIBSMBCLIENT = $_smb
LIBQUVI = $_libquvi
LIBGUESS = $_libguess
LIBTHEORA = $_theora
LIRC = $_lirc
MACOSX_BUNDLE = $_macosx_bundle
@ -3326,6 +3346,7 @@ $def_inet_pton
$def_networking
$def_smb
$def_libquvi
$def_libguess
$def_socklen_t
$def_vstream

View File

@ -31,6 +31,10 @@
#include <enca.h>
#endif
#ifdef CONFIG_LIBGUESS
#include <libguess.h>
#endif
#ifdef CONFIG_ICONV
#include <iconv.h>
#endif
@ -67,7 +71,8 @@ bool mp_charset_requires_guess(const char *user_cp)
{
bstr res[2] = {{0}};
split_colon(user_cp, 2, res);
return bstrcasecmp0(res[0], "enca") == 0;
return bstrcasecmp0(res[0], "enca") == 0 ||
bstrcasecmp0(res[0], "guess") == 0;
}
#ifdef CONFIG_ENCA
@ -102,6 +107,23 @@ static const char *enca_guess(bstr buf, const char *language)
}
#endif
#ifdef CONFIG_LIBGUESS
// Guess the charset of buf with libguess.
//  buf:      raw bytes to analyze
//  language: libguess language name (may be NULL or empty)
// Returns "UTF-8" if libguess_validate_utf8() accepts the buffer; this check
// runs first, so in that case the language argument is never looked at (and
// the language list below is not printed, even for "help").
// Otherwise, if language is NULL, empty or "help", the list of languages is
// logged as an error and NULL is returned. For any other language the result
// of libguess_determine_encoding() is passed through unchanged.
static const char *libguess_guess(bstr buf, const char *language)
{
    if (libguess_validate_utf8(buf.start, buf.len))
        return "UTF-8";

    const bool usable_language =
        language && language[0] && strcmp(language, "help") != 0;
    if (usable_language)
        return libguess_determine_encoding(buf.start, buf.len, language);

    // No language to work with (or help requested): list the choices.
    mp_msg(MSGT_SUBREADER, MSGL_ERR, "libguess needs a language: "
           "japanese taiwanese chinese korean russian arabic turkish "
           "greek hebrew polish baltic\n");
    return NULL;
}
#endif
// Runs charset auto-detection on the input buffer, and returns the result.
// If auto-detection fails, NULL is returned.
// If user_cp doesn't refer to any known auto-detection (for example because
@ -126,6 +148,10 @@ const char *mp_charset_guess(bstr buf, const char *user_cp)
if (bstrcasecmp0(type, "enca") == 0)
res = enca_guess(buf, lang);
#endif
#ifdef CONFIG_LIBGUESS
if (bstrcasecmp0(type, "guess") == 0)
res = libguess_guess(buf, lang);
#endif
if (res) {
mp_msg(MSGT_SUBREADER, MSGL_DBG2, "%.*s detected charset: '%s'\n",