Add optional ENCA charset autodetection for text subtitles.

When built with HAVE_ENCA, a sub_cp value of the form
"enca:<lang>:<fallback>" makes sub_read_file() ask ENCA for the charset of
the subtitle file (guess_cp) and open iconv with the result; <fallback> is
used when nothing is detected.  The Ogg and Matroska demuxers call
subcp_open_noenca(), which goes straight to <fallback>.

Review notes for this revision of the patch:
  * sub_cp is checked for NULL before it reaches sscanf()/strdup().
  * The fallback field is read with %99s so it cannot overrun its buffer.
  * guess_cp() falls back when ENCA reports ENCA_CS_UNKNOWN, checks malloc(),
    and stops at the first matching language.
  * configure sets _def_enca/_ld_enca for --enable-enca as well as autodetect.
  * Whitespace of context lines was reconstructed; apply with
    "patch -l" or "git apply --ignore-whitespace" if strict matching fails.
  * The post-image blob ids on the "index" lines of configure, subreader.c
    and subreader.h predate these changes.

diff --git a/Makefile b/Makefile
index 5a41fd114a..56a3b33d07 100644
--- a/Makefile
+++ b/Makefile
@@ -35,7 +35,7 @@ OBJS_MPLAYER = $(SRCS_MPLAYER:.c=.o)
 VO_LIBS = $(AA_LIB) $(X_LIB) $(SDL_LIB) $(GGI_LIB) $(MP1E_LIB) $(MLIB_LIB) $(SVGA_LIB) $(DIRECTFB_LIB) $(CACA_LIB)
 AO_LIBS = $(ARTS_LIB) $(ESD_LIB) $(NAS_LIB) $(SGIAUDIO_LIB)
 CODEC_LIBS = $(AV_LIB) $(FAME_LIB) $(MAD_LIB) $(VORBIS_LIB) $(THEORA_LIB) $(FAAD_LIB) $(LIBLZO_LIB) $(DECORE_LIB) $(XVID_LIB) $(PNG_LIB) $(Z_LIB) $(JPEG_LIB) $(ALSA_LIB) $(XMMS_LIB) $(MATROSKA_LIB)
-COMMON_LIBS = libmpcodecs/libmpcodecs.a mp3lib/libMP3.a liba52/liba52.a libmpeg2/libmpeg2.a $(W32_LIB) $(DS_LIB) libaf/libaf.a libmpdemux/libmpdemux.a input/libinput.a postproc/libswscale.a osdep/libosdep.a $(DVDREAD_LIB) $(CODEC_LIBS) $(FREETYPE_LIB) $(TERMCAP_LIB) $(CDPARANOIA_LIB) $(MPLAYER_NETWORK_LIB) $(WIN32_LIB) $(GIF_LIB) $(MACOSX_FRAMEWORKS) $(SMBSUPPORT_LIB) $(FRIBIDI_LIB) $(FONTCONFIG_LIB)
+COMMON_LIBS = libmpcodecs/libmpcodecs.a mp3lib/libMP3.a liba52/liba52.a libmpeg2/libmpeg2.a $(W32_LIB) $(DS_LIB) libaf/libaf.a libmpdemux/libmpdemux.a input/libinput.a postproc/libswscale.a osdep/libosdep.a $(DVDREAD_LIB) $(CODEC_LIBS) $(FREETYPE_LIB) $(TERMCAP_LIB) $(CDPARANOIA_LIB) $(MPLAYER_NETWORK_LIB) $(WIN32_LIB) $(GIF_LIB) $(MACOSX_FRAMEWORKS) $(SMBSUPPORT_LIB) $(FRIBIDI_LIB) $(FONTCONFIG_LIB) $(ENCA_LIB)
 
 CFLAGS = $(OPTFLAGS) -Ilibmpdemux -Iloader -Ilibvo $(FREETYPE_INC) $(EXTRA_INC) $(CDPARANOIA_INC) $(SDL_INC) $(X11_INC) $(FRIBIDI_INC) $(DVB_INC) $(XVID_INC) $(FONTCONFIG_INC) # -Wall
 
diff --git a/configure b/configure
index 6d1d11aee7..83523e1c2a 100755
--- a/configure
+++ b/configure
@@ -178,6 +178,7 @@ Optional features:
   --enable-menu          Enable OSD menu support (NOT DVD MENU) [disabled]
   --disable-sortsub      Disable subtitles sorting [enabled]
   --enable-fribidi       Enable using the FriBiDi libs [disabled]
+  --disable-enca         Disable using ENCA charset oracle library [autodetect]
   --disable-macosx       Disable Mac OS X specific features [autodetect]
   --disable-inet6        Disable IPv6 support [autodetect]
   --disable-gethostbyname2  gethostbyname() function is not provided by the C
@@ -1248,6 +1249,7 @@ _sortsub=yes
 _freetypeconfig='freetype-config'
 _fribidi=no
 _fribidiconfig='fribidi-config'
+_enca=auto
 _inet6=auto
 _gethostbyname2=auto
 _ftp=yes
@@ -1472,6 +1474,9 @@ for ac_option do
   --enable-fribidi)	_fribidi=yes	;;
   --disable-fribidi)	_fribidi=no	;;
 
+  --enable-enca)	_enca=yes	;;
+  --disable-enca)	_enca=no	;;
+
   --enable-inet6)	_inet6=yes	;;
   --disable-inet6)	_inet6=no	;;
 
@@ -4440,6 +4445,30 @@ fi
 echores "$_fribidi"
 
 
+echocheck "ENCA"
+if test "$_enca" = auto ; then
+    cat > $TMPC << EOF
+#include <enca.h>
+int main()
+{
+    const char **langs;
+    size_t langcnt;
+    langs = enca_get_languages(&langcnt);
+    return 0;
+}
+EOF
+    _enca=no
+    cc_check -lenca && _enca=yes
+fi
+if test "$_enca" = yes ; then
+    _def_enca='#define HAVE_ENCA 1'
+    _ld_enca='-lenca'
+else
+    _def_enca='#undef HAVE_ENCA'
+fi
+echores "$_enca"
+
+
 echocheck "zlib"
 cat > $TMPC << EOF
 #include <zlib.h>
@@ -5966,6 +5995,7 @@ Z_LIB = $_ld_static $_ld_zlib
 HAVE_MLIB = $_mlib
 WIN32_LIB = $_ld_win32libs
 STATIC_LIB = $_ld_static
+ENCA_LIB = $_ld_enca
 
 X11_INC = $_inc_x11
 X11DIR = $_ld_x11
@@ -6515,6 +6545,9 @@ $_def_fontconfig
 /* enable FriBiDi usage */
 $_def_fribidi
 
+/* enable ENCA usage */
+$_def_enca
+
 /* liblzo support */
 $_def_liblzo
 
diff --git a/libmpdemux/demux_mkv.c b/libmpdemux/demux_mkv.c
index c3e1440f37..13ab9cbeab 100644
--- a/libmpdemux/demux_mkv.c
+++ b/libmpdemux/demux_mkv.c
@@ -1832,7 +1832,7 @@ demux_mkv_open (demuxer_t *demuxer)
   char *str;
 
 #ifdef USE_ICONV
-  subcp_open();
+  subcp_open_noenca();
 #endif
 
   stream_seek(s, s->start_pos);
diff --git a/libmpdemux/demux_ogg.c b/libmpdemux/demux_ogg.c
index c8ff7bc628..e6410327e6 100644
--- a/libmpdemux/demux_ogg.c
+++ b/libmpdemux/demux_ogg.c
@@ -664,7 +664,7 @@ int demux_ogg_open(demuxer_t* demuxer) {
   sh_video_t* sh_v;
 
 #ifdef USE_ICONV
-  subcp_open();
+  subcp_open_noenca();
 #endif
 
   clear_sub = -1;
diff --git a/subreader.c b/subreader.c
index 430cd66dae..b492705f46 100644
--- a/subreader.c
+++ b/subreader.c
@@ -19,6 +19,10 @@
 #include "mp_msg.h"
 #include "subreader.h"
 
+#ifdef HAVE_ENCA
+#include <enca.h>
+#endif
+
 #define ERR ((void *) -1)
 
 #ifdef USE_ICONV
@@ -1037,12 +1041,33 @@ extern float sub_fps;
 #ifdef USE_ICONV
 static iconv_t icdsc = (iconv_t)(-1);
 
-void	subcp_open (void)
+#ifdef HAVE_ENCA
+/* Open the iconv descriptor from sub_cp without charset detection (used by
+ * the Ogg and Matroska demuxers): "enca:<lang>:<fallback>" opens <fallback>. */
+void	subcp_open_noenca (void)
+{
+    char enca_lang[100], enca_fallback[100];
+    /* sub_cp may be NULL; the field widths keep sscanf inside the buffers */
+    if (sub_cp && (sscanf(sub_cp, "enca:%2s:%99s", enca_lang, enca_fallback) == 2
+     || sscanf(sub_cp, "ENCA:%2s:%99s", enca_lang, enca_fallback) == 2)) {
+	subcp_open(enca_fallback);
+    } else {
+	subcp_open(sub_cp);
+    }
+}
+#else
+void	subcp_open_noenca (void)
+{
+    subcp_open(sub_cp);
+}
+#endif
+
+void	subcp_open (char *current_sub_cp)
 {
 	char *tocp = "UTF-8";
 
-	if (sub_cp){
-		if ((icdsc = iconv_open (tocp, sub_cp)) != (iconv_t)(-1)){
+	if (current_sub_cp){
+		if ((icdsc = iconv_open (tocp, current_sub_cp)) != (iconv_t)(-1)){
 			mp_msg(MSGT_SUBREADER,MSGL_V,"SUB: opened iconv descriptor.\n");
 			sub_utf8 = 2;
 		} else
@@ -1246,13 +1271,66 @@ struct subreader {
     const char *name;
 };
 
+#ifdef HAVE_ENCA
+#define MAX_GUESS_BUFFER_SIZE (256*1024)
+/* Guess the charset of the subtitle file fd with ENCA, analysing at most
+ * MAX_GUESS_BUFFER_SIZE bytes as text in preferred_language.  fd is rewound
+ * after reading.  Returns a strdup()ed iconv charset name the caller must
+ * free(): the detected one, or a copy of fallback if nothing was detected. */
+void* guess_cp(FILE *fd, char *preferred_language, char *fallback)
+{
+    const char **languages;
+    size_t langcnt, buflen, i;
+    EncaAnalyser analyser;
+    EncaEncoding encoding;
+    unsigned char *buffer;
+    const char *name;
+    char *detected_sub_cp = NULL;
+
+    buffer = malloc(MAX_GUESS_BUFFER_SIZE);
+    if (!buffer) return strdup(fallback);
+    buflen = fread(buffer, 1, MAX_GUESS_BUFFER_SIZE, fd);
+
+    languages = enca_get_languages(&langcnt);
+    mp_msg(MSGT_SUBREADER, MSGL_V, "ENCA supported languages: ");
+    for (i = 0; i < langcnt; i++) {
+	mp_msg(MSGT_SUBREADER, MSGL_V, "%s ", languages[i]);
+    }
+    mp_msg(MSGT_SUBREADER, MSGL_V, "\n");
+
+    for (i = 0; i < langcnt; i++) {
+	if (strcasecmp(languages[i], preferred_language) != 0) continue;
+	analyser = enca_analyser_alloc(languages[i]);
+	encoding = enca_analyse_const(analyser, buffer, buflen);
+	name = enca_charset_name(encoding.charset, ENCA_NAME_STYLE_ICONV);
+	/* do not hand iconv the placeholder name of an undetected charset */
+	if (name && encoding.charset != ENCA_CS_UNKNOWN) {
+	    mp_msg(MSGT_SUBREADER, MSGL_INFO, "ENCA detected charset: %s\n", name);
+	    detected_sub_cp = strdup(name);
+	}
+	enca_analyser_free(analyser);
+	break; /* first match wins, so a result is never overwritten and leaked */
+    }
+
+    free(languages);
+    free(buffer);
+    rewind(fd);
+
+    if (!detected_sub_cp) detected_sub_cp = strdup(fallback);
+
+    return detected_sub_cp;
+}
+#endif
+
 sub_data* sub_read_file (char *filename, float fps) {
 	//filename is assumed to be malloc'ed,  free() is used in sub_free()
     FILE *fd;
     int n_max, n_first, i, j, sub_first, sub_orig;
     subtitle *first, *second, *sub, *return_sub;
     sub_data *subt_data;
+    char enca_lang[100], enca_fallback[100];
     int uses_time = 0, sub_num = 0, sub_errs = 0;
+    char *current_sub_cp=NULL;
     struct subreader sr[]=
     {
 	    { sub_read_line_microdvd, NULL, "microdvd" },
@@ -1283,6 +1361,18 @@ sub_data* sub_read_file (char *filename, float fps) {
 
     rewind (fd);
 
+#ifdef HAVE_ENCA
+    /* sub_cp may be NULL; the field widths keep sscanf inside the buffers */
+    if (sub_cp && (sscanf(sub_cp, "enca:%2s:%99s", enca_lang, enca_fallback) == 2
+     || sscanf(sub_cp, "ENCA:%2s:%99s", enca_lang, enca_fallback) == 2)) {
+	current_sub_cp = guess_cp(fd, enca_lang, enca_fallback);
+    } else if (sub_cp) {
+	current_sub_cp = strdup(sub_cp);
+    }
+#else
+    if (sub_cp) current_sub_cp = strdup(sub_cp);
+#endif
+
 #ifdef USE_ICONV
     sub_utf8_prev=sub_utf8;
     {
@@ -1296,9 +1386,10 @@ sub_data* sub_read_file (char *filename, float fps) {
 			    break;
 			}
 	    }
-	    if (k<0) subcp_open();
+	    if (k<0) subcp_open(current_sub_cp);
     }
 #endif
+    free(current_sub_cp);
 
     sub_num=0;n_max=32;
     first=(subtitle *)malloc(n_max*sizeof(subtitle));
@@ -1790,7 +1881,11 @@ char** sub_filenames(char* path, char *fname)
 		// does it end with a subtitle extension?
 		found = 0;
 #ifdef USE_ICONV
+#ifdef HAVE_ENCA
+		for (i = ((sub_cp && strncasecmp(sub_cp, "enca", 4) != 0) ? 3 : 0); sub_exts[i]; i++) {
+#else
 		for (i = (sub_cp ? 3 : 0); sub_exts[i]; i++) {
+#endif
 #else
 		for (i = 0; sub_exts[i]; i++) {
 #endif
diff --git a/subreader.h b/subreader.h
index 5b83becf7b..6fca6c5869 100644
--- a/subreader.h
+++ b/subreader.h
@@ -53,7 +53,8 @@ typedef struct {
 
 sub_data* sub_read_file (char *filename, float pts);
 subtitle* subcp_recode1 (subtitle *sub);
-void subcp_open (void); /* for demux_ogg.c */
+void subcp_open (char *current_sub_cp); /* for demux_ogg.c */
+void subcp_open_noenca (void); /* for demux_ogg.c and demux_mkv.c */
 void subcp_close (void); /* for demux_ogg.c */
 char ** sub_filenames(char *path, char *fname);
 void list_sub_file(sub_data* subd);