From f796399344a1bb2ecdf9e273ea62dfa53e33908a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20B=C5=93sch?=
Date: Mon, 7 Jan 2013 18:08:56 +0100
Subject: [PATCH] lavc: support subtitles character encoding conversion.

---
Review note: recode_subtitle() was amended during review (errno is read
before it can be clobbered and a failed conversion can no longer return 0;
the recoded text is NUL-padded instead of losing its last byte). The commit
and blob hashes below still refer to the originally submitted patch.

 Changelog                  |   1 +
 configure                  |   2 +
 libavcodec/avcodec.h       |  18 +++++++
 libavcodec/options_table.h |   5 ++
 libavcodec/utils.c         | 123 +++++++++++++++++++++++++++++++++++--
 libavcodec/version.h       |   4 +-
 6 files changed, 148 insertions(+), 5 deletions(-)

diff --git a/Changelog b/Changelog
index a2ff887582..57c97887b8 100644
--- a/Changelog
+++ b/Changelog
@@ -21,6 +21,7 @@ version <next>:
 - encrypted TTA stream decoding support
 - RF64 support in WAV muxer
 - noise filter ported from libmpcodecs
+- Subtitles character encoding conversion
 
 
 version 1.1:
diff --git a/configure b/configure
index e102b258c5..81b593bdbd 100755
--- a/configure
+++ b/configure
@@ -1390,6 +1390,7 @@ HAVE_LIST="
     gnu_as
     gsm_h
     ibm_asm
+    iconv
     inet_aton
     io_h
     isatty
@@ -3716,6 +3717,7 @@ check_func  getopt
 check_func  getrusage
 check_struct "sys/time.h sys/resource.h" "struct rusage" ru_maxrss
 check_func  gettimeofday
+check_func  iconv
 check_func  inet_aton $network_extralibs
 check_func  isatty
 check_func  localtime_r
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index fc7091c16c..31298ae984 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -3208,6 +3208,24 @@ typedef struct AVCodecContext {
      * - encoding: unused
      */
     AVDictionary *metadata;
+
+    /**
+     * Character encoding of the input subtitles file.
+     * - decoding: set by user
+     * - encoding: unused
+     */
+    char *sub_charenc;
+
+    /**
+     * Subtitles character encoding mode. Formats or codecs might be adjusting
+     * this setting (if they are doing the conversion themselves for instance).
+     * - decoding: set by libavcodec
+     * - encoding: unused
+     */
+    int sub_charenc_mode;
+#define FF_SUB_CHARENC_MODE_DO_NOTHING  -1  ///< do nothing (demuxer outputs a stream supposed to be already in UTF-8, or the codec is bitmap for instance)
+#define FF_SUB_CHARENC_MODE_AUTOMATIC    0  ///< libavcodec will select the mode itself
+#define FF_SUB_CHARENC_MODE_PRE_DECODER  1  ///< the AVPacket data needs to be recoded to UTF-8 before being fed to the decoder, requires iconv
 } AVCodecContext;
 
 AVRational av_codec_get_pkt_timebase (const AVCodecContext *avctx);
diff --git a/libavcodec/options_table.h b/libavcodec/options_table.h
index 33cb4b2f0c..3e01082cf5 100644
--- a/libavcodec/options_table.h
+++ b/libavcodec/options_table.h
@@ -406,6 +406,11 @@ static const AVOption options[]={
 {"ka", "Karaoke", 0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_KARAOKE }, INT_MIN, INT_MAX, A|E, "audio_service_type"},
 {"request_sample_fmt", "sample format audio decoders should prefer", OFFSET(request_sample_fmt), AV_OPT_TYPE_SAMPLE_FMT, {.i64=AV_SAMPLE_FMT_NONE}, -1, AV_SAMPLE_FMT_NB-1, A|D, "request_sample_fmt"},
 {"pkt_timebase", NULL, OFFSET(pkt_timebase), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, 0, INT_MAX, 0},
+{"sub_charenc", "set input text subtitles character encoding", OFFSET(sub_charenc), AV_OPT_TYPE_STRING, {.str = NULL}, CHAR_MIN, CHAR_MAX, S|D},
+{"sub_charenc_mode", "set input text subtitles character encoding mode", OFFSET(sub_charenc_mode), AV_OPT_TYPE_FLAGS, {.i64 = FF_SUB_CHARENC_MODE_AUTOMATIC}, -1, INT_MAX, S|D, "sub_charenc_mode"},
+{"do_nothing", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_DO_NOTHING}, INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
+{"auto", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_AUTOMATIC}, INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
+{"pre_decoder", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_PRE_DECODER}, INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
 {NULL},
 };
 
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 7e753ac407..ec773a4df4 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -48,6 +48,9 @@
 #include <stdarg.h>
 #include <limits.h>
 #include <float.h>
+#if HAVE_ICONV
+# include <iconv.h>
+#endif
 
 volatile int ff_avcodec_locked;
 static int volatile entangled_thread_counter = 0;
@@ -1089,6 +1092,32 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
             ret = AVERROR(EINVAL);
             goto free_and_end;
         }
+        if (avctx->sub_charenc) {
+            if (avctx->codec_type != AVMEDIA_TYPE_SUBTITLE) {
+                av_log(avctx, AV_LOG_ERROR, "Character encoding is only "
+                       "supported with subtitles codecs\n");
+                ret = AVERROR(EINVAL);
+                goto free_and_end;
+            } else if (avctx->codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB) {
+                av_log(avctx, AV_LOG_WARNING, "Codec '%s' is bitmap-based, "
+                       "subtitles character encoding will be ignored\n",
+                       avctx->codec_descriptor->name);
+                avctx->sub_charenc_mode = FF_SUB_CHARENC_MODE_DO_NOTHING;
+            } else {
+                /* input character encoding is set for a text based subtitle
+                 * codec at this point */
+                if (avctx->sub_charenc_mode == FF_SUB_CHARENC_MODE_AUTOMATIC)
+                    avctx->sub_charenc_mode = FF_SUB_CHARENC_MODE_PRE_DECODER;
+
+                if (!HAVE_ICONV && avctx->sub_charenc_mode == FF_SUB_CHARENC_MODE_PRE_DECODER) {
+                    av_log(avctx, AV_LOG_ERROR, "Character encoding subtitles "
+                           "conversion needs a libavcodec built with iconv support "
+                           "for this codec\n");
+                    ret = AVERROR(ENOSYS);
+                    goto free_and_end;
+                }
+            }
+        }
     }
 end:
     ff_unlock_avcodec();
@@ -1847,6 +1876,85 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
     return ret;
 }
 
+#define UTF8_MAX_BYTES 4 /* 5 and 6 bytes sequences should not be used */
+/**
+ * Recode the payload of a text subtitle packet to UTF-8 with iconv.
+ *
+ * Nothing is done unless avctx->sub_charenc_mode is
+ * FF_SUB_CHARENC_MODE_PRE_DECODER.
+ *
+ * @param avctx  codec context, supplies sub_charenc and sub_charenc_mode
+ * @param outpkt on success its data and size describe a newly allocated,
+ *               recoded buffer; on failure they must not be used
+ * @param inpkt  packet whose data is encoded in avctx->sub_charenc
+ * @return 0 on success or if there was nothing to do, a negative AVERROR
+ *         code otherwise
+ */
+static int recode_subtitle(AVCodecContext *avctx,
+                           AVPacket *outpkt, const AVPacket *inpkt)
+{
+#if HAVE_ICONV
+    iconv_t cd = (iconv_t)-1;
+    int ret = 0;
+    char *inb, *outb;
+    size_t inl, outl;
+    AVPacket tmp;
+#endif
+
+    if (avctx->sub_charenc_mode != FF_SUB_CHARENC_MODE_PRE_DECODER)
+        return 0;
+
+#if HAVE_ICONV
+    cd = iconv_open("UTF-8", avctx->sub_charenc);
+    if (cd == (iconv_t)-1) {
+        /* read errno before av_log() gets a chance to overwrite it */
+        ret = AVERROR(errno);
+        av_log(avctx, AV_LOG_ERROR, "Unable to open iconv context "
+               "with input character encoding \"%s\"\n", avctx->sub_charenc);
+        goto end;
+    }
+
+    inb = inpkt->data;
+    inl = inpkt->size;
+
+    if (inl >= INT_MAX / UTF8_MAX_BYTES - FF_INPUT_BUFFER_PADDING_SIZE) {
+        av_log(avctx, AV_LOG_ERROR, "Subtitles packet is too big for recoding\n");
+        ret = AVERROR(ENOMEM);
+        goto end;
+    }
+
+    ret = av_new_packet(&tmp, inl * UTF8_MAX_BYTES);
+    if (ret < 0)
+        goto end;
+    outpkt->data = tmp.data;
+    outpkt->size = tmp.size;
+    outb = outpkt->data;
+    outl = outpkt->size;
+
+    if (iconv(cd, &inb, &inl, &outb, &outl) == (size_t)-1 ||
+        iconv(cd, NULL, NULL, &outb, &outl) == (size_t)-1 ||
+        outl >= outpkt->size || inl != 0) {
+        /* errno is only set when iconv() itself failed; clamp the result so
+         * the size checks can never be reported as success */
+        ret = FFMIN(AVERROR(errno), -1);
+        av_log(avctx, AV_LOG_ERROR, "Unable to recode subtitle event \"%s\" "
+               "from %s to UTF-8\n", inpkt->data, avctx->sub_charenc);
+        av_free_packet(&tmp);
+        goto end;
+    }
+    outpkt->size -= outl;
+    /* zero the unused tail instead of overwriting the last recoded byte */
+    memset(outpkt->data + outpkt->size, 0, outl);
+
+end:
+    if (cd != (iconv_t)-1)
+        iconv_close(cd);
+    return ret;
+#else
+    av_assert0(!"requesting subtitles recoding without iconv");
+#endif
+}
+
 int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
                              int *got_sub_ptr,
                              AVPacket *avpkt)
@@ -1862,19 +1970,28 @@ int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
     avcodec_get_subtitle_defaults(sub);
 
     if (avpkt->size) {
+        AVPacket pkt_recoded;
         AVPacket tmp = *avpkt;
         int did_split = av_packet_split_side_data(&tmp);
         //apply_param_change(avctx, &tmp);
 
-        avctx->pkt = &tmp;
+        pkt_recoded = tmp;
+        ret = recode_subtitle(avctx, &pkt_recoded, &tmp);
+        if (ret < 0) {
+            *got_sub_ptr = 0;
+        } else {
+        avctx->pkt = &pkt_recoded;
 
         if (avctx->pkt_timebase.den && avpkt->pts != AV_NOPTS_VALUE)
             sub->pts = av_rescale_q(avpkt->pts,
                                     avctx->pkt_timebase, AV_TIME_BASE_Q);
-        ret = avctx->codec->decode(avctx, sub, got_sub_ptr, &tmp);
+        ret = avctx->codec->decode(avctx, sub, got_sub_ptr, &pkt_recoded);
+        if (tmp.data != pkt_recoded.data)
+            av_free(pkt_recoded.data);
         sub->format = !(avctx->codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB);
-
         avctx->pkt = NULL;
+        }
+
         if (did_split) {
             ff_packet_free_side_data(&tmp);
             if(ret == tmp.size)
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 96e53ffe89..dceeaa47e5 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -29,8 +29,8 @@
 #include "libavutil/avutil.h"
 
 #define LIBAVCODEC_VERSION_MAJOR 54
-#define LIBAVCODEC_VERSION_MINOR  91
-#define LIBAVCODEC_VERSION_MICRO 103
+#define LIBAVCODEC_VERSION_MINOR  92
+#define LIBAVCODEC_VERSION_MICRO 100
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
                                                LIBAVCODEC_VERSION_MINOR, \