From 70feca926b93f339cb58c340550e9a2367263bf7 Mon Sep 17 00:00:00 2001
From: Nicolas George
Date: Sun, 7 Apr 2013 09:51:23 +0200
Subject: [PATCH] lavc: check decoded subtitles encoding.

Address trac ticket #2431.
---
 libavcodec/utils.c            | 48 +++++++++++++++++++++++++++++++++-
 tests/fate/subtitles.mak      |  8 +++---
 tests/ref/fate/sub-aqtitle    |  2 +-
 tests/ref/fate/sub-subviewer1 |  2 +-
 4 files changed, 53 insertions(+), 7 deletions(-)

diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 1733d0f1a3..1f4793c1a3 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -2206,11 +2206,42 @@ end:
 #endif
 }
 
+/**
+ * Check that a NUL-terminated string is acceptable UTF-8.
+ *
+ * Each sequence is decoded with GET_UTF8 and then rejected if it is an
+ * overlong encoding, is not below 0x110000, equals 0xFFFE, or lies in the
+ * surrogate range 0xD800-0xDFFF.
+ *
+ * @param str NUL-terminated byte string to check
+ * @return 1 if every sequence passes, 0 at the first one that does not
+ */
+static int utf8_check(const uint8_t *str)
+{
+    const uint8_t *byte;
+    uint32_t codepoint, min;
+
+    while (*str) {
+        byte = str;
+        GET_UTF8(codepoint, *(byte++), return 0;);
+        /* smallest code point that needs (byte - str) bytes; a smaller
+         * value means the sequence is an overlong encoding */
+        min = byte - str == 1 ? 0 : byte - str == 2 ? 0x80 :
+              1 << (5 * (byte - str) - 4);
+        if (codepoint < min || codepoint >= 0x110000 ||
+            codepoint == 0xFFFE /* byte-swapped BOM */ ||
+            codepoint >= 0xD800 && codepoint <= 0xDFFF /* surrogates */)
+            return 0;
+        str = byte;
+    }
+    return 1;
+}
+
 int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
                              int *got_sub_ptr,
                              AVPacket *avpkt)
 {
-    int ret = 0;
+    int i, ret = 0;
 
     if (avctx->codec->type != AVMEDIA_TYPE_SUBTITLE) {
         av_log(avctx, AV_LOG_ERROR, "Invalid media type for subtitles\n");
@@ -2247,6 +2278,21 @@ int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
                                                      avctx->pkt_timebase, ms);
             }
 
+            for (i = 0; i < sub->num_rects; i++) {
+                if (sub->rects[i]->ass && !utf8_check(sub->rects[i]->ass)) {
+                    av_log(avctx, AV_LOG_ERROR,
+                           "Invalid UTF-8 in decoded subtitles text; "
+                           "maybe missing -sub_charenc option\n");
+                    avsubtitle_free(sub);
+                    /* Do not return from here: that would skip the
+                     * pkt_recoded cleanup that starts right below.
+                     * NOTE(review): relies on ret being returned at the
+                     * end of the function, outside this hunk -- confirm. */
+                    ret = AVERROR_INVALIDDATA;
+                    break;
+                }
+            }
+
             if (tmp.data != pkt_recoded.data) { // did we recode?
                 /* prevent from destroying side data from original packet */
                 pkt_recoded.side_data = NULL;
diff --git a/tests/fate/subtitles.mak b/tests/fate/subtitles.mak
index 8e586fbe8a..ac1f4ca026 100644
--- a/tests/fate/subtitles.mak
+++ b/tests/fate/subtitles.mak
@@ -1,5 +1,5 @@
-FATE_SUBTITLES_ASS-$(call DEMDEC, AQTITLE, TEXT) += fate-sub-aqtitle
-fate-sub-aqtitle: CMD = md5 -i $(SAMPLES)/sub/AQTitle_capability_tester.aqt -f ass
+FATE_SUBTITLES_ASS-$(call ALLYES, AQTITLE_DEMUXER TEXT_DECODER ICONV) += fate-sub-aqtitle
+fate-sub-aqtitle: CMD = md5 -sub_charenc windows-1250 -i $(SAMPLES)/sub/AQTitle_capability_tester.aqt -f ass
 
 FATE_SUBTITLES_ASS-$(call DEMDEC, JACOSUB, JACOSUB) += fate-sub-jacosub
 fate-sub-jacosub: CMD = md5 -i $(SAMPLES)/sub/JACOsub_capability_tester.jss -f ass
@@ -40,8 +40,8 @@ fate-sub-srt: CMD = md5 -i $(SAMPLES)/sub/SubRip_capability_tester.srt -f ass
 FATE_SUBTITLES-$(call ALLYES, MOV_DEMUXER MOVTEXT_DECODER SUBRIP_ENCODER) += fate-sub-subripenc
 fate-sub-subripenc: CMD = md5 -i $(SAMPLES)/sub/MovText_capability_tester.mp4 -scodec subrip -f srt
 
-FATE_SUBTITLES_ASS-$(call DEMDEC, SUBVIEWER1, SUBVIEWER1) += fate-sub-subviewer1
-fate-sub-subviewer1: CMD = md5 -i $(SAMPLES)/sub/SubViewer1_capability_tester.sub -f ass
+FATE_SUBTITLES_ASS-$(call ALLYES, SUBVIEWER1_DEMUXER SUBVIEWER1_DECODER ICONV) += fate-sub-subviewer1
+fate-sub-subviewer1: CMD = md5 -sub_charenc windows-1250 -i $(SAMPLES)/sub/SubViewer1_capability_tester.sub -f ass
 
 FATE_SUBTITLES_ASS-$(call DEMDEC, SUBVIEWER, SUBVIEWER) += fate-sub-subviewer
 fate-sub-subviewer: CMD = md5 -i $(SAMPLES)/sub/SubViewer_capability_tester.sub -f ass
diff --git a/tests/ref/fate/sub-aqtitle b/tests/ref/fate/sub-aqtitle
index f6900e7f0e..c477b0af8c 100644
--- a/tests/ref/fate/sub-aqtitle
+++ b/tests/ref/fate/sub-aqtitle
@@ -1 +1 @@
-e888e1354cd0968895ab89cb169fec31
+1c68def68db6536c235819cbe0638e00
diff --git a/tests/ref/fate/sub-subviewer1 b/tests/ref/fate/sub-subviewer1
index 116fce7e63..052431a81c 100644
--- a/tests/ref/fate/sub-subviewer1
+++ b/tests/ref/fate/sub-subviewer1
@@ -1 +1 @@
-0c2096fedf7c971742b2e879bb303ce9
+cbeb015b1125757eed814f212cfc6c9c