From 2599541b3d7367d70d30ba88f17295265daef579 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20B=C5=93sch?= Date: Sat, 1 Sep 2012 14:34:24 +0200 Subject: [PATCH] WebVTT demuxer and decoder. --- Changelog | 1 + doc/general.texi | 1 + libavcodec/Makefile | 1 + libavcodec/allcodecs.c | 1 + libavcodec/avcodec.h | 1 + libavcodec/codec_desc.c | 6 ++ libavcodec/version.h | 2 +- libavcodec/webvttdec.c | 100 ++++++++++++++++++++ libavformat/Makefile | 1 + libavformat/allformats.c | 1 + libavformat/version.h | 4 +- libavformat/webvttdec.c | 188 ++++++++++++++++++++++++++++++++++++++ tests/fate/subtitles.mak | 3 + tests/ref/fate/sub-webvtt | 1 + 14 files changed, 308 insertions(+), 3 deletions(-) create mode 100644 libavcodec/webvttdec.c create mode 100644 libavformat/webvttdec.c create mode 100644 tests/ref/fate/sub-webvtt diff --git a/Changelog b/Changelog index e8ea6799d4..85ca1921f6 100644 --- a/Changelog +++ b/Changelog @@ -63,6 +63,7 @@ version next: - Smooth Streaming live segmenter muxer - F4V muxer - sendcmd and asendcmd filters +- WebVTT demuxer and decoder (simple tags supported) version 0.11: diff --git a/doc/general.texi b/doc/general.texi index 45fd053698..74ab3e0790 100644 --- a/doc/general.texi +++ b/doc/general.texi @@ -898,6 +898,7 @@ performance on systems without hardware floating point support). 
@item SubRip (SRT) @tab X @tab X @tab X @tab X @item SubViewer @tab @tab X @tab @tab X @item 3GPP Timed Text @tab @tab @tab X @tab X +@item WebVTT @tab @tab X @tab @tab X @item XSUB @tab @tab @tab X @tab X @end multitable diff --git a/libavcodec/Makefile b/libavcodec/Makefile index dd881b6c5b..9b86f7cef3 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -463,6 +463,7 @@ OBJS-$(CONFIG_VP6_DECODER) += vp6.o vp56.o vp56data.o vp56dsp.o \ OBJS-$(CONFIG_VP8_DECODER) += vp8.o vp8dsp.o vp56rac.o OBJS-$(CONFIG_VQA_DECODER) += vqavideo.o OBJS-$(CONFIG_WAVPACK_DECODER) += wavpack.o +OBJS-$(CONFIG_WEBVTT_DECODER) += webvttdec.o OBJS-$(CONFIG_WMALOSSLESS_DECODER) += wmalosslessdec.o wma_common.o OBJS-$(CONFIG_WMAPRO_DECODER) += wmaprodec.o wma.o wma_common.o OBJS-$(CONFIG_WMAV1_DECODER) += wmadec.o wma.o wma_common.o aactab.o diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index 8806c6a598..cba953f825 100644 --- a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -422,6 +422,7 @@ void avcodec_register_all(void) REGISTER_ENCDEC (SRT, srt); REGISTER_ENCDEC (SUBRIP, subrip); REGISTER_DECODER (SUBVIEWER, subviewer); + REGISTER_DECODER (WEBVTT, webvtt); REGISTER_ENCDEC (XSUB, xsub); /* external libraries */ diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index e4bba5b090..0d643478f6 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -448,6 +448,7 @@ enum AVCodecID { AV_CODEC_ID_REALTEXT = MKBETAG('R','T','X','T'), AV_CODEC_ID_SUBVIEWER = MKBETAG('S','u','b','V'), AV_CODEC_ID_SUBRIP = MKBETAG('S','R','i','p'), + AV_CODEC_ID_WEBVTT = MKBETAG('W','V','T','T'), /* other specific kind of codecs (generally used for attachments) */ AV_CODEC_ID_FIRST_UNKNOWN = 0x18000, ///< A dummy ID pointing at the start of various fake codecs. 
diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c index 1f22b17242..607dd96f81 100644 --- a/libavcodec/codec_desc.c +++ b/libavcodec/codec_desc.c @@ -2342,6 +2342,12 @@ static const AVCodecDescriptor codec_descriptors[] = { .name = "subviewer", .long_name = NULL_IF_CONFIG_SMALL("SubViewer subtitle"), }, + { + .id = AV_CODEC_ID_WEBVTT, + .type = AVMEDIA_TYPE_SUBTITLE, + .name = "webvtt", + .long_name = NULL_IF_CONFIG_SMALL("WebVTT subtitle"), + }, { .id = AV_CODEC_ID_BINTEXT, .type = AVMEDIA_TYPE_VIDEO, diff --git a/libavcodec/version.h b/libavcodec/version.h index 7a779e6bce..c1d0b7669d 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -27,7 +27,7 @@ */ #define LIBAVCODEC_VERSION_MAJOR 54 -#define LIBAVCODEC_VERSION_MINOR 57 +#define LIBAVCODEC_VERSION_MINOR 58 #define LIBAVCODEC_VERSION_MICRO 100 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
diff --git a/libavcodec/webvttdec.c b/libavcodec/webvttdec.c
new file mode 100644
index 0000000000..6b86bedbe9
--- /dev/null
+++ b/libavcodec/webvttdec.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2012 Clément Bœsch
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * WebVTT subtitle decoder
+ * @see http://dev.w3.org/html5/webvtt/
+ * @todo need to support extended markups and cue settings
+ */
+
+#include "avcodec.h"
+#include "ass.h"
+#include "libavutil/bprint.h"
+
+/**
+ * Substrings of a cue payload and the text written in their place: the
+ * simple <i>, <b> and <u> tags map to ASS override codes, and curly braces
+ * are escaped.
+ */
+static const struct {
+    const char *from;
+    const char *to;
+} webvtt_tag_replace[] = {
+    {"<i>", "{\\i1}"}, {"</i>", "{\\i0}"},
+    {"<b>", "{\\b1}"}, {"</b>", "{\\b0}"},
+    {"<u>", "{\\u1}"}, {"</u>", "{\\u0}"},
+    {"{", "\\{"}, {"}", "\\}"}, // escape to avoid ASS markup conflicts
+};
+
+/**
+ * Append the ASS rendition of a WebVTT cue payload to buf.
+ *
+ * Substrings listed in webvtt_tag_replace are substituted, characters from
+ * any other '<' up to the next '>' are dropped, a newline followed by more
+ * text becomes "\N" and carriage returns are removed; "\r\n" ends the event.
+ *
+ * @param buf destination buffer
+ * @param p   NUL-terminated cue payload
+ * @return 0 (always)
+ */
+static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
+{
+    int i, skip = 0;
+
+    while (*p) {
+        int replaced = 0;
+
+        for (i = 0; i < FF_ARRAY_ELEMS(webvtt_tag_replace); i++) {
+            const char *from = webvtt_tag_replace[i].from;
+            const size_t len = strlen(from);
+            if (!strncmp(p, from, len)) {
+                av_bprintf(buf, "%s", webvtt_tag_replace[i].to);
+                p += len;
+                replaced = 1;
+                break;
+            }
+        }
+        /* rescan from the new position after a substitution, so that
+         * back-to-back tags such as "<b><i>" are both translated instead
+         * of the second one being discarded as an unknown tag */
+        if (replaced)
+            continue;
+
+        if (*p == '<')
+            skip = 1;
+        else if (*p == '>')
+            skip = 0;
+        else if (p[0] == '\n' && p[1])
+            av_bprintf(buf, "\\N");
+        else if (!skip && *p != '\r')
+            av_bprint_chars(buf, *p, 1);
+        p++;
+    }
+    av_bprintf(buf, "\r\n");
+    return 0;
+}
+
+/**
+ * Decode one packet, holding one cue payload, into an ASS event.
+ *
+ * The payload is converted with webvtt_event_to_ass() and passed to
+ * ff_ass_add_rect() with the packet pts and duration rescaled from the
+ * codec time base to 1/100 s units; a duration of -1 is passed through
+ * unchanged.
+ *
+ * @param avctx       codec context, supplies the time base
+ * @param data        AVSubtitle receiving the event
+ * @param got_sub_ptr set to non-zero if the subtitle holds at least one rect
+ * @param avpkt       input packet; its data is read as a C string
+ *                    (NOTE(review): presumably terminated by the packet
+ *                    padding -- confirm)
+ * @return avpkt->size
+ */
+static int webvtt_decode_frame(AVCodecContext *avctx,
+                               void *data, int *got_sub_ptr, AVPacket *avpkt)
+{
+    AVSubtitle *sub = data;
+    const char *ptr = avpkt->data;
+    AVBPrint buf;
+
+    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
+    if (ptr && avpkt->size > 0 && !webvtt_event_to_ass(&buf, ptr)) {
+        int ts_start     = av_rescale_q(avpkt->pts, avctx->time_base, (AVRational){1,100});
+        int ts_duration  = avpkt->duration != -1 ?
+                           av_rescale_q(avpkt->duration, avctx->time_base, (AVRational){1,100}) : -1;
+        ff_ass_add_rect(sub, buf.str, ts_start, ts_duration, 0);
+    }
+    *got_sub_ptr = sub->num_rects > 0;
+    av_bprint_finalize(&buf, NULL);
+    return avpkt->size;
+}
+
+/** Decoder entry for WebVTT subtitles. */
+AVCodec ff_webvtt_decoder = {
+    .name           = "webvtt",
+    .long_name      = NULL_IF_CONFIG_SMALL("WebVTT subtitle"),
+    .type           = AVMEDIA_TYPE_SUBTITLE,
+    .id             = AV_CODEC_ID_WEBVTT,
+    .decode         = webvtt_decode_frame,
+    .init           = ff_ass_subtitle_header_default,
+};
diff --git a/libavformat/Makefile b/libavformat/Makefile index d6f8a1a32b..632721f4bb 100644 --- a/libavformat/Makefile +++ b/libavformat/Makefile @@ -355,6 +355,7 @@ OBJS-$(CONFIG_WC3_DEMUXER) += wc3movie.o OBJS-$(CONFIG_WEBM_MUXER) += matroskaenc.o matroska.o \ isom.o avc.o \ flacenc_header.o avlanguage.o +OBJS-$(CONFIG_WEBVTT_DEMUXER) += webvttdec.o OBJS-$(CONFIG_WSAUD_DEMUXER) += westwood_aud.o OBJS-$(CONFIG_WSVQA_DEMUXER) += westwood_vqa.o OBJS-$(CONFIG_WTV_DEMUXER) += wtvdec.o wtv.o asfdec.o asf.o asfcrypt.o \ diff --git a/libavformat/allformats.c b/libavformat/allformats.c index 8dd917b21a..36b6988f11 100644 --- a/libavformat/allformats.c +++ b/libavformat/allformats.c @@ -252,6 +252,7 @@ void av_register_all(void) REGISTER_MUXDEMUX (WAV, wav); REGISTER_DEMUXER (WC3, wc3); REGISTER_MUXER (WEBM, webm); + REGISTER_DEMUXER (WEBVTT, webvtt); REGISTER_DEMUXER (WSAUD, wsaud); REGISTER_DEMUXER (WSVQA, wsvqa); REGISTER_MUXDEMUX (WTV, wtv); diff --git a/libavformat/version.h b/libavformat/version.h index 656d3e705c..fe3e6f8671 100644 --- a/libavformat/version.h +++ b/libavformat/version.h @@ -30,8 +30,8 @@ #include "libavutil/avutil.h" #define LIBAVFORMAT_VERSION_MAJOR 54 -#define LIBAVFORMAT_VERSION_MINOR 27 -#define LIBAVFORMAT_VERSION_MICRO 101 +#define LIBAVFORMAT_VERSION_MINOR 28 +#define LIBAVFORMAT_VERSION_MICRO 100 #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \ LIBAVFORMAT_VERSION_MINOR, \ diff --git a/libavformat/webvttdec.c b/libavformat/webvttdec.c new
file mode 100644
index 0000000000..b1cd2938ee
--- /dev/null
+++ b/libavformat/webvttdec.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2012 Clément Bœsch
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * WebVTT subtitle demuxer
+ * @see http://dev.w3.org/html5/webvtt/
+ */
+
+#include "avformat.h"
+#include "internal.h"
+#include "subtitles.h"
+#include "libavutil/bprint.h"
+#include "libavutil/intreadwrite.h"
+
+/** Demuxer private data: queue of the cues parsed by webvtt_read_header(). */
+typedef struct {
+    FFDemuxSubtitlesQueue q;
+} WebVTTContext;
+
+/**
+ * Probe for a WebVTT file: an optional UTF-8 BOM, then "WEBVTT" followed
+ * by a NUL byte, a line break, a tab or a space.
+ *
+ * @return AVPROBE_SCORE_MAX on a match, 0 otherwise
+ */
+static int webvtt_probe(AVProbeData *p)
+{
+    const uint8_t *ptr = p->buf;
+
+    if (AV_RB24(ptr) == 0xEFBBBF)
+        ptr += 3;  /* skip UTF-8 BOM */
+    if (!strncmp(ptr, "WEBVTT", 6) &&
+        (!ptr[6] || strchr("\n\r\t ", ptr[6])))
+        return AVPROBE_SCORE_MAX;
+    return 0;
+}
+
+/**
+ * Parse a cue timestamp written as "hh:mm:ss.ttt" or "mm:ss.ttt".
+ *
+ * @param s string beginning with the timestamp
+ * @return the timestamp in milliseconds, or AV_NOPTS_VALUE if s matches
+ *         neither form
+ */
+static int64_t read_ts(const char *s)
+{
+    /* unsigned, as required by the %u conversions below */
+    unsigned hh, mm, ss, ms;
+
+    /* the LL factors keep the arithmetic in 64 bits so that large hour
+     * values cannot overflow an int */
+    if (sscanf(s, "%u:%u:%u.%u", &hh, &mm, &ss, &ms) == 4) return (hh*3600LL + mm*60LL + ss) * 1000LL + ms;
+    if (sscanf(s,    "%u:%u.%u",      &mm, &ss, &ms) == 3) return (            mm*60LL + ss) * 1000LL + ms;
+    return AV_NOPTS_VALUE;
+}
+
+/**
+ * Read the next chunk of the file into buf.
+ *
+ * Bytes are read until avio_r8() yields 0 or until a line break directly
+ * follows a '\n' (an empty line). Carriage returns are not stored, and a
+ * NUL byte is appended to the buffer contents.
+ *
+ * @param buf cleared, then filled with the chunk
+ * @param pb  input
+ * @return position in pb at which the chunk starts
+ */
+static int64_t extract_cue(AVBPrint *buf, AVIOContext *pb)
+{
+    int prev_chr_is_eol = 0;
+    int64_t pos = avio_tell(pb);
+
+    av_bprint_clear(buf);
+    for (;;) {
+        char c = avio_r8(pb);
+        if (!c)
+            break;
+        if (c == '\r' || c == '\n') {
+            if (prev_chr_is_eol)
+                break;
+            prev_chr_is_eol = (c == '\n');
+        } else
+            prev_chr_is_eol = 0;
+        if (c != '\r')
+            av_bprint_chars(buf, c, 1);
+    }
+    av_bprint_chars(buf, '\0', 1);
+    return pos;
+}
+
+/**
+ * Create the subtitle stream and parse the whole input into the cue queue.
+ *
+ * The stream uses a 1/1000 time base. Each chunk returned by extract_cue()
+ * is parsed as an optional identifier line, a "start --> end" timing line
+ * with optional settings, and the payload, which is queued as one packet.
+ * Parsing stops at the first empty chunk or at the first chunk whose
+ * timing line cannot be parsed.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) if the stream or a packet cannot
+ *         be allocated
+ */
+static int webvtt_read_header(AVFormatContext *s)
+{
+    WebVTTContext *webvtt = s->priv_data;
+    AVBPrint cue;
+    int res = 0;
+    AVStream *st = avformat_new_stream(s, NULL);
+
+    if (!st)
+        return AVERROR(ENOMEM);
+    avpriv_set_pts_info(st, 64, 1, 1000);
+    st->codec->codec_type = AVMEDIA_TYPE_SUBTITLE;
+    st->codec->codec_id   = AV_CODEC_ID_WEBVTT;
+
+    av_bprint_init(&cue, 0, AV_BPRINT_SIZE_UNLIMITED);
+
+    for (;;) {
+        int i, len;
+        int64_t pos = extract_cue(&cue, s->pb);
+        AVPacket *sub;
+        const char *p = cue.str;
+        const char *identifier = p;
+        int64_t ts_start, ts_end;
+
+        if (!*p) // EOF
+            break;
+
+        /* ignore header chunk */
+        if (!strncmp(p, "\xEF\xBB\xBFWEBVTT", 9) ||
+            !strncmp(p, "WEBVTT", 6))
+            continue;
+
+        /* optional cue identifier (can be a number like in SRT or some kind of
+         * chaptering id), silently skip it */
+        for (i = 0; p[i] && p[i] != '\n'; i++) {
+            if (!strncmp(p + i, "-->", 3)) {
+                identifier = NULL;
+                break;
+            }
+        }
+        if (identifier)
+            p += strcspn(p, "\n");
+
+        /* cue timestamps */
+        if ((ts_start = read_ts(p)) == AV_NOPTS_VALUE)
+            break;
+        if (!(p = strstr(p, "-->")))
+            break;
+        p += 3;
+        /* skip only actual blanks, so that an end timestamp written
+         * right after the arrow keeps its first digit */
+        while (*p == ' ' || *p == '\t')
+            p++;
+        if ((ts_end = read_ts(p)) == AV_NOPTS_VALUE)
+            break;
+
+        /* optional cue settings, TODO: store in side_data */
+        p += strcspn(p, "\n\t ");
+        while (*p == '\t' || *p == ' ')
+            p++;
+        if (*p != '\n')
+            p += strcspn(p, "\n");
+        if (*p == '\n')
+            p++;
+
+        /* create packet: the payload runs from p up to the NUL that
+         * extract_cue() stored as the last byte of the buffer */
+        len = cue.str + cue.len - p - 1;
+        sub = ff_subtitles_queue_insert(&webvtt->q, p, len, 0);
+        if (!sub) {
+            res = AVERROR(ENOMEM);
+            goto end;
+        }
+        sub->pos = pos;
+        sub->pts = ts_start;
+        sub->duration = ts_end - ts_start;
+    }
+
+    ff_subtitles_queue_finalize(&webvtt->q);
+
+end:
+    av_bprint_finalize(&cue, NULL);
+    return res;
+}
+
+/** Forward to ff_subtitles_queue_read_packet() on the cue queue. */
+static int webvtt_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    WebVTTContext *webvtt = s->priv_data;
+    return ff_subtitles_queue_read_packet(&webvtt->q, pkt);
+}
+
+/** Clean the cue queue with ff_subtitles_queue_clean(); always returns 0. */
+static int webvtt_read_close(AVFormatContext *s)
+{
+    WebVTTContext *webvtt = s->priv_data;
+    ff_subtitles_queue_clean(&webvtt->q);
+    return 0;
+}
+
+/** Demuxer entry for WebVTT files (extension "vtt"). */
+AVInputFormat ff_webvtt_demuxer = {
+    .name           = "webvtt",
+    .long_name      = NULL_IF_CONFIG_SMALL("WebVTT subtitle"),
+    .priv_data_size = sizeof(WebVTTContext),
+    .read_probe     = webvtt_probe,
+    .read_header    = webvtt_read_header,
+    .read_packet    = webvtt_read_packet,
+    .read_close     = webvtt_read_close,
+    .flags          = AVFMT_GENERIC_INDEX,
+    .extensions     = "vtt",
+};
diff --git a/tests/fate/subtitles.mak b/tests/fate/subtitles.mak index aa0b50dddd..d73352ce10 100644 --- a/tests/fate/subtitles.mak +++ b/tests/fate/subtitles.mak @@ -25,5 +25,8 @@ fate-sub-subripenc: CMD = md5 -i $(SAMPLES)/sub/MovText_capability_tester.mp4 -s FATE_SUBTITLES += fate-sub-subviewer fate-sub-subviewer: CMD = md5 -i $(SAMPLES)/sub/SubViewer_capability_tester.sub -f ass +FATE_SUBTITLES += fate-sub-webvtt +fate-sub-webvtt: CMD = md5 -i $(SAMPLES)/sub/WebVTT_capability_tester.vtt -f ass + FATE_SAMPLES_FFMPEG += $(FATE_SUBTITLES) fate-subtitles: $(FATE_SUBTITLES) diff --git a/tests/ref/fate/sub-webvtt b/tests/ref/fate/sub-webvtt new file mode 100644 index 0000000000..2e32e55196 --- /dev/null +++ b/tests/ref/fate/sub-webvtt @@ -0,0 +1 @@ +5384a70c89ddca4b007fb7ffba95cffb