ffmpeg/libavcodec/ttmlenc.c

397 lines
12 KiB
C

/*
* TTML subtitle encoder
* Copyright (c) 2020 24i
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* TTML subtitle encoder
* @see https://www.w3.org/TR/ttml1/
* @see https://www.w3.org/TR/ttml2/
* @see https://www.w3.org/TR/ttml-imsc/rec
*/
#include "avcodec.h"
#include "codec_internal.h"
#include "libavutil/avstring.h"
#include "libavutil/bprint.h"
#include "libavutil/internal.h"
#include "ass_split.h"
#include "ass.h"
#include "ttmlenc.h"
/**
 * Private state of the TTML subtitle encoder (the codec's priv_data).
 */
typedef struct {
// Codec context this state belongs to; the ASS callbacks use it for logging.
AVCodecContext *avctx;
// Result of ff_ass_split() on the subtitle header; released in
// ttml_encode_close().
ASSSplitContext *ass_ctx;
// Scratch buffer: holds the extradata strings while they are assembled at
// init, and afterwards the TTML text of the event being encoded.
AVBPrint buffer;
} TTMLContext;
/**
 * ASS "text" callback: append one run of dialog text to the event buffer
 * with XML escaping applied.
 *
 * @param priv the TTMLContext passed to ff_ass_split_override_codes()
 * @param text start of the text run
 * @param len  number of bytes of text that belong to the run
 */
static void ttml_text_cb(void *priv, const char *text, int len)
{
    TTMLContext *ctx = priv;
    AVBPrint run = { 0 };

    // av_bprint_escape() is given no length, so the len-byte run is first
    // copied into a temporary AVBPrint and its string is escaped from there.
    av_bprint_init(&run, len, AV_BPRINT_SIZE_UNLIMITED);
    av_bprint_append_data(&run, text, len);

    if (av_bprint_is_complete(&run)) {
        av_bprint_escape(&ctx->buffer, run.str, NULL, AV_ESCAPE_MODE_XML,
                         0);
    } else {
        // The run is dropped; the callback has no way to report failure.
        av_log(ctx->avctx, AV_LOG_ERROR,
               "Failed to move the current subtitle dialog to AVBPrint!\n");
    }

    av_bprint_finalize(&run, NULL);
}
static void ttml_new_line_cb(void *priv, int forced)
{
TTMLContext *s = priv;
av_bprintf(&s->buffer, "<br/>");
}
/**
 * Callback table handed to ff_ass_split_override_codes() by
 * ttml_encode_frame(). Only the text and line-break callbacks are set;
 * all other members are left zero-initialized.
 */
static const ASSCodesCallbacks ttml_callbacks = {
.text = ttml_text_cb,
.new_line = ttml_new_line_cb,
};
/**
 * Encode one AVSubtitle into TTML text.
 *
 * Every rectangle must be of type SUBTITLE_ASS. Its dialog text is split
 * with ff_ass_split_override_codes(), and the resulting text and line breaks
 * are written XML-escaped. When the dialog names a style, the text is
 * wrapped in a <span region="..."> element carrying that name.
 *
 * @param avctx   codec context; priv_data is the TTMLContext
 * @param buf     destination for the NUL-terminated TTML text
 * @param bufsize size of buf in bytes
 * @param sub     subtitle to encode
 * @return number of bytes written excluding the terminating NUL, 0 when the
 *         subtitle produced no output, or a negative AVERROR code
 */
static int ttml_encode_frame(AVCodecContext *avctx, uint8_t *buf,
                             int bufsize, const AVSubtitle *sub)
{
    TTMLContext *s = avctx->priv_data;
    ASSDialog *dialog;
    int i;

    av_bprint_clear(&s->buffer);

    for (i = 0; i < sub->num_rects; i++) {
        const char *ass = sub->rects[i]->ass;
        int ret;

        if (sub->rects[i]->type != SUBTITLE_ASS) {
            av_log(avctx, AV_LOG_ERROR, "Only SUBTITLE_ASS type supported.\n");
            return AVERROR(EINVAL);
        }

        dialog = ff_ass_split_dialog(s->ass_ctx, ass);
        if (!dialog)
            return AVERROR(ENOMEM);

        if (dialog->style) {
            av_bprintf(&s->buffer, "<span region=\"");
            av_bprint_escape(&s->buffer, dialog->style, NULL,
                             AV_ESCAPE_MODE_XML,
                             AV_ESCAPE_FLAG_XML_DOUBLE_QUOTES);
            av_bprintf(&s->buffer, "\">");
        }

        ret = ff_ass_split_override_codes(&ttml_callbacks, s, dialog->text);
        if (ret < 0) {
            // Invalid data is only a warning unless the user asked for
            // AV_EF_EXPLODE; any other failure is always fatal.
            int log_level = (ret != AVERROR_INVALIDDATA ||
                             avctx->err_recognition & AV_EF_EXPLODE) ?
                            AV_LOG_ERROR : AV_LOG_WARNING;
            av_log(avctx, log_level,
                   "Splitting received ASS dialog text %s failed: %s\n",
                   dialog->text,
                   av_err2str(ret));

            if (log_level == AV_LOG_ERROR) {
                ff_ass_free_dialog(&dialog);
                return ret;
            }
        }

        if (dialog->style)
            av_bprintf(&s->buffer, "</span>");

        ff_ass_free_dialog(&dialog);
    }

    if (!av_bprint_is_complete(&s->buffer))
        return AVERROR(ENOMEM);
    if (!s->buffer.len)
        return 0;

    // The text plus its terminating NUL has to fit into buf. The length is
    // checked explicitly before copying: comparing av_strlcpy()'s size_t
    // result with the signed "bufsize - 1" wraps around for bufsize <= 0,
    // which would report success without having written anything.
    if (bufsize <= 0 || s->buffer.len >= (unsigned)bufsize) {
        av_log(avctx, AV_LOG_ERROR, "Buffer too small for TTML event.\n");
        return AVERROR_BUFFER_TOO_SMALL;
    }
    av_strlcpy(buf, s->buffer.str, bufsize);

    return s->buffer.len;
}
/**
 * Release everything owned by the encoder's private context: the split
 * ASS context and the scratch buffer.
 *
 * @param avctx codec context; priv_data is the TTMLContext
 * @return always 0
 */
static av_cold int ttml_encode_close(AVCodecContext *avctx)
{
    TTMLContext *ctx = avctx->priv_data;

    ff_ass_split_free(ctx->ass_ctx);
    av_bprint_finalize(&ctx->buffer, NULL);

    return 0;
}
/**
 * Translate an ASS style alignment value into the keyword written to
 * tts:displayAlign.
 *
 * @param alignment ASS alignment; 1-3 map to "after", 4-6 to "center" and
 *                  7-9 to "before"
 * @return the keyword, or NULL when alignment is outside 1..9
 */
static const char *ttml_get_display_alignment(int alignment)
{
    if (alignment < 1 || alignment > 9)
        return NULL;

    if (alignment <= 3)
        return "after";
    if (alignment <= 6)
        return "center";

    return "before";
}
/**
 * Translate an ASS style alignment value into the keyword written to
 * tts:textAlign.
 *
 * @param alignment ASS alignment; 1/4/7 map to "left", 2/5/8 to "center"
 *                  and 3/6/9 to "right"
 * @return the keyword, or NULL when alignment is outside 1..9
 */
static const char *ttml_get_text_alignment(int alignment)
{
    static const char *const keywords[3] = { "left", "center", "right" };

    if (alignment < 1 || alignment > 9)
        return NULL;

    // Within 1..9 the values repeat in groups of three: left, center, right.
    return keywords[(alignment - 1) % 3];
}
/**
 * Compute the region origin as percentages of the script's reference
 * resolution.
 *
 * The left offset is the style's left margin. The top offset is the
 * vertical margin for alignments >= 7 and zero for every other alignment.
 *
 * @param script_info      script info providing play_res_x / play_res_y,
 *                         which are used as divisors
 * @param style            style providing the margins and alignment
 * @param[out] origin_left left offset in percent of play_res_x
 * @param[out] origin_top  top offset in percent of play_res_y
 */
static void ttml_get_origin(ASSScriptInfo script_info, ASSStyle style,
                            int *origin_left, int *origin_top)
{
    int top_margin = 0;

    if (style.alignment >= 7)
        top_margin = style.margin_v;

    *origin_left = av_rescale(style.margin_l, 100, script_info.play_res_x);
    *origin_top  = av_rescale(top_margin, 100, script_info.play_res_y);
}
/**
 * Compute the region extent (width and height) as percentages of the
 * script's reference resolution.
 *
 * tts:extent is the size of the region measured from tts:origin, not the
 * position of its far edge. The far-edge percentages are therefore reduced
 * by the origin percentages (computed the same way as in ttml_get_origin())
 * so that origin + extent ends exactly at the right / bottom margin.
 *
 * @param script_info script info providing play_res_x / play_res_y, which
 *                    are used as divisors
 * @param style       style providing the margins and alignment
 * @param[out] width  region width in percent of play_res_x
 * @param[out] height region height in percent of play_res_y
 */
static void ttml_get_extent(ASSScriptInfo script_info, ASSStyle style,
                            int *width, int *height)
{
    const int res_x = script_info.play_res_x;
    const int res_y = script_info.play_res_y;

    // Same percentages that ttml_get_origin() writes to tts:origin.
    const int origin_left = av_rescale(style.margin_l, 100, res_x);
    const int origin_top  = av_rescale((style.alignment >= 7) ?
                                       style.margin_v : 0,
                                       100, res_y);

    // Position of the right / bottom edge of the region, in percent.
    const int right_edge  = av_rescale(res_x - style.margin_r, 100, res_x);
    const int bottom_edge = av_rescale((style.alignment <= 3) ?
                                       res_y - style.margin_v :
                                       res_y,
                                       100, res_y);

    *width  = right_edge - origin_left;
    *height = bottom_edge - origin_top;
}
/**
 * Write `open`, then `value` XML-escaped for use inside a double-quoted
 * attribute, then the closing quote and a newline.
 */
static void ttml_write_escaped_attr(AVBPrint *buf, const char *open,
                                    const char *value)
{
    av_bprintf(buf, "%s", open);
    av_bprint_escape(buf, value, NULL, AV_ESCAPE_MODE_XML,
                     AV_ESCAPE_FLAG_XML_DOUBLE_QUOTES);
    av_bprintf(buf, "\"\n");
}

/**
 * Append a TTML <region> element describing one ASS style.
 *
 * The style is validated first: it needs a name, a non-negative font size,
 * non-negative margins and an alignment that maps to TTML display and text
 * alignment keywords.
 *
 * @param avctx       logging context
 * @param buf         buffer the element is appended to
 * @param script_info script info providing the reference resolution
 * @param style       ASS style to convert
 * @return 0 on success, AVERROR_INVALIDDATA when the style is rejected
 */
static int ttml_write_region(AVCodecContext *avctx, AVBPrint *buf,
                             ASSScriptInfo script_info, ASSStyle style)
{
    const char *display_align;
    const char *text_align;
    int left = 0, top = 0;
    int extent_w = 0, extent_h = 0;

    if (!style.name) {
        av_log(avctx, AV_LOG_ERROR, "Subtitle style name not set!\n");
        return AVERROR_INVALIDDATA;
    }

    if (style.font_size < 0) {
        av_log(avctx, AV_LOG_ERROR, "Invalid font size for TTML: %d!\n",
               style.font_size);
        return AVERROR_INVALIDDATA;
    }

    if (style.margin_l < 0 || style.margin_r < 0 || style.margin_v < 0) {
        av_log(avctx, AV_LOG_ERROR,
               "One or more negative margin values in subtitle style: "
               "left: %d, right: %d, vertical: %d!\n",
               style.margin_l, style.margin_r, style.margin_v);
        return AVERROR_INVALIDDATA;
    }

    display_align = ttml_get_display_alignment(style.alignment);
    text_align    = ttml_get_text_alignment(style.alignment);
    if (!display_align || !text_align) {
        av_log(avctx, AV_LOG_ERROR,
               "Failed to convert ASS style alignment %d of style %s to "
               "TTML display and text alignment!\n",
               style.alignment,
               style.name);
        return AVERROR_INVALIDDATA;
    }

    ttml_get_origin(script_info, style, &left, &top);
    ttml_get_extent(script_info, style, &extent_w, &extent_h);

    ttml_write_escaped_attr(buf, " <region xml:id=\"", style.name);

    av_bprintf(buf, " tts:origin=\"%d%% %d%%\"\n",
               left, top);
    av_bprintf(buf, " tts:extent=\"%d%% %d%%\"\n",
               extent_w, extent_h);

    ttml_write_escaped_attr(buf, " tts:displayAlign=\"", display_align);
    ttml_write_escaped_attr(buf, " tts:textAlign=\"", text_align);

    // With the cell resolution set to the script's reference resolution a
    // single cell corresponds to one "point" of the canvas, so the font size
    // can be written in cells. This gives a similar enough scale without
    // explicit pixel based font sizing, which is frowned upon in the TTML
    // community.
    av_bprintf(buf, " tts:fontSize=\"%dc\"\n",
               style.font_size);

    if (style.font_name)
        ttml_write_escaped_attr(buf, " tts:fontFamily=\"", style.font_name);

    av_bprintf(buf, " tts:overflow=\"visible\" />\n");

    return 0;
}
/**
 * Build the encoder extradata from the parsed ASS header.
 *
 * The extradata is the TTMLENC_EXTRADATA_SIGNATURE followed by two
 * NUL-terminated strings:
 *  1. additional attributes for the root "tt" element (default namespacing
 *     and ttp:cellResolution),
 *  2. the "head" element with one region per ASS style.
 *
 * @param avctx codec context; priv_data is the TTMLContext whose ass_ctx
 *              has already been set
 * @return 0 on success, a negative AVERROR code otherwise
 */
static int ttml_write_header_content(AVCodecContext *avctx)
{
    TTMLContext *s = avctx->priv_data;
    AVBPrint *out = &s->buffer;
    ASS *ass = (ASS *)s->ass_ctx;
    ASSScriptInfo script_info = ass->script_info;
    const size_t base_extradata_size = TTMLENC_EXTRADATA_SIGNATURE_SIZE + 1 +
                                       AV_INPUT_BUFFER_PADDING_SIZE;
    size_t payload_size = 0;
    int idx;

    // The reference resolution is used as a divisor when writing regions.
    if (script_info.play_res_x <= 0 || script_info.play_res_y <= 0) {
        av_log(avctx, AV_LOG_ERROR,
               "Invalid subtitle reference resolution %dx%d!\n",
               script_info.play_res_x, script_info.play_res_y);
        return AVERROR_INVALIDDATA;
    }

    // First extradata string: attributes for the base "tt" element.
    av_bprintf(out, TTML_DEFAULT_NAMESPACING);

    // The cell resolution is in character cells, so it is not exactly 1:1
    // against a pixel based resolution. As tts:extent in the root "tt"
    // element is frowned upon (and disallowed in the EBU-TT profile), the
    // reference resolution is mimicked by writing it as the cell resolution.
    av_bprintf(out, " ttp:cellResolution=\"%d %d\"\n",
               script_info.play_res_x, script_info.play_res_y);
    av_bprint_chars(out, '\0', 1);

    // Second extradata string: the head element containing the styles.
    av_bprintf(out, " <head>\n");
    av_bprintf(out, " <layout>\n");

    for (idx = 0; idx < ass->styles_count; idx++) {
        const int err = ttml_write_region(avctx, out, script_info,
                                          ass->styles[idx]);
        if (err < 0)
            return err;
    }

    av_bprintf(out, " </layout>\n");
    av_bprintf(out, " </head>\n");
    av_bprint_chars(out, '\0', 1);

    if (!av_bprint_is_complete(out))
        return AVERROR(ENOMEM);

    payload_size = out->len;

    avctx->extradata = av_mallocz(base_extradata_size + payload_size);
    if (!avctx->extradata)
        return AVERROR(ENOMEM);

    avctx->extradata_size = TTMLENC_EXTRADATA_SIGNATURE_SIZE + payload_size;

    memcpy(avctx->extradata, TTMLENC_EXTRADATA_SIGNATURE,
           TTMLENC_EXTRADATA_SIGNATURE_SIZE);
    if (payload_size)
        memcpy(avctx->extradata + TTMLENC_EXTRADATA_SIGNATURE_SIZE,
               out->str, payload_size);

    // Leave the scratch buffer empty for the first encoded event.
    av_bprint_clear(out);

    return 0;
}
/**
 * Initialize the encoder: set up the scratch buffer, parse the ASS
 * subtitle header and generate the extradata from it.
 *
 * @param avctx codec context; subtitle_header is handed to ff_ass_split()
 * @return 0 on success, AVERROR_INVALIDDATA when the header cannot be
 *         split, or the error returned by ttml_write_header_content()
 */
static av_cold int ttml_encode_init(AVCodecContext *avctx)
{
    TTMLContext *ctx = avctx->priv_data;
    int err = AVERROR_BUG;

    ctx->avctx = avctx;
    av_bprint_init(&ctx->buffer, 0, AV_BPRINT_SIZE_UNLIMITED);

    ctx->ass_ctx = ff_ass_split(avctx->subtitle_header);
    if (!ctx->ass_ctx)
        return AVERROR_INVALIDDATA;

    err = ttml_write_header_content(avctx);
    if (err < 0)
        return err;

    return 0;
}
/**
 * Codec descriptor of the TTML subtitle encoder: wires the init, encode and
 * close functions defined in this file to the codec id AV_CODEC_ID_TTML.
 */
const FFCodec ff_ttml_encoder = {
.p.name = "ttml",
CODEC_LONG_NAME("TTML subtitle"),
.p.type = AVMEDIA_TYPE_SUBTITLE,
.p.id = AV_CODEC_ID_TTML,
.priv_data_size = sizeof(TTMLContext),
.init = ttml_encode_init,
FF_CODEC_ENCODE_SUB_CB(ttml_encode_frame),
.close = ttml_encode_close,
// NOTE(review): presumably makes the generic code call .close when .init
// fails, which the early returns in ttml_encode_init() rely on for
// cleanup; confirm against codec_internal.h.
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
};