mirror of https://git.ffmpeg.org/ffmpeg.git
lavc: support subtitles character encoding conversion.
This commit is contained in:
parent
8732271e40
commit
f796399344
|
@ -21,6 +21,7 @@ version <next>:
|
||||||
- encrypted TTA stream decoding support
|
- encrypted TTA stream decoding support
|
||||||
- RF64 support in WAV muxer
|
- RF64 support in WAV muxer
|
||||||
- noise filter ported from libmpcodecs
|
- noise filter ported from libmpcodecs
|
||||||
|
- Subtitles character encoding conversion
|
||||||
|
|
||||||
|
|
||||||
version 1.1:
|
version 1.1:
|
||||||
|
|
|
@ -1390,6 +1390,7 @@ HAVE_LIST="
|
||||||
gnu_as
|
gnu_as
|
||||||
gsm_h
|
gsm_h
|
||||||
ibm_asm
|
ibm_asm
|
||||||
|
iconv
|
||||||
inet_aton
|
inet_aton
|
||||||
io_h
|
io_h
|
||||||
isatty
|
isatty
|
||||||
|
@ -3716,6 +3717,7 @@ check_func getopt
|
||||||
check_func getrusage
|
check_func getrusage
|
||||||
check_struct "sys/time.h sys/resource.h" "struct rusage" ru_maxrss
|
check_struct "sys/time.h sys/resource.h" "struct rusage" ru_maxrss
|
||||||
check_func gettimeofday
|
check_func gettimeofday
|
||||||
|
check_func iconv
|
||||||
check_func inet_aton $network_extralibs
|
check_func inet_aton $network_extralibs
|
||||||
check_func isatty
|
check_func isatty
|
||||||
check_func localtime_r
|
check_func localtime_r
|
||||||
|
|
|
@ -3208,6 +3208,24 @@ typedef struct AVCodecContext {
|
||||||
* - encoding: unused
|
* - encoding: unused
|
||||||
*/
|
*/
|
||||||
AVDictionary *metadata;
|
AVDictionary *metadata;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Character encoding of the input subtitles file.
|
||||||
|
* - decoding: set by user
|
||||||
|
* - encoding: unused
|
||||||
|
*/
|
||||||
|
char *sub_charenc;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Subtitles character encoding mode. Formats or codecs might be adjusting
|
||||||
|
* this setting (if they are doing the conversion themselves for instance).
|
||||||
|
* - decoding: set by libavcodec
|
||||||
|
* - encoding: unused
|
||||||
|
*/
|
||||||
|
int sub_charenc_mode;
|
||||||
|
#define FF_SUB_CHARENC_MODE_DO_NOTHING -1 ///< do nothing (demuxer outputs a stream supposed to be already in UTF-8, or the codec is bitmap for instance)
|
||||||
|
#define FF_SUB_CHARENC_MODE_AUTOMATIC 0 ///< libavcodec will select the mode itself
|
||||||
|
#define FF_SUB_CHARENC_MODE_PRE_DECODER 1 ///< the AVPacket data needs to be recoded to UTF-8 before being fed to the decoder, requires iconv
|
||||||
} AVCodecContext;
|
} AVCodecContext;
|
||||||
|
|
||||||
AVRational av_codec_get_pkt_timebase (const AVCodecContext *avctx);
|
AVRational av_codec_get_pkt_timebase (const AVCodecContext *avctx);
|
||||||
|
|
|
@ -406,6 +406,11 @@ static const AVOption options[]={
|
||||||
{"ka", "Karaoke", 0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_KARAOKE }, INT_MIN, INT_MAX, A|E, "audio_service_type"},
|
{"ka", "Karaoke", 0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_KARAOKE }, INT_MIN, INT_MAX, A|E, "audio_service_type"},
|
||||||
{"request_sample_fmt", "sample format audio decoders should prefer", OFFSET(request_sample_fmt), AV_OPT_TYPE_SAMPLE_FMT, {.i64=AV_SAMPLE_FMT_NONE}, -1, AV_SAMPLE_FMT_NB-1, A|D, "request_sample_fmt"},
|
{"request_sample_fmt", "sample format audio decoders should prefer", OFFSET(request_sample_fmt), AV_OPT_TYPE_SAMPLE_FMT, {.i64=AV_SAMPLE_FMT_NONE}, -1, AV_SAMPLE_FMT_NB-1, A|D, "request_sample_fmt"},
|
||||||
{"pkt_timebase", NULL, OFFSET(pkt_timebase), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, 0, INT_MAX, 0},
|
{"pkt_timebase", NULL, OFFSET(pkt_timebase), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, 0, INT_MAX, 0},
|
||||||
|
{"sub_charenc", "set input text subtitles character encoding", OFFSET(sub_charenc), AV_OPT_TYPE_STRING, {.str = NULL}, CHAR_MIN, CHAR_MAX, S|D},
|
||||||
|
{"sub_charenc_mode", "set input text subtitles character encoding mode", OFFSET(sub_charenc_mode), AV_OPT_TYPE_FLAGS, {.i64 = FF_SUB_CHARENC_MODE_AUTOMATIC}, -1, INT_MAX, S|D, "sub_charenc_mode"},
|
||||||
|
{"do_nothing", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_DO_NOTHING}, INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
|
||||||
|
{"auto", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_AUTOMATIC}, INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
|
||||||
|
{"pre_decoder", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_PRE_DECODER}, INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
|
||||||
{NULL},
|
{NULL},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -48,6 +48,9 @@
|
||||||
#include <stdarg.h>
#include <limits.h>
#include <float.h>
#include <string.h>
#if HAVE_ICONV
# include <iconv.h>
#endif
|
||||||
|
|
||||||
volatile int ff_avcodec_locked;
|
volatile int ff_avcodec_locked;
|
||||||
static int volatile entangled_thread_counter = 0;
|
static int volatile entangled_thread_counter = 0;
|
||||||
|
@ -1089,6 +1092,32 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
|
||||||
ret = AVERROR(EINVAL);
|
ret = AVERROR(EINVAL);
|
||||||
goto free_and_end;
|
goto free_and_end;
|
||||||
}
|
}
|
||||||
|
if (avctx->sub_charenc) {
|
||||||
|
if (avctx->codec_type != AVMEDIA_TYPE_SUBTITLE) {
|
||||||
|
av_log(avctx, AV_LOG_ERROR, "Character encoding is only "
|
||||||
|
"supported with subtitles codecs\n");
|
||||||
|
ret = AVERROR(EINVAL);
|
||||||
|
goto free_and_end;
|
||||||
|
} else if (avctx->codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB) {
|
||||||
|
av_log(avctx, AV_LOG_WARNING, "Codec '%s' is bitmap-based, "
|
||||||
|
"subtitles character encoding will be ignored\n",
|
||||||
|
avctx->codec_descriptor->name);
|
||||||
|
avctx->sub_charenc_mode = FF_SUB_CHARENC_MODE_DO_NOTHING;
|
||||||
|
} else {
|
||||||
|
/* input character encoding is set for a text based subtitle
|
||||||
|
* codec at this point */
|
||||||
|
if (avctx->sub_charenc_mode == FF_SUB_CHARENC_MODE_AUTOMATIC)
|
||||||
|
avctx->sub_charenc_mode = FF_SUB_CHARENC_MODE_PRE_DECODER;
|
||||||
|
|
||||||
|
if (!HAVE_ICONV && avctx->sub_charenc_mode == FF_SUB_CHARENC_MODE_PRE_DECODER) {
|
||||||
|
av_log(avctx, AV_LOG_ERROR, "Character encoding subtitles "
|
||||||
|
"conversion needs a libavcodec built with iconv support "
|
||||||
|
"for this codec\n");
|
||||||
|
ret = AVERROR(ENOSYS);
|
||||||
|
goto free_and_end;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
end:
|
end:
|
||||||
ff_unlock_avcodec();
|
ff_unlock_avcodec();
|
||||||
|
@ -1847,6 +1876,68 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define UTF8_MAX_BYTES 4 /* 5 and 6 bytes sequences should not be used */
|
||||||
|
static int recode_subtitle(AVCodecContext *avctx,
|
||||||
|
AVPacket *outpkt, const AVPacket *inpkt)
|
||||||
|
{
|
||||||
|
#if HAVE_ICONV
|
||||||
|
iconv_t cd = (iconv_t)-1;
|
||||||
|
int ret = 0;
|
||||||
|
char *inb, *outb;
|
||||||
|
size_t inl, outl;
|
||||||
|
AVPacket tmp;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (avctx->sub_charenc_mode != FF_SUB_CHARENC_MODE_PRE_DECODER)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
#if HAVE_ICONV
|
||||||
|
cd = iconv_open("UTF-8", avctx->sub_charenc);
|
||||||
|
if (cd == (iconv_t)-1) {
|
||||||
|
av_log(avctx, AV_LOG_ERROR, "Unable to open iconv context "
|
||||||
|
"with input character encoding \"%s\"\n", avctx->sub_charenc);
|
||||||
|
ret = AVERROR(errno);
|
||||||
|
goto end;
|
||||||
|
}
|
||||||
|
|
||||||
|
inb = inpkt->data;
|
||||||
|
inl = inpkt->size;
|
||||||
|
|
||||||
|
if (inl >= INT_MAX / UTF8_MAX_BYTES - FF_INPUT_BUFFER_PADDING_SIZE) {
|
||||||
|
av_log(avctx, AV_LOG_ERROR, "Subtitles packet is too big for recoding\n");
|
||||||
|
ret = AVERROR(ENOMEM);
|
||||||
|
goto end;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = av_new_packet(&tmp, inl * UTF8_MAX_BYTES);
|
||||||
|
if (ret < 0)
|
||||||
|
goto end;
|
||||||
|
outpkt->data = tmp.data;
|
||||||
|
outpkt->size = tmp.size;
|
||||||
|
outb = outpkt->data;
|
||||||
|
outl = outpkt->size;
|
||||||
|
|
||||||
|
if (iconv(cd, &inb, &inl, &outb, &outl) == (size_t)-1 ||
|
||||||
|
iconv(cd, NULL, NULL, &outb, &outl) == (size_t)-1 ||
|
||||||
|
outl >= outpkt->size || inl != 0) {
|
||||||
|
av_log(avctx, AV_LOG_ERROR, "Unable to recode subtitle event \"%s\" "
|
||||||
|
"from %s to UTF-8\n", inpkt->data, avctx->sub_charenc);
|
||||||
|
av_free_packet(&tmp);
|
||||||
|
ret = AVERROR(errno);
|
||||||
|
goto end;
|
||||||
|
}
|
||||||
|
outpkt->size -= outl;
|
||||||
|
outpkt->data[outpkt->size - 1] = '\0';
|
||||||
|
|
||||||
|
end:
|
||||||
|
if (cd != (iconv_t)-1)
|
||||||
|
iconv_close(cd);
|
||||||
|
return ret;
|
||||||
|
#else
|
||||||
|
av_assert0(!"requesting subtitles recoding without iconv");
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
|
int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
|
||||||
int *got_sub_ptr,
|
int *got_sub_ptr,
|
||||||
AVPacket *avpkt)
|
AVPacket *avpkt)
|
||||||
|
@ -1862,19 +1953,28 @@ int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
|
||||||
avcodec_get_subtitle_defaults(sub);
|
avcodec_get_subtitle_defaults(sub);
|
||||||
|
|
||||||
if (avpkt->size) {
|
if (avpkt->size) {
|
||||||
|
AVPacket pkt_recoded;
|
||||||
AVPacket tmp = *avpkt;
|
AVPacket tmp = *avpkt;
|
||||||
int did_split = av_packet_split_side_data(&tmp);
|
int did_split = av_packet_split_side_data(&tmp);
|
||||||
//apply_param_change(avctx, &tmp);
|
//apply_param_change(avctx, &tmp);
|
||||||
|
|
||||||
avctx->pkt = &tmp;
|
pkt_recoded = tmp;
|
||||||
|
ret = recode_subtitle(avctx, &pkt_recoded, &tmp);
|
||||||
|
if (ret < 0) {
|
||||||
|
*got_sub_ptr = 0;
|
||||||
|
} else {
|
||||||
|
avctx->pkt = &pkt_recoded;
|
||||||
|
|
||||||
if (avctx->pkt_timebase.den && avpkt->pts != AV_NOPTS_VALUE)
|
if (avctx->pkt_timebase.den && avpkt->pts != AV_NOPTS_VALUE)
|
||||||
sub->pts = av_rescale_q(avpkt->pts,
|
sub->pts = av_rescale_q(avpkt->pts,
|
||||||
avctx->pkt_timebase, AV_TIME_BASE_Q);
|
avctx->pkt_timebase, AV_TIME_BASE_Q);
|
||||||
ret = avctx->codec->decode(avctx, sub, got_sub_ptr, &tmp);
|
ret = avctx->codec->decode(avctx, sub, got_sub_ptr, &pkt_recoded);
|
||||||
|
if (tmp.data != pkt_recoded.data)
|
||||||
|
av_free(pkt_recoded.data);
|
||||||
sub->format = !(avctx->codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB);
|
sub->format = !(avctx->codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB);
|
||||||
|
|
||||||
avctx->pkt = NULL;
|
avctx->pkt = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
if (did_split) {
|
if (did_split) {
|
||||||
ff_packet_free_side_data(&tmp);
|
ff_packet_free_side_data(&tmp);
|
||||||
if(ret == tmp.size)
|
if(ret == tmp.size)
|
||||||
|
|
|
@ -29,8 +29,8 @@
|
||||||
#include "libavutil/avutil.h"
|
#include "libavutil/avutil.h"
|
||||||
|
|
||||||
#define LIBAVCODEC_VERSION_MAJOR 54
|
#define LIBAVCODEC_VERSION_MAJOR 54
|
||||||
#define LIBAVCODEC_VERSION_MINOR 91
|
#define LIBAVCODEC_VERSION_MINOR 92
|
||||||
#define LIBAVCODEC_VERSION_MICRO 103
|
#define LIBAVCODEC_VERSION_MICRO 100
|
||||||
|
|
||||||
#define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
|
#define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
|
||||||
LIBAVCODEC_VERSION_MINOR, \
|
LIBAVCODEC_VERSION_MINOR, \
|
||||||
|
|
Loading…
Reference in New Issue