ffmpeg: add video heartbeat capability to fix_sub_duration

Splits the currently handled subtitle at random access point
packets that can be configured to follow a specific output stream.
Currently only subtitle streams which are directly mapped into the
same output in which the heartbeat stream resides are affected.

This way the subtitle - which is known to be shown at this time -
can be split and passed to the muxer before its full duration is
known. This is also a drawback, as this essentially outputs
multiple subtitles from a single input subtitle that continues
over multiple random access points. Thus this feature should not
be utilized in cases where subtitle output latency does not matter.

Co-authored-by: Andrzej Nadachowski <andrzej.nadachowski@24i.com>
Co-authored-by: Bernard Boulay <bernard.boulay@24i.com>

Signed-off-by: Jan Ekström <jan.ekstrom@24i.com>
This commit is contained in:
Jan Ekström 2022-07-22 13:57:54 +03:00 committed by Jan Ekström
parent 746d27455b
commit 9a820ec8b1
8 changed files with 245 additions and 0 deletions

View File

@ -36,6 +36,7 @@ version <next>:
- hstack_vaapi, vstack_vaapi and xstack_vaapi filters
- XMD ADPCM decoder and demuxer
- media100 to mjpegb bsf
- ffmpeg CLI new option: -fix_sub_duration_heartbeat
version 5.1:

View File

@ -1342,6 +1342,22 @@ List all hardware acceleration components enabled in this build of ffmpeg.
Actual runtime availability depends on the hardware and its suitable driver
being installed.
@item -fix_sub_duration_heartbeat[:@var{stream_specifier}]
Set a specific output video stream as the heartbeat stream according to which
to split and push through the currently in-progress subtitle upon receipt of a
random access packet.
This lowers the latency of subtitles for which the end packet or the following
subtitle has not yet been received. As a drawback, this will most likely lead
to duplication of subtitle events in order to cover the full duration, so
when dealing with use cases where latency of when the subtitle event is passed
on to output is not relevant this option should not be utilized.
Requires @option{-fix_sub_duration} to be set for the relevant input subtitle
stream for this to have any effect. The input subtitle stream must also be
directly mapped to the same output in which the heartbeat stream resides.
@end table
@section Audio Options

View File

@ -126,6 +126,7 @@ typedef struct BenchmarkTimeStamps {
int64_t sys_usec;
} BenchmarkTimeStamps;
static int trigger_fix_sub_duration_heartbeat(OutputStream *ost, const AVPacket *pkt);
static BenchmarkTimeStamps get_benchmark_time_stamps(void);
static int64_t getmaxrss(void);
static int ifilter_has_all_input_formats(FilterGraph *fg);
@ -953,6 +954,13 @@ static int encode_frame(OutputFile *of, OutputStream *ost, AVFrame *frame)
av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, &enc->time_base));
}
if ((ret = trigger_fix_sub_duration_heartbeat(ost, pkt)) < 0) {
av_log(NULL, AV_LOG_ERROR,
"Subtitle heartbeat logic failed in %s! (%s)\n",
__func__, av_err2str(ret));
exit_program(1);
}
ost->data_size_enc += pkt->size;
ost->packets_encoded++;
@ -1912,6 +1920,16 @@ static void do_streamcopy(InputStream *ist, OutputStream *ost, const AVPacket *p
opkt->duration = av_rescale_q(pkt->duration, ist->st->time_base, ost->mux_timebase);
{
int ret = trigger_fix_sub_duration_heartbeat(ost, pkt);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR,
"Subtitle heartbeat logic failed in %s! (%s)\n",
__func__, av_err2str(ret));
exit_program(1);
}
}
of_output_packet(of, opkt, ost, 0);
ost->streamcopy_started = 1;
@ -2355,6 +2373,136 @@ out:
return ret;
}
/**
 * Create a deep copy of an AVSubtitle.
 *
 * The scalar fields (format, display times, pts) are copied, and every
 * rectangle of src is duplicated together with its text, ass and data
 * buffers. The copy is assembled in a temporary and only assigned to dst
 * once everything has been duplicated.
 *
 * @param dst destination, overwritten on success only; whatever it held
 *            before is not freed here
 * @param src subtitle to duplicate; it is only read
 * @return 0 on success, AVERROR(ENOMEM) if an allocation fails. On failure
 *         the partially built copy is released with avsubtitle_free() and
 *         dst is left untouched.
 */
static int copy_av_subtitle(AVSubtitle *dst, AVSubtitle *src)
{
    int ret = AVERROR_BUG;
    AVSubtitle tmp = {
        .format             = src->format,
        .start_display_time = src->start_display_time,
        .end_display_time   = src->end_display_time,
        .num_rects          = 0,
        .rects              = NULL,
        .pts                = src->pts
    };

    if (!src->num_rects)
        goto success;

    if (!(tmp.rects = av_calloc(src->num_rects, sizeof(*tmp.rects))))
        return AVERROR(ENOMEM);

    for (int i = 0; i < src->num_rects; i++) {
        AVSubtitleRect *src_rect = src->rects[i];
        AVSubtitleRect *dst_rect;

        if (!(dst_rect = tmp.rects[i] = av_mallocz(sizeof(*tmp.rects[0])))) {
            ret = AVERROR(ENOMEM);
            goto cleanup;
        }

        // Count the rectangle as soon as it exists, so that the cleanup
        // path also releases a rectangle that was only partially filled.
        tmp.num_rects++;

        dst_rect->type      = src_rect->type;
        dst_rect->flags     = src_rect->flags;

        dst_rect->x         = src_rect->x;
        dst_rect->y         = src_rect->y;
        dst_rect->w         = src_rect->w;
        dst_rect->h         = src_rect->h;
        dst_rect->nb_colors = src_rect->nb_colors;

        if (src_rect->text) {
            if (!(dst_rect->text = av_strdup(src_rect->text))) {
                ret = AVERROR(ENOMEM);
                goto cleanup;
            }
        }

        if (src_rect->ass) {
            if (!(dst_rect->ass = av_strdup(src_rect->ass))) {
                ret = AVERROR(ENOMEM);
                goto cleanup;
            }
        }

        for (int j = 0; j < 4; j++) {
            size_t buf_size;

            if (!src_rect->data[j])
                continue;

            // SUBTITLE_BITMAP images are special in the sense that they
            // are like PAL8 images. first pointer to data, second to
            // palette. This makes the size calculation match this.
            // The multiplication is carried out in size_t so that a large
            // height * linesize product cannot overflow signed int.
            buf_size = src_rect->type == SUBTITLE_BITMAP && j == 1 ?
                       AVPALETTE_SIZE :
                       (size_t)src_rect->h * src_rect->linesize[j];

            if (!(dst_rect->data[j] = av_memdup(src_rect->data[j], buf_size))) {
                ret = AVERROR(ENOMEM);
                goto cleanup;
            }
            dst_rect->linesize[j] = src_rect->linesize[j];
        }
    }

success:
    *dst = tmp;

    return 0;

cleanup:
    avsubtitle_free(&tmp);

    return ret;
}
/**
 * Feed a copy of the subtitle currently held back for an input stream
 * through process_subtitle(), stamped with the heartbeat timestamp.
 *
 * Nothing is done, and 0 is returned, when fix_sub_duration is not enabled
 * on the stream, when the held subtitle has no rectangles, or when the
 * heartbeat is not strictly later than the held subtitle's pts.
 *
 * @param ist        input subtitle stream whose prev_sub is inspected
 * @param signal_pts heartbeat timestamp, compared against and stored into
 *                   AVSubtitle.pts
 * @return 0 when nothing had to be done, a negative error code if copying
 *         the subtitle failed, otherwise the result of process_subtitle()
 */
static int fix_sub_duration_heartbeat(InputStream *ist, int64_t signal_pts)
{
    AVSubtitle *held = &ist->prev_sub.subtitle;
    AVSubtitle split;
    int got_output = 1;
    int err;

    if (!ist->fix_sub_duration)
        return 0;

    if (!held->num_rects)
        return 0;

    if (signal_pts <= held->pts)
        return 0;

    err = copy_av_subtitle(&split, held);
    if (err < 0)
        return err;

    // The copy starts where the heartbeat occurred.
    // NOTE(review): the copy is handed to process_subtitle() and not freed
    // here - presumably ownership is taken over there; confirm.
    split.pts = signal_pts;

    return process_subtitle(ist, &split, &got_output);
}
/**
 * Let a key packet of a heartbeat stream trigger fix_sub_duration_heartbeat()
 * on the decoded subtitle streams of the same output file.
 *
 * @param ost output stream the packet belongs to; only acts when its
 *            fix_sub_duration_heartbeat flag is set
 * @param pkt packet to inspect; only packets flagged AV_PKT_FLAG_KEY act
 *            as a heartbeat. Its pts is rescaled from ost->mux_timebase to
 *            AV_TIME_BASE_Q.
 * @return 0 on success or when there was nothing to do, otherwise the first
 *         negative error code returned by fix_sub_duration_heartbeat()
 *
 * NOTE(review): pkt->pts is always interpreted in ost->mux_timebase. The
 * do_streamcopy() call site passes its input packet, whose duration it
 * rescales from ist->st->time_base - confirm that the pts of that packet is
 * really expressed in ost->mux_timebase.
 * NOTE(review): an unset pts (AV_NOPTS_VALUE) gets no special treatment
 * here - confirm callers never pass one, or that the rescaled value is
 * harmless.
 */
static int trigger_fix_sub_duration_heartbeat(OutputStream *ost, const AVPacket *pkt)
{
    OutputFile *of = output_files[ost->file_index];
    int64_t heartbeat_pts;
    int idx;

    // Only streams configured as heartbeat are of interest ...
    if (!ost->fix_sub_duration_heartbeat)
        return 0;

    // ... and of those only the random access points.
    if (!(pkt->flags & AV_PKT_FLAG_KEY))
        return 0;

    heartbeat_pts = av_rescale_q(pkt->pts, ost->mux_timebase, AV_TIME_BASE_Q);

    for (idx = 0; idx < of->nb_streams; idx++) {
        OutputStream *candidate = of->streams[idx];
        InputStream *sub_ist = candidate->ist;
        int err;

        // The stream causing the heartbeat is never a target itself.
        if (candidate == ost)
            continue;

        // Skip output streams without an input stream and streams that are
        // not decoded, as fix_sub_duration is only done for decoded
        // subtitles.
        if (!sub_ist || !sub_ist->decoding_needed)
            continue;

        // Anything but subtitles is skipped as well.
        if (sub_ist->dec_ctx->codec_type != AVMEDIA_TYPE_SUBTITLE)
            continue;

        err = fix_sub_duration_heartbeat(sub_ist, heartbeat_pts);
        if (err < 0)
            return err;
    }

    return 0;
}
static int transcode_subtitles(InputStream *ist, AVPacket *pkt, int *got_output,
int *decode_failed)
{

View File

@ -224,6 +224,8 @@ typedef struct OptionsContext {
int nb_reinit_filters;
SpecifierOpt *fix_sub_duration;
int nb_fix_sub_duration;
SpecifierOpt *fix_sub_duration_heartbeat;
int nb_fix_sub_duration_heartbeat;
SpecifierOpt *canvas_sizes;
int nb_canvas_sizes;
SpecifierOpt *pass;
@ -675,6 +677,12 @@ typedef struct OutputStream {
EncStats enc_stats_pre;
EncStats enc_stats_post;
/*
* bool on whether this stream should be utilized for splitting
* subtitles utilizing fix_sub_duration at random access points.
*/
unsigned int fix_sub_duration_heartbeat;
} OutputStream;
typedef struct OutputFile {

View File

@ -61,6 +61,7 @@ static const char *const opt_name_enc_stats_pre_fmt[] = {"enc_stats_pre_
static const char *const opt_name_enc_stats_post_fmt[] = {"enc_stats_post_fmt", NULL};
static const char *const opt_name_filters[] = {"filter", "af", "vf", NULL};
static const char *const opt_name_filter_scripts[] = {"filter_script", NULL};
static const char *const opt_name_fix_sub_duration_heartbeat[] = {"fix_sub_duration_heartbeat", NULL};
static const char *const opt_name_fps_mode[] = {"fps_mode", NULL};
static const char *const opt_name_force_fps[] = {"force_fps", NULL};
static const char *const opt_name_forced_key_frames[] = {"forced_key_frames", NULL};
@ -614,6 +615,9 @@ static OutputStream *new_output_stream(Muxer *mux, const OptionsContext *o,
MATCH_PER_STREAM_OPT(bits_per_raw_sample, i, ost->bits_per_raw_sample,
oc, st);
MATCH_PER_STREAM_OPT(fix_sub_duration_heartbeat, i, ost->fix_sub_duration_heartbeat,
oc, st);
if (oc->oformat->flags & AVFMT_GLOBALHEADER && ost->enc_ctx)
ost->enc_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

View File

@ -1658,6 +1658,11 @@ const OptionDef options[] = {
{ "autoscale", HAS_ARG | OPT_BOOL | OPT_SPEC |
OPT_EXPERT | OPT_OUTPUT, { .off = OFFSET(autoscale) },
"automatically insert a scale filter at the end of the filter graph" },
{ "fix_sub_duration_heartbeat", OPT_VIDEO | OPT_BOOL | OPT_EXPERT |
OPT_SPEC | OPT_OUTPUT, { .off = OFFSET(fix_sub_duration_heartbeat) },
"set this video output stream to be a heartbeat stream for "
"fix_sub_duration, according to which subtitles should be split at "
"random access points" },
/* audio options */
{ "aframes", OPT_AUDIO | HAS_ARG | OPT_PERFILE | OPT_OUTPUT, { .func_arg = opt_audio_frames },

View File

@ -117,6 +117,21 @@ fate-ffmpeg-fix_sub_duration: CMD = fmtstdout srt -fix_sub_duration \
-real_time 1 -f lavfi \
-i "movie=$(TARGET_SAMPLES)/sub/Closedcaption_rollup.m2v[out0+subcc]"
# Basic test for fix_sub_duration_heartbeat, which causes a buffered subtitle
# to be pushed out when a video keyframe is received from an encoder.
FATE_SAMPLES_FFMPEG-$(call FILTERDEMDECENCMUX, MOVIE, MPEGVIDEO, \
MPEG2VIDEO, SUBRIP, SRT, LAVFI_INDEV \
MPEGVIDEO_PARSER CCAPTION_DECODER \
MPEG2VIDEO_ENCODER NULL_MUXER PIPE_PROTOCOL) \
+= fate-ffmpeg-fix_sub_duration_heartbeat
fate-ffmpeg-fix_sub_duration_heartbeat: CMD = fmtstdout srt -fix_sub_duration \
-real_time 1 -f lavfi \
-i "movie=$(TARGET_SAMPLES)/sub/Closedcaption_rollup.m2v[out0+subcc]" \
-map 0:v -map 0:s -fix_sub_duration_heartbeat:v:0 \
-c:v mpeg2video -b:v 2M -g 30 -sc_threshold 1000000000 \
-c:s srt \
-f null -
FATE_STREAMCOPY-$(call REMUX, MP4 MOV, EAC3_DEMUXER) += fate-copy-trac3074
fate-copy-trac3074: CMD = transcode eac3 $(TARGET_SAMPLES)/eac3/csi_miami_stereo_128_spx.eac3\
mp4 "-codec copy -map 0" "-codec copy"

View File

@ -0,0 +1,48 @@
1
00:00:00,968 --> 00:00:01,001
<font face="Monospace">{\an7}(</font>
2
00:00:01,001 --> 00:00:01,168
<font face="Monospace">{\an7}(</font>
3
00:00:01,168 --> 00:00:01,368
<font face="Monospace">{\an7}(<i> inaudibl</i></font>
4
00:00:01,368 --> 00:00:01,568
<font face="Monospace">{\an7}(<i> inaudible radio chat</i></font>
5
00:00:01,568 --> 00:00:02,002
<font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )</font>
6
00:00:02,002 --> 00:00:03,003
<font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )</font>
7
00:00:03,003 --> 00:00:03,103
<font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )</font>
8
00:00:03,103 --> 00:00:03,303
<font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )
>></font>
9
00:00:03,303 --> 00:00:03,503
<font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )
>> Safety rema</font>
10
00:00:03,504 --> 00:00:03,704
<font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )
>> Safety remains our numb</font>
11
00:00:03,704 --> 00:00:04,004
<font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )
>> Safety remains our number one</font>