mirror of https://git.ffmpeg.org/ffmpeg.git
movenc: Allow writing timed ID3 metadata
This is based on a spec at https://aomediacodec.github.io/id3-emsg/, further based on ISO/IEC 23009-1:2019. Within libavformat, timed ID3 metadata (already supported by the mpegts demuxer and muxer) is handled as a separate data AVStream with codec type AV_CODEC_ID_TIMED_ID3. However, it doesn't have a corresponding track in the mov file - instead, these events are written as separate toplevel 'emsg' boxes. Signed-off-by: Martin Storsjö <martin@martin.st>
This commit is contained in:
parent
e4e3d25d41
commit
fbd5e238d4
|
@ -5519,7 +5519,7 @@ static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s)
|
||||||
{
|
{
|
||||||
MOVMuxContext *mov = s->priv_data;
|
MOVMuxContext *mov = s->priv_data;
|
||||||
int64_t pos = avio_tell(pb);
|
int64_t pos = avio_tell(pb);
|
||||||
int has_h264 = 0, has_av1 = 0, has_video = 0, has_dolby = 0;
|
int has_h264 = 0, has_av1 = 0, has_video = 0, has_dolby = 0, has_id3 = 0;
|
||||||
int has_iamf = 0;
|
int has_iamf = 0;
|
||||||
|
|
||||||
#if CONFIG_IAMFENC
|
#if CONFIG_IAMFENC
|
||||||
|
@ -5550,6 +5550,8 @@ static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s)
|
||||||
st->codecpar->nb_coded_side_data,
|
st->codecpar->nb_coded_side_data,
|
||||||
AV_PKT_DATA_DOVI_CONF))
|
AV_PKT_DATA_DOVI_CONF))
|
||||||
has_dolby = 1;
|
has_dolby = 1;
|
||||||
|
if (st->codecpar->codec_id == AV_CODEC_ID_TIMED_ID3)
|
||||||
|
has_id3 = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
avio_wb32(pb, 0); /* size */
|
avio_wb32(pb, 0); /* size */
|
||||||
|
@ -5629,6 +5631,9 @@ static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s)
|
||||||
if (mov->flags & FF_MOV_FLAG_DASH && mov->flags & FF_MOV_FLAG_GLOBAL_SIDX)
|
if (mov->flags & FF_MOV_FLAG_DASH && mov->flags & FF_MOV_FLAG_GLOBAL_SIDX)
|
||||||
ffio_wfourcc(pb, "dash");
|
ffio_wfourcc(pb, "dash");
|
||||||
|
|
||||||
|
if (has_id3)
|
||||||
|
ffio_wfourcc(pb, "aid3");
|
||||||
|
|
||||||
return update_size(pb, pos);
|
return update_size(pb, pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6712,6 +6717,34 @@ static int mov_build_iamf_packet(AVFormatContext *s, MOVTrack *trk, AVPacket *pk
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/**
 * Write one timed ID3 packet as a toplevel 'emsg' box (version 1, flags 0).
 *
 * A boundary-point data marker carrying the packet pts, rescaled to
 * AV_TIME_BASE_Q, is emitted before the box itself.
 *
 * @param pb  output context the marker and the box are written to
 * @param st  stream the packet belongs to; its time base numerator must be 1
 *            because only the denominator is stored as the box timescale
 * @param pkt packet whose pts is stored as presentation_time and whose data
 *            is appended verbatim after the two strings
 * @return the value returned by update_size() for this box
 */
static int mov_write_emsg_tag(AVIOContext *pb, AVStream *st, AVPacket *pkt)
{
    /* sizeof() on these arrays includes the terminating NUL byte. */
    static const char id3_scheme[]  = "https://aomedia.org/emsg/ID3";
    static const char event_value[] = "";
    int64_t box_start = avio_tell(pb);
    int64_t marker_time;

    /* The timescale field below can only express time bases of 1/den. */
    av_assert0(st->time_base.num == 1);

    marker_time = av_rescale_q(pkt->pts, st->time_base, AV_TIME_BASE_Q);
    avio_write_marker(pb, marker_time, AVIO_DATA_MARKER_BOUNDARY_POINT);

    avio_wb32(pb, 0); /* size */
    ffio_wfourcc(pb, "emsg");
    avio_w8(pb, 1); /* version */
    avio_wb24(pb, 0); /* flags */

    avio_wb32(pb, st->time_base.den); /* timescale */
    /* NOTE(review): pts is stored as-is; presumably callers never pass an
     * unset or negative pts here - confirm. */
    avio_wb64(pb, pkt->pts); /* presentation_time */
    avio_wb32(pb, 0xFFFFFFFFU); /* event_duration */
    avio_wb32(pb, 0); /* id */

    /* null terminated UTF8 strings: scheme_id_uri, then value */
    avio_write(pb, id3_scheme, sizeof(id3_scheme));
    avio_write(pb, event_value, sizeof(event_value));

    /* message payload: the raw packet data */
    avio_write(pb, pkt->data, pkt->size);

    return update_size(pb, box_start);
}
|
||||||
|
|
||||||
static int mov_write_packet(AVFormatContext *s, AVPacket *pkt)
|
static int mov_write_packet(AVFormatContext *s, AVPacket *pkt)
|
||||||
{
|
{
|
||||||
MOVMuxContext *mov = s->priv_data;
|
MOVMuxContext *mov = s->priv_data;
|
||||||
|
@ -6722,6 +6755,11 @@ static int mov_write_packet(AVFormatContext *s, AVPacket *pkt)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (s->streams[pkt->stream_index]->codecpar->codec_id == AV_CODEC_ID_TIMED_ID3) {
|
||||||
|
mov_write_emsg_tag(s->pb, s->streams[pkt->stream_index], pkt);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
trk = s->streams[pkt->stream_index]->priv_data;
|
trk = s->streams[pkt->stream_index]->priv_data;
|
||||||
|
|
||||||
#if CONFIG_IAMFENC
|
#if CONFIG_IAMFENC
|
||||||
|
@ -7381,6 +7419,12 @@ static int mov_init(AVFormatContext *s)
|
||||||
AVStream *st = s->streams[i];
|
AVStream *st = s->streams[i];
|
||||||
if (st->priv_data)
|
if (st->priv_data)
|
||||||
continue;
|
continue;
|
||||||
|
// Don't produce a track in the output file for timed ID3 streams.
|
||||||
|
if (st->codecpar->codec_id == AV_CODEC_ID_TIMED_ID3) {
|
||||||
|
// Leave priv_data set to NULL for these AVStreams that don't
|
||||||
|
// have a corresponding track.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
st->priv_data = st;
|
st->priv_data = st;
|
||||||
mov->nb_tracks++;
|
mov->nb_tracks++;
|
||||||
}
|
}
|
||||||
|
@ -7480,6 +7524,9 @@ static int mov_init(AVFormatContext *s)
|
||||||
MOVTrack *track = st->priv_data;
|
MOVTrack *track = st->priv_data;
|
||||||
AVDictionaryEntry *lang = av_dict_get(st->metadata, "language", NULL,0);
|
AVDictionaryEntry *lang = av_dict_get(st->metadata, "language", NULL,0);
|
||||||
|
|
||||||
|
if (!track)
|
||||||
|
continue;
|
||||||
|
|
||||||
if (!track->st) {
|
if (!track->st) {
|
||||||
track->st = st;
|
track->st = st;
|
||||||
track->par = st->codecpar;
|
track->par = st->codecpar;
|
||||||
|
|
|
@ -58,7 +58,7 @@ struct AVMD5* md5;
|
||||||
uint8_t hash[HASH_SIZE];
|
uint8_t hash[HASH_SIZE];
|
||||||
|
|
||||||
AVPacket *pkt;
|
AVPacket *pkt;
|
||||||
AVStream *video_st, *audio_st;
|
AVStream *video_st, *audio_st, *id3_st;
|
||||||
int64_t audio_dts, video_dts;
|
int64_t audio_dts, video_dts;
|
||||||
|
|
||||||
int bframes;
|
int bframes;
|
||||||
|
@ -177,7 +177,7 @@ static void check_func(int value, int line, const char *msg, ...)
|
||||||
}
|
}
|
||||||
#define check(value, ...) check_func(value, __LINE__, __VA_ARGS__)
|
#define check(value, ...) check_func(value, __LINE__, __VA_ARGS__)
|
||||||
|
|
||||||
static void init_fps(int bf, int audio_preroll, int fps)
|
static void init_fps(int bf, int audio_preroll, int fps, int id3)
|
||||||
{
|
{
|
||||||
AVStream *st;
|
AVStream *st;
|
||||||
int iobuf_size = force_iobuf_size ? force_iobuf_size : sizeof(iobuf);
|
int iobuf_size = force_iobuf_size ? force_iobuf_size : sizeof(iobuf);
|
||||||
|
@ -225,6 +225,17 @@ static void init_fps(int bf, int audio_preroll, int fps)
|
||||||
memcpy(st->codecpar->extradata, aac_extradata, sizeof(aac_extradata));
|
memcpy(st->codecpar->extradata, aac_extradata, sizeof(aac_extradata));
|
||||||
audio_st = st;
|
audio_st = st;
|
||||||
|
|
||||||
|
if (id3) {
|
||||||
|
st = avformat_new_stream(ctx, NULL);
|
||||||
|
if (!st)
|
||||||
|
exit(1);
|
||||||
|
st->codecpar->codec_type = AVMEDIA_TYPE_DATA;
|
||||||
|
st->codecpar->codec_id = AV_CODEC_ID_TIMED_ID3;
|
||||||
|
st->time_base.num = 1;
|
||||||
|
st->time_base.den = 1000;
|
||||||
|
id3_st = st;
|
||||||
|
}
|
||||||
|
|
||||||
if (avformat_write_header(ctx, &opts) < 0)
|
if (avformat_write_header(ctx, &opts) < 0)
|
||||||
exit(1);
|
exit(1);
|
||||||
av_dict_free(&opts);
|
av_dict_free(&opts);
|
||||||
|
@ -243,7 +254,7 @@ static void init_fps(int bf, int audio_preroll, int fps)
|
||||||
|
|
||||||
static void init(int bf, int audio_preroll)
|
static void init(int bf, int audio_preroll)
|
||||||
{
|
{
|
||||||
init_fps(bf, audio_preroll, 30);
|
init_fps(bf, audio_preroll, 30, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void mux_frames(int n, int c)
|
static void mux_frames(int n, int c)
|
||||||
|
@ -314,6 +325,23 @@ static void mux_frames(int n, int c)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void mux_id3(void)
|
||||||
|
{
|
||||||
|
uint8_t pktdata[8] = { 0 };
|
||||||
|
av_packet_unref(pkt);
|
||||||
|
|
||||||
|
pkt->dts = pkt->pts = av_rescale_q(video_dts + (bframes ? duration : 0),
|
||||||
|
video_st->time_base, id3_st->time_base);
|
||||||
|
pkt->stream_index = id3_st->index;
|
||||||
|
pkt->duration = 0;
|
||||||
|
|
||||||
|
AV_WB32(pktdata + 4, pkt->pts);
|
||||||
|
pkt->data = pktdata;
|
||||||
|
pkt->size = 8;
|
||||||
|
|
||||||
|
av_write_frame(ctx, pkt);
|
||||||
|
}
|
||||||
|
|
||||||
static void mux_gops(int n)
|
static void mux_gops(int n)
|
||||||
{
|
{
|
||||||
mux_frames(gop_size * n, 0);
|
mux_frames(gop_size * n, 0);
|
||||||
|
@ -702,7 +730,7 @@ int main(int argc, char **argv)
|
||||||
// by the edit list.
|
// by the edit list.
|
||||||
init_out("vfr");
|
init_out("vfr");
|
||||||
av_dict_set(&opts, "movflags", "+frag_keyframe+delay_moov+dash", 0);
|
av_dict_set(&opts, "movflags", "+frag_keyframe+delay_moov+dash", 0);
|
||||||
init_fps(1, 1, 3);
|
init_fps(1, 1, 3, 0);
|
||||||
mux_frames(gop_size/2, 0);
|
mux_frames(gop_size/2, 0);
|
||||||
duration /= 10;
|
duration /= 10;
|
||||||
mux_frames(gop_size/2, 0);
|
mux_frames(gop_size/2, 0);
|
||||||
|
@ -721,7 +749,7 @@ int main(int argc, char **argv)
|
||||||
clear_duration = 1;
|
clear_duration = 1;
|
||||||
init_out("vfr-noduration");
|
init_out("vfr-noduration");
|
||||||
av_dict_set(&opts, "movflags", "+frag_keyframe+delay_moov+dash", 0);
|
av_dict_set(&opts, "movflags", "+frag_keyframe+delay_moov+dash", 0);
|
||||||
init_fps(1, 1, 3);
|
init_fps(1, 1, 3, 0);
|
||||||
mux_frames(gop_size/2, 0);
|
mux_frames(gop_size/2, 0);
|
||||||
duration /= 10;
|
duration /= 10;
|
||||||
mux_frames(gop_size/2, 0);
|
mux_frames(gop_size/2, 0);
|
||||||
|
@ -737,7 +765,7 @@ int main(int argc, char **argv)
|
||||||
force_iobuf_size = 1500;
|
force_iobuf_size = 1500;
|
||||||
init_out("large_frag");
|
init_out("large_frag");
|
||||||
av_dict_set(&opts, "movflags", "+frag_keyframe+delay_moov", 0);
|
av_dict_set(&opts, "movflags", "+frag_keyframe+delay_moov", 0);
|
||||||
init_fps(1, 1, 3);
|
init_fps(1, 1, 3, 0);
|
||||||
mux_gops(2);
|
mux_gops(2);
|
||||||
finish();
|
finish();
|
||||||
close_out();
|
close_out();
|
||||||
|
@ -751,7 +779,7 @@ int main(int argc, char **argv)
|
||||||
init_out("vfr-noduration-interleave");
|
init_out("vfr-noduration-interleave");
|
||||||
av_dict_set(&opts, "movflags", "+frag_keyframe+delay_moov", 0);
|
av_dict_set(&opts, "movflags", "+frag_keyframe+delay_moov", 0);
|
||||||
av_dict_set(&opts, "frag_duration", "650000", 0);
|
av_dict_set(&opts, "frag_duration", "650000", 0);
|
||||||
init_fps(1, 1, 30);
|
init_fps(1, 1, 30, 0);
|
||||||
mux_frames(gop_size/2, 0);
|
mux_frames(gop_size/2, 0);
|
||||||
// Pretend that the packet duration is the normal, even if
|
// Pretend that the packet duration is the normal, even if
|
||||||
// we actually skip a bunch of frames. (I.e., simulate that
|
// we actually skip a bunch of frames. (I.e., simulate that
|
||||||
|
@ -788,6 +816,19 @@ int main(int argc, char **argv)
|
||||||
finish();
|
finish();
|
||||||
close_out();
|
close_out();
|
||||||
|
|
||||||
|
// Write a manually fragmented file, with timed ID3 packets at the head
|
||||||
|
// of each fragment.
|
||||||
|
init_out("emsg");
|
||||||
|
av_dict_set(&opts, "movflags", "+frag_custom+cmaf", 0);
|
||||||
|
init_fps(1, 0, 30, 1);
|
||||||
|
mux_id3();
|
||||||
|
mux_gops(2);
|
||||||
|
av_write_frame(ctx, NULL); // Flush fragment.
|
||||||
|
mux_id3();
|
||||||
|
mux_gops(2);
|
||||||
|
finish();
|
||||||
|
close_out();
|
||||||
|
|
||||||
av_free(md5);
|
av_free(md5);
|
||||||
av_packet_free(&pkt);
|
av_packet_free(&pkt);
|
||||||
|
|
||||||
|
|
|
@ -151,3 +151,11 @@ write_data len 900, time 0, type sync atom moof
|
||||||
write_data len 908, time 1000000, type sync atom moof
|
write_data len 908, time 1000000, type sync atom moof
|
||||||
write_data len 148, time nopts, type trailer atom -
|
write_data len 148, time nopts, type trailer atom -
|
||||||
3be575022e446855bca1e45b7942cc0c 3115 empty-moov-neg-cts
|
3be575022e446855bca1e45b7942cc0c 3115 empty-moov-neg-cts
|
||||||
|
write_data len 28, time nopts, type header atom ftyp
|
||||||
|
write_data len 1123, time nopts, type header atom -
|
||||||
|
write_data len 70, time 0, type boundary atom emsg
|
||||||
|
write_data len 1832, time 0, type sync atom moof
|
||||||
|
write_data len 70, time 2000000, type boundary atom emsg
|
||||||
|
write_data len 1840, time 2000000, type sync atom moof
|
||||||
|
write_data len 148, time nopts, type trailer atom -
|
||||||
|
b72c56c795693820b156f452354a51ff 5111 emsg
|
||||||
|
|
Loading…
Reference in New Issue