lavf/dv: use a more granular timebase for audio

Use a timebase that is fine enough to represent all DV audio sample
rates exactly (the LCM of the sample rates). Audio packet durations are
now sample-accurate.

This largely undoes commit 76fbb0052d. To
avoid reintroducing the issue fixed by that commit, resync audio
timestamps against video if they get more than one frame apart. The
sample from issue #8762 still works correctly after this commit.

Slightly changes the results of the lavf-dv seektest, due to the audio
timebase being more granular.
This commit is contained in:
Anton Khirnov 2023-04-24 16:47:50 +02:00
parent aae5ba31ca
commit 0ed1eeb744
3 changed files with 35 additions and 11 deletions

View File

@ -63,6 +63,9 @@ enum DVPackType {
// LCM of video framerate numerators
#define DV_TIMESCALE_VIDEO 60000
// LCM of audio sample rates
#define DV_TIMESCALE_AUDIO 14112000
/**
* maximum number of blocks per macroblock in any DV format
*/

View File

@ -71,6 +71,7 @@ struct DVDemuxContext {
int frames;
int64_t next_pts_video;
int64_t next_pts_audio;
};
static inline uint16_t dv_audio_12to16(uint16_t sample)
@ -282,7 +283,7 @@ static int dv_extract_audio_info(DVDemuxContext *c, const uint8_t *frame)
if (!c->ast[i])
return AVERROR(ENOMEM);
avpriv_set_pts_info(c->ast[i], 64, c->sys->time_base.num, c->sys->time_base.den);
avpriv_set_pts_info(c->ast[i], 64, 1, DV_TIMESCALE_AUDIO);
c->ast[i]->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
c->ast[i]->codecpar->codec_id = AV_CODEC_ID_PCM_S16LE;
c->ast[i]->codecpar->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO;
@ -421,6 +422,7 @@ int avpriv_dv_get_packet(DVDemuxContext *c, AVPacket *pkt)
int avpriv_dv_produce_packet(DVDemuxContext *c, AVPacket *pkt,
uint8_t *buf, int buf_size, int64_t pos)
{
int64_t pts, duration;
int size, i;
uint8_t *ppcm[5] = { 0 };
@ -436,13 +438,30 @@ int avpriv_dv_produce_packet(DVDemuxContext *c, AVPacket *pkt,
if (size < 0)
return size;
if (c->ach) {
int64_t next_pts_video = av_rescale_q(c->next_pts_video, c->vst->time_base,
c->ast[0]->time_base);
duration = av_rescale_q(size / 4,
(AVRational){ 1, c->audio_pkt[0].sample_rate },
c->ast[0]->time_base);
// if audio timestamps are more than one frame away from video,
// assume desync happened (e.g. due to dropped audio frames) and
// resynchronize
pts = (FFABS(next_pts_video - c->next_pts_audio) >= duration) ?
next_pts_video : c->next_pts_audio;
c->next_pts_audio = pts + duration;
}
for (i = 0; i < c->ach; i++) {
DVPacket *dpkt = &c->audio_pkt[i];
dpkt->pos = pos;
dpkt->size = size;
dpkt->pts = (c->sys->height == 720) ? (c->frames & ~1) : c->frames;
dpkt->duration = 1;
dpkt->pts = pts;
dpkt->duration = duration;
ppcm[i] = c->audio_buf[i];
}
@ -507,6 +526,8 @@ void ff_dv_ts_reset(DVDemuxContext *c, int64_t ts)
c->frames = !c->sys ? 0 :
av_rescale_q(ts, c->vst->time_base, c->sys->time_base);
c->next_pts_video = ts;
c->next_pts_audio = (!c->sys || !c->ast[0]) ? AV_NOPTS_VALUE :
av_rescale_q(ts, c->vst->time_base, c->ast[0]->time_base);
c->audio_pkt[0].size = c->audio_pkt[1].size = 0;
c->audio_pkt[2].size = c->audio_pkt[3].size = 0;

View File

@ -7,9 +7,9 @@ ret: 0 st: 0 flags:0 ts: 0.788333
ret: 0 st: 0 flags:1 dts: 0.800000 pts: 0.800000 pos:2880000 size:144000
ret: 0 st: 0 flags:1 ts:-0.317500
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 0 size:144000
ret: 0 st: 1 flags:0 ts: 2.560000
ret: 0 st: 1 flags:0 ts: 2.576668
ret: 0 st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3456000 size:144000
ret: 0 st: 1 flags:1 ts: 1.480000
ret: 0 st: 1 flags:1 ts: 1.470835
ret: 0 st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3456000 size:144000
ret: 0 st:-1 flags:0 ts: 0.365002
ret: 0 st: 0 flags:1 dts: 0.360000 pts: 0.360000 pos:1296000 size:144000
@ -19,9 +19,9 @@ ret: 0 st: 0 flags:0 ts: 2.153333
ret: 0 st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3456000 size:144000
ret: 0 st: 0 flags:1 ts: 1.047500
ret: 0 st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3456000 size:144000
ret: 0 st: 1 flags:0 ts:-0.040000
ret: 0 st: 1 flags:0 ts:-0.058330
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 0 size:144000
ret: 0 st: 1 flags:1 ts: 2.840000
ret: 0 st: 1 flags:1 ts: 2.835837
ret: 0 st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3456000 size:144000
ret: 0 st:-1 flags:0 ts: 1.730004
ret: 0 st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3456000 size:144000
@ -31,9 +31,9 @@ ret: 0 st: 0 flags:0 ts:-0.481667
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 0 size:144000
ret: 0 st: 0 flags:1 ts: 2.412500
ret: 0 st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3456000 size:144000
ret: 0 st: 1 flags:0 ts: 1.320000
ret: 0 st: 1 flags:0 ts: 1.306672
ret: 0 st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3456000 size:144000
ret: 0 st: 1 flags:1 ts: 0.200000
ret: 0 st: 1 flags:1 ts: 0.200839
ret: 0 st: 0 flags:1 dts: 0.200000 pts: 0.200000 pos: 720000 size:144000
ret: 0 st:-1 flags:0 ts:-0.904994
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 0 size:144000
@ -43,9 +43,9 @@ ret: 0 st: 0 flags:0 ts: 0.883333
ret: 0 st: 0 flags:1 dts: 0.880000 pts: 0.880000 pos:3168000 size:144000
ret: 0 st: 0 flags:1 ts:-0.222500
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 0 size:144000
ret: 0 st: 1 flags:0 ts: 2.680000
ret: 0 st: 1 flags:0 ts: 2.671674
ret: 0 st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3456000 size:144000
ret: 0 st: 1 flags:1 ts: 1.560000
ret: 0 st: 1 flags:1 ts: 1.565841
ret: 0 st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:3456000 size:144000
ret: 0 st:-1 flags:0 ts: 0.460008
ret: 0 st: 0 flags:1 dts: 0.480000 pts: 0.480000 pos:1728000 size:144000