swresample: limit output size of audio frames

Similar to the previous commit, and for the same reasons. Unlike with
af_scaletempo, resampling does not have a natural frame size, so we set
an arbitrary size limit on output frames. We add a new option to control
this size, although I'm not sure whether anyone will use it, so mark it
for testing only.

Note that we go through some effort to avoid buffering data in
libswresample itself. One reason is that we might have to reinitialize
the resampler completely when changing speed, which drops the buffered
data. Another is that I'm not sure whether the resampler will do the
right thing when applying dynamic speed changes.
This commit is contained in:
wm4 2018-02-01 08:39:07 +01:00 committed by Kevin Mitchell
parent 171ec0a7e4
commit 7019e0dcfe
No known key found for this signature in database
GPG Key ID: 559A34B46A917232
3 changed files with 67 additions and 35 deletions

View File

@ -3496,6 +3496,15 @@ It also sets the defaults for the ``lavrresample`` audio filter.
(decoder downmixing), or in the audio output (system mixer), this has no
effect.
``--audio-resample-max-output-size=<length>``
Limit maximum size of audio frames filtered at once, in ms (default: 40).
The output size size is limited in order to make resample speed changes
react faster. This is necessary especially if decoders or filters output
very large frame sizes (like some lossless codecs or some DRC filters).
This option does not affect the resampling algorithm in any way.
For testing/debugging only. Can be removed or changed any time.
``--audio-swresample-o=<string>``
Set AVOptions on the SwrContext or AVAudioResampleContext. These should
be documented by FFmpeg or Libav.

View File

@ -80,6 +80,7 @@ struct priv {
struct mp_chmap out_channels;
double current_pts;
struct mp_aframe *input;
double cmd_speed;
double speed;
@ -96,6 +97,7 @@ const struct m_sub_options resample_conf = {
OPT_DOUBLE("audio-resample-cutoff", cutoff, M_OPT_RANGE,
.min = 0, .max = 1),
OPT_FLAG("audio-normalize-downmix", normalize, 0),
OPT_DOUBLE("audio-resample-max-output-size", max_output_frame_size, 0),
OPT_KEYVALUELIST("audio-swresample-o", avopts, 0),
{0}
},
@ -357,6 +359,7 @@ static void reset(struct mp_filter *f)
struct priv *p = f->priv;
p->current_pts = MP_NOPTS_VALUE;
TA_FREEP(&p->input);
if (!p->avrctx)
return;
@ -426,7 +429,8 @@ static bool reorder_planes(struct mp_aframe *mpa, int *reorder,
}
static int resample_frame(struct AVAudioResampleContext *r,
struct mp_aframe *out, struct mp_aframe *in)
struct mp_aframe *out, struct mp_aframe *in,
int consume_in)
{
// Be aware that the channel layout and count can be different for in and
// out frames. In some situations the caller will fix up the frames before
@ -439,7 +443,7 @@ static int resample_frame(struct AVAudioResampleContext *r,
av_o ? av_o->nb_samples : 0,
av_i ? av_i->extended_data : NULL,
av_i ? av_i->linesize[0] : 0,
av_i ? av_i->nb_samples : 0);
av_i ? MPMIN(av_i->nb_samples, consume_in) : 0);
}
static struct mp_frame filter_resample_output(struct priv *p,
@ -450,7 +454,15 @@ static struct mp_frame filter_resample_output(struct priv *p,
if (!p->avrctx)
goto error;
int samples = get_out_samples(p, in ? mp_aframe_get_size(in) : 0);
// Limit the filtered data size for better latency when changing speed.
// Avoid buffering data within the resampler => restrict input size.
// p->in_rate already includes the speed factor.
double s = p->opts->max_output_frame_size / 1000 * p->in_rate;
int max_in = lrint(MPCLAMP(s, 128, INT_MAX));
int consume_in = in ? mp_aframe_get_size(in) : 0;
consume_in = MPMIN(consume_in, max_in);
int samples = get_out_samples(p, consume_in);
out = mp_aframe_create();
mp_aframe_config_copy(out, p->pool_fmt);
if (mp_aframe_pool_allocate(p->out_pool, out, samples) < 0)
@ -458,7 +470,7 @@ static struct mp_frame filter_resample_output(struct priv *p,
int out_samples = 0;
if (samples) {
out_samples = resample_frame(p->avrctx, out, in);
out_samples = resample_frame(p->avrctx, out, in, consume_in);
if (out_samples < 0 || out_samples > samples)
goto error;
mp_aframe_set_size(out, out_samples);
@ -479,7 +491,7 @@ static struct mp_frame filter_resample_output(struct priv *p,
}
int got = 0;
if (out_samples)
got = resample_frame(p->avrctx_out, new, out);
got = resample_frame(p->avrctx_out, new, out, out_samples);
talloc_free(out);
out = new;
if (got != out_samples)
@ -491,12 +503,14 @@ static struct mp_frame filter_resample_output(struct priv *p,
if (in) {
mp_aframe_copy_attributes(out, in);
p->current_pts = mp_aframe_end_pts(in);
mp_aframe_skip_samples(in, consume_in);
}
if (out_samples) {
if (p->current_pts != MP_NOPTS_VALUE) {
double delay = get_delay(p) * mp_aframe_get_speed(out) +
mp_aframe_duration(out);
mp_aframe_duration(out) +
(p->input ? mp_aframe_duration(p->input) : 0);
mp_aframe_set_pts(out, p->current_pts - delay);
mp_aframe_mul_speed(out, p->speed);
}
@ -516,16 +530,19 @@ static void process(struct mp_filter *f)
{
struct priv *p = f->priv;
if (!mp_pin_can_transfer_data(f->ppins[1], f->ppins[0]))
if (!mp_pin_in_needs_data(f->ppins[1]))
return;
p->speed = p->cmd_speed * p->public.speed;
struct mp_aframe *input = NULL;
if (!p->input) {
struct mp_frame frame = mp_pin_out_read(f->ppins[0]);
struct mp_aframe *input = NULL;
if (frame.type == MP_FRAME_AUDIO) {
input = frame.data;
} else if (!frame.type) {
return; // no new data
} else if (frame.type != MP_FRAME_EOF) {
MP_ERR(p, "Unsupported frame type.\n");
mp_frame_unref(&frame);
@ -538,8 +555,11 @@ static void process(struct mp_filter *f)
mp_pin_in_write(f->ppins[1], MP_EOF_FRAME);
return;
}
}
if (input) {
assert(!p->input);
struct mp_swresample *s = &p->public;
int in_rate = mp_aframe_get_rate(input);
@ -549,7 +569,7 @@ static void process(struct mp_filter *f)
if (!in_rate || !in_format || !in_channels.num) {
MP_ERR(p, "Frame with invalid format unsupported\n");
mp_frame_unref(&frame);
talloc_free(input);
mp_filter_internal_mark_failed(f);
return;
}
@ -573,7 +593,8 @@ static void process(struct mp_filter *f)
if (out.type) {
mp_pin_in_write(f->ppins[1], out);
// continue filtering next time.
mp_pin_out_unread(f->ppins[0], frame);
mp_pin_out_unread(f->ppins[0],
MAKE_FRAME(MP_FRAME_AUDIO, input));
input = NULL;
}
}
@ -598,6 +619,8 @@ static void process(struct mp_filter *f)
return;
}
}
p->input = input;
}
int new_rate = rate_from_speed(p->in_rate_user, p->speed);
@ -624,32 +647,29 @@ static void process(struct mp_filter *f)
// in the resampler.
struct mp_frame out = filter_resample_output(p, NULL);
bool need_drain = !!out.type;
if (need_drain) {
if (need_drain)
mp_pin_in_write(f->ppins[1], out);
// Drain; continue filtering next time.
mp_pin_out_unread(f->ppins[0], frame);
}
// Reinitialize resampler.
configure_lavrr(p, false);
if (need_drain) {
mp_filter_internal_mark_progress(f);
// If we've written output, we must continue filtering next time.
if (need_drain)
return;
}
}
struct mp_frame out = filter_resample_output(p, input);
struct mp_frame out = filter_resample_output(p, p->input);
if (input && out.type) {
if (p->input && out.type) {
mp_pin_in_write(f->ppins[1], out);
mp_pin_out_request_data(f->ppins[0]);
} else if (!input && out.type) {
} else if (!p->input && out.type) {
mp_pin_in_write(f->ppins[1], out);
mp_pin_out_repeat_eof(f->ppins[0]);
} else if (!input) {
} else if (!p->input) {
mp_pin_in_write(f->ppins[1], MP_EOF_FRAME);
}
talloc_free(input);
if (p->input && !mp_aframe_get_size(p->input))
TA_FREEP(&p->input);
}
double mp_swresample_get_delay(struct mp_swresample *s)
@ -676,6 +696,7 @@ static void destroy(struct mp_filter *f)
struct priv *p = f->priv;
close_lavrr(p);
TA_FREEP(&p->input);
}
static const struct mp_filter_info swresample_filter = {

View File

@ -23,6 +23,7 @@ struct mp_resample_opts {
double cutoff;
int normalize;
int allow_passthrough;
double max_output_frame_size;
char **avopts;
};
@ -31,6 +32,7 @@ struct mp_resample_opts {
.cutoff = 0.0, \
.phase_shift = 10, \
.normalize = 0, \
.max_output_frame_size = 40,\
}
// Create the filter. If opts==NULL, use the global options as defaults.