af_scaletempo2: fix processing of final packet

After the final input packet, the filter padded with silence to allow
one more iteration. That was not enough to process the final frames.

Continue padding the end of `input_buffer` with silence until the final
frames have been processed.

Implementation: Instead of padding when the final samples are added, pad
before running a WSOLA iteration. Keep count of the added silent frames and
of the remaining final input frames for timekeeping.
This commit is contained in:
ferreum 2023-08-13 13:10:58 +02:00 committed by Niklas Haas
parent cf8b7ff0d6
commit 8080d00d7f
3 changed files with 64 additions and 16 deletions

View File

@ -65,10 +65,13 @@ static void process(struct mp_filter *f)
int frame_size = mp_aframe_get_size(p->pending);
uint8_t **planes = mp_aframe_get_data_ro(p->pending);
int read = mp_scaletempo2_fill_input_buffer(&p->data,
planes, frame_size, final, p->speed);
planes, frame_size, p->speed);
mp_aframe_skip_samples(p->pending, read);
}
p->sent_final |= final;
if (final && p->pending && !p->sent_final) {
mp_scaletempo2_set_final(&p->data);
p->sent_final = true;
}
if (mp_scaletempo2_frames_available(&p->data, p->speed)) {
if (eof) {
@ -80,11 +83,8 @@ static void process(struct mp_filter *f)
if (eof) {
mp_pin_in_write(f->ppins[1], MP_EOF_FRAME);
return;
} else if (format_change) {
// go on with proper reinit on the next iteration
p->initialized = false;
p->sent_final = false;
}
// for format change go on with proper reinit on the next iteration
}
}

View File

@ -421,6 +421,9 @@ static void seek_buffer(struct mp_scaletempo2 *p, int frames)
{
assert(p->input_buffer_frames >= frames);
p->input_buffer_frames -= frames;
if (p->input_buffer_final_frames > 0) {
p->input_buffer_final_frames = MPMAX(0, p->input_buffer_final_frames - frames);
}
for (int i = 0; i < p->channels; ++i) {
memmove(p->input_buffer[i], p->input_buffer[i] + frames,
p->input_buffer_frames * sizeof(float));
@ -483,27 +486,53 @@ static void resize_input_buffer(struct mp_scaletempo2 *p, int size)
p->input_buffer = realloc_2d(p->input_buffer, p->channels, size);
}
// Append silence to the end of |input_buffer| so that a WSOLA iteration can
// be performed. The amount appended is whatever frames_needed() reports for
// |playback_rate|; nothing happens when that is zero or negative.
// The appended frames are counted in both |input_buffer_frames| and
// |input_buffer_added_silence|.
static void add_input_buffer_final_silence(struct mp_scaletempo2 *p, double playback_rate)
{
    int silence_frames = frames_needed(p, playback_rate);
    if (silence_frames <= 0)
        return; // enough input is buffered already

    // Grow the buffer first if the padded length would not fit.
    int padded_frames = silence_frames + p->input_buffer_frames;
    if (padded_frames > p->input_buffer_size)
        resize_input_buffer(p, padded_frames);

    // Zero the region right after the currently buffered frames, per channel.
    for (int ch = 0; ch < p->channels; ++ch) {
        float *tail = p->input_buffer[ch] + p->input_buffer_frames;
        for (int k = 0; k < silence_frames; ++k)
            tail[k] = 0.0f;
    }

    p->input_buffer_added_silence += silence_frames;
    p->input_buffer_frames += silence_frames;
}
// Mark the frames currently held in |input_buffer| as the final ones by
// recording their count in |input_buffer_final_frames|. If that counter is
// already positive, it is left unchanged.
void mp_scaletempo2_set_final(struct mp_scaletempo2 *p)
{
    if (p->input_buffer_final_frames > 0)
        return; // final frame count was already recorded

    p->input_buffer_final_frames = p->input_buffer_frames;
}
int mp_scaletempo2_fill_input_buffer(struct mp_scaletempo2 *p,
uint8_t **planes, int frame_size, bool final, double playback_rate)
uint8_t **planes, int frame_size, double playback_rate)
{
int needed = frames_needed(p, playback_rate);
int read = MPMIN(needed, frame_size);
int total_fill = final ? needed : read;
if (total_fill == 0) return 0;
if (read == 0)
return 0;
int required_size = total_fill + p->input_buffer_frames;
int required_size = read + p->input_buffer_frames;
if (required_size > p->input_buffer_size)
resize_input_buffer(p, required_size);
for (int i = 0; i < p->channels; ++i) {
memcpy(p->input_buffer[i] + p->input_buffer_frames,
planes[i], read * sizeof(float));
for (int j = read; j < total_fill; ++j) {
p->input_buffer[i][p->input_buffer_frames + j] = 0.0f;
}
}
p->input_buffer_frames += total_fill;
p->input_buffer_frames += read;
return read;
}
@ -669,6 +698,10 @@ int mp_scaletempo2_fill_buffer(struct mp_scaletempo2 *p,
{
if (playback_rate == 0) return 0;
if (p->input_buffer_final_frames > 0) {
add_input_buffer_final_silence(p, playback_rate);
}
// Optimize the muted case to issue a single clear instead of performing
// the full crossfade and clearing each crossfaded frame.
if (playback_rate < p->opts->min_playback_rate
@ -726,12 +759,15 @@ int mp_scaletempo2_fill_buffer(struct mp_scaletempo2 *p,
// Compute the filter latency from the buffered state: frames held in
// |input_buffer| minus |output_time|, minus the silence frames that were
// appended as padding, plus the completed output frames scaled by
// |playback_rate|.
// NOTE(review): the result is presumably measured in input frames - confirm
// against the caller.
double mp_scaletempo2_get_latency(struct mp_scaletempo2 *p, double playback_rate)
{
    // Evaluated in the same left-to-right order as a single expression so the
    // floating-point result is unchanged.
    double latency = p->input_buffer_frames - p->output_time;
    latency = latency - p->input_buffer_added_silence; // padding is not real input
    latency = latency + p->num_complete_frames * playback_rate;
    return latency;
}
bool mp_scaletempo2_frames_available(struct mp_scaletempo2 *p, double playback_rate)
{
return can_perform_wsola(p, playback_rate) || p->num_complete_frames > 0;
return p->input_buffer_final_frames > p->target_block_index
|| can_perform_wsola(p, playback_rate)
|| p->num_complete_frames > 0;
}
void mp_scaletempo2_destroy(struct mp_scaletempo2 *p)
@ -749,6 +785,8 @@ void mp_scaletempo2_destroy(struct mp_scaletempo2 *p)
void mp_scaletempo2_reset(struct mp_scaletempo2 *p)
{
p->input_buffer_frames = 0;
p->input_buffer_final_frames = 0;
p->input_buffer_added_silence = 0;
p->output_time = 0.0;
p->search_block_index = 0;
p->target_block_index = 0;
@ -827,6 +865,8 @@ void mp_scaletempo2_init(struct mp_scaletempo2 *p, int channels, int rate)
resize_input_buffer(p, 4 * MPMAX(p->ola_window_size, p->search_block_size));
p->input_buffer_frames = 0;
p->input_buffer_final_frames = 0;
p->input_buffer_added_silence = 0;
p->energy_candidate_blocks = realloc(p->energy_candidate_blocks,
sizeof(float) * p->channels * p->num_candidate_blocks);

View File

@ -112,6 +112,13 @@ struct mp_scaletempo2 {
float **input_buffer;
int input_buffer_size;
int input_buffer_frames;
// How many frames in |input_buffer| need to be flushed by padding with
// silence to process the final packet. While this is nonzero, the filter
// appends silence to |input_buffer| until these frames are processed.
int input_buffer_final_frames;
// How many additional frames of silence have been added to |input_buffer|
// for padding after the final packet.
int input_buffer_added_silence;
float *energy_candidate_blocks;
};
@ -120,7 +127,8 @@ void mp_scaletempo2_reset(struct mp_scaletempo2 *p);
void mp_scaletempo2_init(struct mp_scaletempo2 *p, int channels, int rate);
double mp_scaletempo2_get_latency(struct mp_scaletempo2 *p, double playback_rate);
int mp_scaletempo2_fill_input_buffer(struct mp_scaletempo2 *p,
uint8_t **planes, int frame_size, bool final, double playback_rate);
uint8_t **planes, int frame_size, double playback_rate);
void mp_scaletempo2_set_final(struct mp_scaletempo2 *p);
int mp_scaletempo2_fill_buffer(struct mp_scaletempo2 *p,
float **dest, int dest_size, double playback_rate);
bool mp_scaletempo2_frames_available(struct mp_scaletempo2 *p, double playback_rate);