From 456d48c752fb960508b373afa3f56d389c22a8e4 Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Mon, 28 Feb 2022 09:19:42 +0100 Subject: [PATCH] avfilter/af_dynaudnorm: add support for overlapping frames --- doc/filters.texi | 7 ++++++ libavfilter/af_dynaudnorm.c | 44 ++++++++++++++++++++++++++++++++----- 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/doc/filters.texi b/doc/filters.texi index b3ae139613..c3623dee89 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -4392,6 +4392,13 @@ This option is mostly useful if digital noise is not wanted to be amplified. @item channels, h Specify which channels to filter, by default all available channels are filtered. + +@item overlap, o +Specify overlap for frames. If set to 0 (default) no frame overlapping is done. +Using values >0 and <1 will make gain adjustments less conservative, as +when the framelen option is set to a smaller value. If the framelen option value is +compensated for the non-zero overlap, then gain adjustments will be smoother across time +compared to the zero overlap case. 
@end table @subsection Commands diff --git a/libavfilter/af_dynaudnorm.c b/libavfilter/af_dynaudnorm.c index 77899474bf..2adbcf3e10 100644 --- a/libavfilter/af_dynaudnorm.c +++ b/libavfilter/af_dynaudnorm.c @@ -64,6 +64,7 @@ typedef struct DynamicAudioNormalizerContext { int dc_correction; int channels_coupled; int alt_boundary_mode; + double overlap; double peak_value; double max_amplification; @@ -76,6 +77,7 @@ typedef struct DynamicAudioNormalizerContext { double *weights; int channels; + int sample_advance; int eof; uint64_t channels_to_filter; int64_t pts; @@ -86,6 +88,8 @@ typedef struct DynamicAudioNormalizerContext { cqueue **threshold_history; cqueue *is_enabled; + + AVFrame *window; } DynamicAudioNormalizerContext; #define OFFSET(x) offsetof(DynamicAudioNormalizerContext, x) @@ -114,6 +118,8 @@ static const AVOption dynaudnorm_options[] = { { "t", "set the threshold value", OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 0.0}, 0.0, 1.0, FLAGS }, { "channels", "set channels to filter", OFFSET(channels_to_filter),AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=-1}, INT64_MIN, INT64_MAX, FLAGS }, { "h", "set channels to filter", OFFSET(channels_to_filter),AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=-1}, INT64_MIN, INT64_MAX, FLAGS }, + { "overlap", "set the frame overlap", OFFSET(overlap), AV_OPT_TYPE_DOUBLE, {.dbl=.0}, 0.0, 1.0, FLAGS }, + { "o", "set the frame overlap", OFFSET(overlap), AV_OPT_TYPE_DOUBLE, {.dbl=.0}, 0.0, 1.0, FLAGS }, { NULL } }; @@ -295,6 +301,8 @@ static av_cold void uninit(AVFilterContext *ctx) av_freep(&s->weights); ff_bufqueue_discard_all(&s->queue); + + av_frame_free(&s->window); } static int config_input(AVFilterLink *inlink) @@ -340,6 +348,11 @@ static int config_input(AVFilterLink *inlink) init_gaussian_filter(s); + s->window = ff_get_audio_buffer(ctx->outputs[0], s->frame_len * 2); + if (!s->window) + return AVERROR(ENOMEM); + s->sample_advance = FFMAX(1, lrint(s->frame_len * (1. 
- s->overlap))); + return 0; } @@ -649,6 +662,8 @@ static void perform_compression(DynamicAudioNormalizerContext *s, AVFrame *frame static int analyze_frame(DynamicAudioNormalizerContext *s, AVFilterLink *outlink, AVFrame **frame) { + AVFrame *analyze_frame; + if (s->dc_correction || s->compress_factor > DBL_EPSILON) { int ret; @@ -683,8 +698,26 @@ static int analyze_frame(DynamicAudioNormalizerContext *s, AVFilterLink *outlink if (s->compress_factor > DBL_EPSILON) perform_compression(s, *frame); + if (s->frame_len != s->sample_advance) { + const int offset = s->frame_len - s->sample_advance; + + for (int c = 0; c < s->channels; c++) { + double *src = (double *)s->window->extended_data[c]; + + memmove(src, &src[s->sample_advance], offset * sizeof(double)); + memcpy(&src[offset], (*frame)->extended_data[c], (*frame)->nb_samples * sizeof(double)); + memset(&src[offset + (*frame)->nb_samples], 0, (s->sample_advance - (*frame)->nb_samples) * sizeof(double)); + } + + analyze_frame = s->window; + } else { + av_samples_copy(s->window->extended_data, (*frame)->extended_data, 0, 0, + s->frame_len, (*frame)->channels, (*frame)->format); + analyze_frame = *frame; + } + if (s->channels_coupled) { - const local_gain gain = get_max_local_gain(s, *frame, -1); + const local_gain gain = get_max_local_gain(s, analyze_frame, -1); int c; for (c = 0; c < s->channels; c++) @@ -693,7 +726,7 @@ static int analyze_frame(DynamicAudioNormalizerContext *s, AVFilterLink *outlink int c; for (c = 0; c < s->channels; c++) - update_gain_history(s, c, get_max_local_gain(s, *frame, c)); + update_gain_history(s, c, get_max_local_gain(s, analyze_frame, c)); } return 0; @@ -777,7 +810,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) static int flush_buffer(DynamicAudioNormalizerContext *s, AVFilterLink *inlink, AVFilterLink *outlink) { - AVFrame *out = ff_get_audio_buffer(outlink, s->frame_len); + AVFrame *out = ff_get_audio_buffer(outlink, s->sample_advance); int c, i; if (!out) @@ 
-830,7 +863,7 @@ static int activate(AVFilterContext *ctx) FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink); if (!s->eof) { - ret = ff_inlink_consume_samples(inlink, s->frame_len, s->frame_len, &in); + ret = ff_inlink_consume_samples(inlink, s->sample_advance, s->sample_advance, &in); if (ret < 0) return ret; if (ret > 0) { @@ -839,7 +872,7 @@ static int activate(AVFilterContext *ctx) return ret; } - if (ff_inlink_check_available_samples(inlink, s->frame_len) > 0) { + if (ff_inlink_check_available_samples(inlink, s->sample_advance) > 0) { ff_filter_set_ready(ctx, 10); return 0; } @@ -888,6 +921,7 @@ static int process_command(AVFilterContext *ctx, const char *cmd, const char *ar } s->frame_len = frame_size(inlink->sample_rate, s->frame_len_msec); + s->sample_advance = FFMAX(1, lrint(s->frame_len * (1. - s->overlap))); return 0; }