diff --git a/doc/filters.texi b/doc/filters.texi index b3cdff46f0..537f8fee6f 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -2307,6 +2307,13 @@ This filter accepts the following options: @table @option @item model, m Set train model file to load. This option is always required. + +@item mix +Set how much to mix filtered samples into the final output. +Allowed range is from -1 to 1. Default value is 1. +Negative values are special: they set how much of the filtered noise +to keep in the final filter output. Set this option to -1 to hear the actual +noise removed from the input signal. @end table @section asetnsamples diff --git a/libavfilter/af_arnndn.c b/libavfilter/af_arnndn.c index 00a509477c..4551ce2b83 100644 --- a/libavfilter/af_arnndn.c +++ b/libavfilter/af_arnndn.c @@ -127,6 +127,7 @@ typedef struct DenoiseState { int last_period; float mem_hp_x[2]; float lastg[NB_BANDS]; + float history[FRAME_SIZE]; RNNState rnn; AVTXContext *tx, *txi; av_tx_fn tx_fn, txi_fn; @@ -136,6 +137,7 @@ typedef struct AudioRNNContext { const AVClass *class; char *model_name; + float mix; int channels; DenoiseState *st; @@ -496,12 +498,18 @@ static void frame_analysis(AudioRNNContext *s, DenoiseState *st, AVComplexFloat static void frame_synthesis(AudioRNNContext *s, DenoiseState *st, float *out, const AVComplexFloat *y) { LOCAL_ALIGNED_32(float, x, [WINDOW_SIZE]); + const float *src = st->history; + const float mix = s->mix; + const float imix = 1.f - FFMAX(mix, 0.f); inverse_transform(st, x, y); s->fdsp->vector_fmul(x, x, s->window, WINDOW_SIZE); s->fdsp->vector_fmac_scalar(x, st->synthesis_mem, 1.f, FRAME_SIZE); RNN_COPY(out, x, FRAME_SIZE); RNN_COPY(st->synthesis_mem, &x[FRAME_SIZE], FRAME_SIZE); + + for (int n = 0; n < FRAME_SIZE; n++) + out[n] = out[n] * mix + src[n] * imix; } static inline void xcorr_kernel(const float *x, const float *y, float sum[4], int len) @@ -1350,6 +1358,7 @@ static float rnnoise_channel(AudioRNNContext *s, DenoiseState *st, float *out, c float g[NB_BANDS]; 
float gf[FREQ_SIZE]; float vad_prob = 0; + float *history = st->history; static const float a_hp[2] = {-1.99599, 0.99600}; static const float b_hp[2] = {-2, 1}; int silence; @@ -1376,6 +1385,7 @@ static float rnnoise_channel(AudioRNNContext *s, DenoiseState *st, float *out, c } frame_synthesis(s, st, out, X); + memcpy(history, in, FRAME_SIZE * sizeof(*history)); return vad_prob; } @@ -1526,6 +1536,7 @@ static const AVFilterPad outputs[] = { static const AVOption arnndn_options[] = { { "model", "set model name", OFFSET(model_name), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, AF }, { "m", "set model name", OFFSET(model_name), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, AF }, + { "mix", "set output vs input mix", OFFSET(mix), AV_OPT_TYPE_FLOAT, {.dbl=1.0},-1, 1, AF }, { NULL } };