From 1f9f0dc6ee19f4861d91f4a1a853bd8439e98dce Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Sun, 18 Sep 2022 22:30:48 +0200 Subject: [PATCH] avfilter/af_afftdn: add double sample format support --- libavfilter/af_afftdn.c | 218 +++++++++++++++++++++++++++++++--------- 1 file changed, 170 insertions(+), 48 deletions(-) diff --git a/libavfilter/af_afftdn.c b/libavfilter/af_afftdn.c index cdb256fa10..2b465afcef 100644 --- a/libavfilter/af_afftdn.c +++ b/libavfilter/af_afftdn.c @@ -81,8 +81,8 @@ typedef struct DeNoiseChannel { double *abs_var; double *rel_var; double *min_abs_var; - float *fft_in; - AVComplexFloat *fft_out; + void *fft_in; + void *fft_out; AVTXContext *fft, *ifft; av_tx_fn tx_fn, itx_fn; @@ -105,6 +105,9 @@ typedef struct DeNoiseChannel { typedef struct AudioFFTDeNoiseContext { const AVClass *class; + int format; + size_t sample_size; + float noise_reduction; float noise_floor; int noise_type; @@ -347,7 +350,6 @@ static double floor_offset(const double *S, int size, double mean) static void process_frame(AVFilterContext *ctx, AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch, - AVComplexFloat *fft_data, double *prior, double *prior_band_excit, int track_noise) { AVFilterLink *outlink = ctx->outputs[0]; @@ -359,12 +361,22 @@ static void process_frame(AVFilterContext *ctx, double *band_excit = dnch->band_excit; double *band_amt = dnch->band_amt; double *smoothed_gain = dnch->smoothed_gain; + AVComplexDouble *fft_data_dbl = dnch->fft_out; + AVComplexFloat *fft_data_flt = dnch->fft_out; double *gain = dnch->gain; for (int i = 0; i < s->bin_count; i++) { double sqr_new_gain, new_gain, power, mag, mag_abs_var, new_mag_abs_var; - noisy_data[i] = mag = hypot(fft_data[i].re, fft_data[i].im); + switch (s->format) { + case AV_SAMPLE_FMT_FLTP: + noisy_data[i] = mag = hypot(fft_data_flt[i].re, fft_data_flt[i].im); + break; + case AV_SAMPLE_FMT_DBLP: + noisy_data[i] = mag = hypot(fft_data_dbl[i].re, fft_data_dbl[i].im); + break; + } + power = mag * mag; mag_abs_var = power / abs_var[i]; new_mag_abs_var = ratio * prior[i] + rratio * fmax(mag_abs_var - 1.0, 0.0); @@ -446,11 +458,23 @@ static void process_frame(AVFilterContext *ctx, } } - for (int i = 0; i < s->bin_count; i++) { - const double new_gain = smoothed_gain[i]; + switch (s->format) { + case AV_SAMPLE_FMT_FLTP: + for (int i = 0; i < s->bin_count; i++) { + const float new_gain = smoothed_gain[i]; - fft_data[i].re *= new_gain; - fft_data[i].im *= new_gain; + fft_data_flt[i].re *= new_gain; + fft_data_flt[i].im *= new_gain; + } + break; + case AV_SAMPLE_FMT_DBLP: + for (int i = 0; i < s->bin_count; i++) { + const double new_gain = smoothed_gain[i]; + + fft_data_dbl[i].re *= new_gain; + fft_data_dbl[i].im *= new_gain; + } + break; } } @@ -603,8 +627,29 @@ static int config_input(AVFilterLink *inlink) { AVFilterContext *ctx = inlink->dst; AudioFFTDeNoiseContext *s = ctx->priv; + size_t complex_sample_size; double wscale, sar, sum, sdiv; - int i, j, k, m, n, ret; + int i, j, k, m, n, ret, tx_type; + double dscale = 1.; + float fscale = 1.f; + void *scale; + + s->format = inlink->format; + + switch (s->format) { + case AV_SAMPLE_FMT_FLTP: + s->sample_size = sizeof(float); + complex_sample_size = sizeof(AVComplexFloat); + tx_type = AV_TX_FLOAT_RDFT; + scale = &fscale; + break; + case AV_SAMPLE_FMT_DBLP: + s->sample_size = sizeof(double); + complex_sample_size = sizeof(AVComplexDouble); + tx_type = AV_TX_DOUBLE_RDFT; + scale = &dscale; + break; + } s->dnch = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->dnch)); if (!s->dnch) @@ -671,7 +716,6 @@ static int config_input(AVFilterLink *inlink) for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { DeNoiseChannel *dnch = &s->dnch[ch]; - float scale = 1.f; switch (s->noise_type) { case WHITE_NOISE: @@ -708,12 +752,12 @@ static int config_input(AVFilterLink *inlink) dnch->abs_var = av_calloc(s->bin_count, sizeof(*dnch->abs_var)); dnch->rel_var = av_calloc(s->bin_count, sizeof(*dnch->rel_var)); dnch->min_abs_var = av_calloc(s->bin_count, sizeof(*dnch->min_abs_var)); - dnch->fft_in = av_calloc(s->fft_length2, sizeof(*dnch->fft_in)); - dnch->fft_out = av_calloc(s->fft_length2 + 1, sizeof(*dnch->fft_out)); - ret = av_tx_init(&dnch->fft, &dnch->tx_fn, AV_TX_FLOAT_RDFT, 0, s->fft_length2, &scale, 0); + dnch->fft_in = av_calloc(s->fft_length2, s->sample_size); + dnch->fft_out = av_calloc(s->fft_length2 + 1, complex_sample_size); + ret = av_tx_init(&dnch->fft, &dnch->tx_fn, tx_type, 0, s->fft_length2, scale, 0); if (ret < 0) return ret; - ret = av_tx_init(&dnch->ifft, &dnch->itx_fn, AV_TX_FLOAT_RDFT, 1, s->fft_length2, &scale, 0); + ret = av_tx_init(&dnch->ifft, &dnch->itx_fn, tx_type, 1, s->fft_length2, scale, 0); if (ret < 0) return ret; dnch->spread_function = av_calloc(s->number_of_bands * s->number_of_bands, @@ -861,17 +905,33 @@ static void sample_noise_block(AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch, AVFrame *in, int ch) { - float *src = (float *)in->extended_data[ch]; + double *src_dbl = (double *)in->extended_data[ch]; + float *src_flt = (float *)in->extended_data[ch]; double mag2, var = 0.0, avr = 0.0, avi = 0.0; + AVComplexDouble *fft_out_dbl = dnch->fft_out; + AVComplexFloat *fft_out_flt = dnch->fft_out; + double *fft_in_dbl = dnch->fft_in; + float *fft_in_flt = dnch->fft_in; int edge, j, k, n, edgemax; - for (int i = 0; i < s->window_length; i++) - dnch->fft_in[i] = s->window[i] * src[i] * (1LL << 23); + switch (s->format) { + case AV_SAMPLE_FMT_FLTP: + for (int i = 0; i < s->window_length; i++) + fft_in_flt[i] = s->window[i] * src_flt[i] * (1LL << 23); - for (int i = s->window_length; i < s->fft_length2; i++) - dnch->fft_in[i] = 0.0; + for (int i = s->window_length; i < s->fft_length2; i++) + fft_in_flt[i] = 0.f; + break; + case AV_SAMPLE_FMT_DBLP: + for (int i = 0; i < s->window_length; i++) + fft_in_dbl[i] = s->window[i] * src_dbl[i] * (1LL << 23); - dnch->tx_fn(dnch->fft, dnch->fft_out, dnch->fft_in, sizeof(float)); + for (int i = s->window_length; i < s->fft_length2; i++) + fft_in_dbl[i] = 0.; + break; + } + + dnch->tx_fn(dnch->fft, dnch->fft_out, dnch->fft_in, sizeof(s->sample_size)); edge = s->noise_band_edge[0]; j = edge; @@ -896,10 +956,21 @@ static void sample_noise_block(AudioFFTDeNoiseContext *s, avr = 0.0; avi = 0.0; } - avr += dnch->fft_out[n].re; - avi += dnch->fft_out[n].im; - mag2 = dnch->fft_out[n].re * dnch->fft_out[n].re + - dnch->fft_out[n].im * dnch->fft_out[n].im; + + switch (s->format) { + case AV_SAMPLE_FMT_FLTP: + avr += fft_out_flt[n].re; + avi += fft_out_flt[n].im; + mag2 = fft_out_flt[n].re * fft_out_flt[n].re + + fft_out_flt[n].im * fft_out_flt[n].im; + break; + case AV_SAMPLE_FMT_DBLP: + avr += fft_out_dbl[n].re; + avi += fft_out_dbl[n].im; + mag2 = fft_out_dbl[n].re * fft_out_dbl[n].re + + fft_out_dbl[n].im * fft_out_dbl[n].im; + break; + } mag2 = fmax(mag2, s->sample_floor); @@ -980,27 +1051,48 @@ static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_job for (int ch = start; ch < end; ch++) { DeNoiseChannel *dnch = &s->dnch[ch]; - const float *src = (const float *)in->extended_data[ch]; + const double *src_dbl = (const double *)in->extended_data[ch]; + const float *src_flt = (const float *)in->extended_data[ch]; double *dst = dnch->out_samples; - float *fft_in = dnch->fft_in; + double *fft_in_dbl = dnch->fft_in; + float *fft_in_flt = dnch->fft_in; - for (int m = 0; m < window_length; m++) - fft_in[m] = window[m] * src[m] * (1LL << 23); + switch (s->format) { + case AV_SAMPLE_FMT_FLTP: + for (int m = 0; m < window_length; m++) + fft_in_flt[m] = window[m] * src_flt[m] * (1LL << 23); - for (int m = window_length; m < s->fft_length2; m++) - fft_in[m] = 0; + for (int m = window_length; m < s->fft_length2; m++) + fft_in_flt[m] = 0.f; + break; + case AV_SAMPLE_FMT_DBLP: + for (int m = 0; m < window_length; m++) + fft_in_dbl[m] = window[m] * src_dbl[m] * (1LL << 23); - dnch->tx_fn(dnch->fft, dnch->fft_out, fft_in, sizeof(float)); + for (int m = window_length; m < s->fft_length2; m++) + fft_in_dbl[m] = 0.; + break; + } - process_frame(ctx, s, dnch, dnch->fft_out, + dnch->tx_fn(dnch->fft, dnch->fft_out, dnch->fft_in, sizeof(s->sample_size)); + + process_frame(ctx, s, dnch, dnch->prior, dnch->prior_band_excit, s->track_noise); - dnch->itx_fn(dnch->ifft, fft_in, dnch->fft_out, sizeof(float)); + dnch->itx_fn(dnch->ifft, dnch->fft_in, dnch->fft_out, sizeof(s->sample_size)); - for (int m = 0; m < window_length; m++) - dst[m] += s->window[m] * fft_in[m] / (1LL << 23); + switch (s->format) { + case AV_SAMPLE_FMT_FLTP: + for (int m = 0; m < window_length; m++) + dst[m] += s->window[m] * fft_in_flt[m] / (1LL << 23); + break; + case AV_SAMPLE_FMT_DBLP: + for (int m = 0; m < window_length; m++) + dst[m] += s->window[m] * fft_in_dbl[m] / (1LL << 23); + break; + } } return 0; @@ -1016,11 +1108,14 @@ static int output_frame(AVFilterLink *inlink, AVFrame *in) AVFrame *out; for (int ch = 0; ch < s->channels; ch++) { - float *src = (float *)s->winframe->extended_data[ch]; + uint8_t *src = (uint8_t *)s->winframe->extended_data[ch]; - memmove(src, &src[s->sample_advance], offset * sizeof(float)); - memcpy(&src[offset], in->extended_data[ch], in->nb_samples * sizeof(float)); - memset(&src[offset + in->nb_samples], 0, (s->sample_advance - in->nb_samples) * sizeof(float)); + memmove(src, src + s->sample_advance * s->sample_size, + offset * s->sample_size); + memcpy(src + offset * s->sample_size, in->extended_data[ch], + in->nb_samples * s->sample_size); + memset(src + s->sample_size * (offset + in->nb_samples), 0, + (s->sample_advance - in->nb_samples) * s->sample_size); } if (s->track_noise) { @@ -1107,21 +1202,47 @@ static int output_frame(AVFilterLink *inlink, AVFrame *in) for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { DeNoiseChannel *dnch = &s->dnch[ch]; double *src = dnch->out_samples; - const float *orig = (const float *)s->winframe->extended_data[ch]; - float *dst = (float *)out->extended_data[ch]; + const double *orig_dbl = (const double *)s->winframe->extended_data[ch]; + const float *orig_flt = (const float *)s->winframe->extended_data[ch]; + double *dst_dbl = (double *)out->extended_data[ch]; + float *dst_flt = (float *)out->extended_data[ch]; switch (output_mode) { case IN_MODE: - for (int m = 0; m < out->nb_samples; m++) - dst[m] = orig[m]; + switch (s->format) { + case AV_SAMPLE_FMT_FLTP: + for (int m = 0; m < out->nb_samples; m++) + dst_flt[m] = orig_flt[m]; + break; + case AV_SAMPLE_FMT_DBLP: + for (int m = 0; m < out->nb_samples; m++) + dst_dbl[m] = orig_dbl[m]; + break; + } break; case OUT_MODE: - for (int m = 0; m < out->nb_samples; m++) - dst[m] = src[m]; + switch (s->format) { + case AV_SAMPLE_FMT_FLTP: + for (int m = 0; m < out->nb_samples; m++) + dst_flt[m] = src[m]; + break; + case AV_SAMPLE_FMT_DBLP: + for (int m = 0; m < out->nb_samples; m++) + dst_dbl[m] = src[m]; + break; + } break; case NOISE_MODE: - for (int m = 0; m < out->nb_samples; m++) - dst[m] = orig[m] - src[m]; + switch (s->format) { + case AV_SAMPLE_FMT_FLTP: + for (int m = 0; m < out->nb_samples; m++) + dst_flt[m] = orig_flt[m] - src[m]; + break; + case AV_SAMPLE_FMT_DBLP: + for (int m = 0; m < out->nb_samples; m++) + dst_dbl[m] = orig_dbl[m] - src[m]; + break; + } break; default: if (in != out) @@ -1129,6 +1250,7 @@ static int output_frame(AVFilterLink *inlink, AVFrame *in) av_frame_free(&out); return AVERROR_BUG; } + memmove(src, src + s->sample_advance, (s->window_length - s->sample_advance) * sizeof(*src)); memset(src + (s->window_length - s->sample_advance), 0, s->sample_advance * sizeof(*src)); } @@ -1251,7 +1373,7 @@ const AVFilter ff_af_afftdn = { .uninit = uninit, FILTER_INPUTS(inputs), FILTER_OUTPUTS(outputs), - FILTER_SINGLE_SAMPLEFMT(AV_SAMPLE_FMT_FLTP), + FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_DBLP), .process_command = process_command, .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | AVFILTER_FLAG_SLICE_THREADS,