From 1f9f0dc6ee19f4861d91f4a1a853bd8439e98dce Mon Sep 17 00:00:00 2001
From: Paul B Mahol <onemda@gmail.com>
Date: Sun, 18 Sep 2022 22:30:48 +0200
Subject: [PATCH] avfilter/af_afftdn: add double sample format support

---
 libavfilter/af_afftdn.c | 218 +++++++++++++++++++++++++++++++---------
 1 file changed, 170 insertions(+), 48 deletions(-)

diff --git a/libavfilter/af_afftdn.c b/libavfilter/af_afftdn.c
index cdb256fa10..2b465afcef 100644
--- a/libavfilter/af_afftdn.c
+++ b/libavfilter/af_afftdn.c
@@ -81,8 +81,8 @@ typedef struct DeNoiseChannel {
     double     *abs_var;
     double     *rel_var;
     double     *min_abs_var;
-    float      *fft_in;
-    AVComplexFloat *fft_out;
+    void       *fft_in;
+    void       *fft_out;
     AVTXContext *fft, *ifft;
     av_tx_fn   tx_fn, itx_fn;
 
@@ -105,6 +105,9 @@ typedef struct DeNoiseChannel {
 typedef struct AudioFFTDeNoiseContext {
     const AVClass *class;
 
+    int     format;
+    size_t  sample_size;
+
     float   noise_reduction;
     float   noise_floor;
     int     noise_type;
@@ -347,7 +350,6 @@ static double floor_offset(const double *S, int size, double mean)
 
 static void process_frame(AVFilterContext *ctx,
                           AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch,
-                          AVComplexFloat *fft_data,
                           double *prior, double *prior_band_excit, int track_noise)
 {
     AVFilterLink *outlink = ctx->outputs[0];
@@ -359,12 +361,22 @@ static void process_frame(AVFilterContext *ctx,
     double *band_excit = dnch->band_excit;
     double *band_amt = dnch->band_amt;
     double *smoothed_gain = dnch->smoothed_gain;
+    AVComplexDouble *fft_data_dbl = dnch->fft_out;
+    AVComplexFloat *fft_data_flt = dnch->fft_out;
     double *gain = dnch->gain;
 
     for (int i = 0; i < s->bin_count; i++) {
         double sqr_new_gain, new_gain, power, mag, mag_abs_var, new_mag_abs_var;
 
-        noisy_data[i] = mag = hypot(fft_data[i].re, fft_data[i].im);
+        switch (s->format) {
+        case AV_SAMPLE_FMT_FLTP:
+            noisy_data[i] = mag = hypot(fft_data_flt[i].re, fft_data_flt[i].im);
+            break;
+        case AV_SAMPLE_FMT_DBLP:
+            noisy_data[i] = mag = hypot(fft_data_dbl[i].re, fft_data_dbl[i].im);
+            break;
+        }
+
         power = mag * mag;
         mag_abs_var = power / abs_var[i];
         new_mag_abs_var = ratio * prior[i] + rratio * fmax(mag_abs_var - 1.0, 0.0);
@@ -446,11 +458,23 @@ static void process_frame(AVFilterContext *ctx,
         }
     }
 
-    for (int i = 0; i < s->bin_count; i++) {
-        const double new_gain = smoothed_gain[i];
+    switch (s->format) {
+    case AV_SAMPLE_FMT_FLTP:
+        for (int i = 0; i < s->bin_count; i++) {
+            const float new_gain = smoothed_gain[i];
 
-        fft_data[i].re *= new_gain;
-        fft_data[i].im *= new_gain;
+            fft_data_flt[i].re *= new_gain;
+            fft_data_flt[i].im *= new_gain;
+        }
+        break;
+    case AV_SAMPLE_FMT_DBLP:
+        for (int i = 0; i < s->bin_count; i++) {
+            const double new_gain = smoothed_gain[i];
+
+            fft_data_dbl[i].re *= new_gain;
+            fft_data_dbl[i].im *= new_gain;
+        }
+        break;
     }
 }
 
@@ -603,8 +627,29 @@ static int config_input(AVFilterLink *inlink)
 {
     AVFilterContext *ctx = inlink->dst;
     AudioFFTDeNoiseContext *s = ctx->priv;
+    size_t complex_sample_size;
     double wscale, sar, sum, sdiv;
-    int i, j, k, m, n, ret;
+    int i, j, k, m, n, ret, tx_type;
+    double dscale = 1.;
+    float fscale = 1.f;
+    void *scale;
+
+    s->format = inlink->format;
+
+    switch (s->format) {
+    case AV_SAMPLE_FMT_FLTP:
+        s->sample_size = sizeof(float);
+        complex_sample_size = sizeof(AVComplexFloat);
+        tx_type = AV_TX_FLOAT_RDFT;
+        scale = &fscale;
+        break;
+    case AV_SAMPLE_FMT_DBLP:
+        s->sample_size = sizeof(double);
+        complex_sample_size = sizeof(AVComplexDouble);
+        tx_type = AV_TX_DOUBLE_RDFT;
+        scale = &dscale;
+        break;
+    }
 
     s->dnch = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->dnch));
     if (!s->dnch)
@@ -671,7 +716,6 @@ static int config_input(AVFilterLink *inlink)
 
     for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
         DeNoiseChannel *dnch = &s->dnch[ch];
-        float scale = 1.f;
 
         switch (s->noise_type) {
         case WHITE_NOISE:
@@ -708,12 +752,12 @@ static int config_input(AVFilterLink *inlink)
         dnch->abs_var = av_calloc(s->bin_count, sizeof(*dnch->abs_var));
         dnch->rel_var = av_calloc(s->bin_count, sizeof(*dnch->rel_var));
         dnch->min_abs_var = av_calloc(s->bin_count, sizeof(*dnch->min_abs_var));
-        dnch->fft_in = av_calloc(s->fft_length2, sizeof(*dnch->fft_in));
-        dnch->fft_out = av_calloc(s->fft_length2 + 1, sizeof(*dnch->fft_out));
-        ret = av_tx_init(&dnch->fft, &dnch->tx_fn, AV_TX_FLOAT_RDFT, 0, s->fft_length2, &scale, 0);
+        dnch->fft_in = av_calloc(s->fft_length2, s->sample_size);
+        dnch->fft_out = av_calloc(s->fft_length2 + 1, complex_sample_size);
+        ret = av_tx_init(&dnch->fft, &dnch->tx_fn, tx_type, 0, s->fft_length2, scale, 0);
         if (ret < 0)
             return ret;
-        ret = av_tx_init(&dnch->ifft, &dnch->itx_fn, AV_TX_FLOAT_RDFT, 1, s->fft_length2, &scale, 0);
+        ret = av_tx_init(&dnch->ifft, &dnch->itx_fn, tx_type, 1, s->fft_length2, scale, 0);
         if (ret < 0)
             return ret;
         dnch->spread_function = av_calloc(s->number_of_bands * s->number_of_bands,
@@ -861,17 +905,33 @@ static void sample_noise_block(AudioFFTDeNoiseContext *s,
                                DeNoiseChannel *dnch,
                                AVFrame *in, int ch)
 {
-    float *src = (float *)in->extended_data[ch];
+    double *src_dbl = (double *)in->extended_data[ch];
+    float *src_flt = (float *)in->extended_data[ch];
     double mag2, var = 0.0, avr = 0.0, avi = 0.0;
+    AVComplexDouble *fft_out_dbl = dnch->fft_out;
+    AVComplexFloat *fft_out_flt = dnch->fft_out;
+    double *fft_in_dbl = dnch->fft_in;
+    float *fft_in_flt = dnch->fft_in;
     int edge, j, k, n, edgemax;
 
-    for (int i = 0; i < s->window_length; i++)
-        dnch->fft_in[i] = s->window[i] * src[i] * (1LL << 23);
+    switch (s->format) {
+    case AV_SAMPLE_FMT_FLTP:
+        for (int i = 0; i < s->window_length; i++)
+            fft_in_flt[i] = s->window[i] * src_flt[i] * (1LL << 23);
 
-    for (int i = s->window_length; i < s->fft_length2; i++)
-        dnch->fft_in[i] = 0.0;
+        for (int i = s->window_length; i < s->fft_length2; i++)
+            fft_in_flt[i] = 0.f;
+        break;
+    case AV_SAMPLE_FMT_DBLP:
+        for (int i = 0; i < s->window_length; i++)
+            fft_in_dbl[i] = s->window[i] * src_dbl[i] * (1LL << 23);
 
-    dnch->tx_fn(dnch->fft, dnch->fft_out, dnch->fft_in, sizeof(float));
+        for (int i = s->window_length; i < s->fft_length2; i++)
+            fft_in_dbl[i] = 0.;
+        break;
+    }
+
+    dnch->tx_fn(dnch->fft, dnch->fft_out, dnch->fft_in, sizeof(s->sample_size));
 
     edge = s->noise_band_edge[0];
     j = edge;
@@ -896,10 +956,21 @@ static void sample_noise_block(AudioFFTDeNoiseContext *s,
             avr = 0.0;
             avi = 0.0;
         }
-        avr += dnch->fft_out[n].re;
-        avi += dnch->fft_out[n].im;
-        mag2 = dnch->fft_out[n].re * dnch->fft_out[n].re +
-               dnch->fft_out[n].im * dnch->fft_out[n].im;
+
+        switch (s->format) {
+        case AV_SAMPLE_FMT_FLTP:
+            avr += fft_out_flt[n].re;
+            avi += fft_out_flt[n].im;
+            mag2 = fft_out_flt[n].re * fft_out_flt[n].re +
+                   fft_out_flt[n].im * fft_out_flt[n].im;
+            break;
+        case AV_SAMPLE_FMT_DBLP:
+            avr += fft_out_dbl[n].re;
+            avi += fft_out_dbl[n].im;
+            mag2 = fft_out_dbl[n].re * fft_out_dbl[n].re +
+                   fft_out_dbl[n].im * fft_out_dbl[n].im;
+            break;
+        }
 
         mag2 = fmax(mag2, s->sample_floor);
 
@@ -980,27 +1051,48 @@ static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_job
 
     for (int ch = start; ch < end; ch++) {
         DeNoiseChannel *dnch = &s->dnch[ch];
-        const float *src = (const float *)in->extended_data[ch];
+        const double *src_dbl = (const double *)in->extended_data[ch];
+        const float *src_flt = (const float *)in->extended_data[ch];
         double *dst = dnch->out_samples;
-        float *fft_in = dnch->fft_in;
+        double *fft_in_dbl = dnch->fft_in;
+        float *fft_in_flt = dnch->fft_in;
 
-        for (int m = 0; m < window_length; m++)
-            fft_in[m] = window[m] * src[m] * (1LL << 23);
+        switch (s->format) {
+        case AV_SAMPLE_FMT_FLTP:
+            for (int m = 0; m < window_length; m++)
+                fft_in_flt[m] = window[m] * src_flt[m] * (1LL << 23);
 
-        for (int m = window_length; m < s->fft_length2; m++)
-            fft_in[m] = 0;
+            for (int m = window_length; m < s->fft_length2; m++)
+                fft_in_flt[m] = 0.f;
+            break;
+        case AV_SAMPLE_FMT_DBLP:
+            for (int m = 0; m < window_length; m++)
+                fft_in_dbl[m] = window[m] * src_dbl[m] * (1LL << 23);
 
-        dnch->tx_fn(dnch->fft, dnch->fft_out, fft_in, sizeof(float));
+            for (int m = window_length; m < s->fft_length2; m++)
+                fft_in_dbl[m] = 0.;
+            break;
+        }
 
-        process_frame(ctx, s, dnch, dnch->fft_out,
+        dnch->tx_fn(dnch->fft, dnch->fft_out, dnch->fft_in, sizeof(s->sample_size));
+
+        process_frame(ctx, s, dnch,
                       dnch->prior,
                       dnch->prior_band_excit,
                       s->track_noise);
 
-        dnch->itx_fn(dnch->ifft, fft_in, dnch->fft_out, sizeof(float));
+        dnch->itx_fn(dnch->ifft, dnch->fft_in, dnch->fft_out, sizeof(s->sample_size));
 
-        for (int m = 0; m < window_length; m++)
-            dst[m] += s->window[m] * fft_in[m] / (1LL << 23);
+        switch (s->format) {
+        case AV_SAMPLE_FMT_FLTP:
+            for (int m = 0; m < window_length; m++)
+                dst[m] += s->window[m] * fft_in_flt[m] / (1LL << 23);
+            break;
+        case AV_SAMPLE_FMT_DBLP:
+            for (int m = 0; m < window_length; m++)
+                dst[m] += s->window[m] * fft_in_dbl[m] / (1LL << 23);
+            break;
+        }
     }
 
     return 0;
@@ -1016,11 +1108,14 @@ static int output_frame(AVFilterLink *inlink, AVFrame *in)
     AVFrame *out;
 
     for (int ch = 0; ch < s->channels; ch++) {
-        float *src = (float *)s->winframe->extended_data[ch];
+        uint8_t *src = (uint8_t *)s->winframe->extended_data[ch];
 
-        memmove(src, &src[s->sample_advance], offset * sizeof(float));
-        memcpy(&src[offset], in->extended_data[ch], in->nb_samples * sizeof(float));
-        memset(&src[offset + in->nb_samples], 0, (s->sample_advance - in->nb_samples) * sizeof(float));
+        memmove(src, src + s->sample_advance * s->sample_size,
+                offset * s->sample_size);
+        memcpy(src + offset * s->sample_size, in->extended_data[ch],
+               in->nb_samples * s->sample_size);
+        memset(src + s->sample_size * (offset + in->nb_samples), 0,
+               (s->sample_advance - in->nb_samples) * s->sample_size);
     }
 
     if (s->track_noise) {
@@ -1107,21 +1202,47 @@ static int output_frame(AVFilterLink *inlink, AVFrame *in)
     for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
         DeNoiseChannel *dnch = &s->dnch[ch];
         double *src = dnch->out_samples;
-        const float *orig = (const float *)s->winframe->extended_data[ch];
-        float *dst = (float *)out->extended_data[ch];
+        const double *orig_dbl = (const double *)s->winframe->extended_data[ch];
+        const float *orig_flt = (const float *)s->winframe->extended_data[ch];
+        double *dst_dbl = (double *)out->extended_data[ch];
+        float *dst_flt = (float *)out->extended_data[ch];
 
         switch (output_mode) {
         case IN_MODE:
-            for (int m = 0; m < out->nb_samples; m++)
-                dst[m] = orig[m];
+            switch (s->format) {
+            case AV_SAMPLE_FMT_FLTP:
+                for (int m = 0; m < out->nb_samples; m++)
+                    dst_flt[m] = orig_flt[m];
+                break;
+            case AV_SAMPLE_FMT_DBLP:
+                for (int m = 0; m < out->nb_samples; m++)
+                    dst_dbl[m] = orig_dbl[m];
+                break;
+            }
             break;
         case OUT_MODE:
-            for (int m = 0; m < out->nb_samples; m++)
-                dst[m] = src[m];
+            switch (s->format) {
+            case AV_SAMPLE_FMT_FLTP:
+                for (int m = 0; m < out->nb_samples; m++)
+                    dst_flt[m] = src[m];
+                break;
+            case AV_SAMPLE_FMT_DBLP:
+                for (int m = 0; m < out->nb_samples; m++)
+                    dst_dbl[m] = src[m];
+                break;
+            }
             break;
         case NOISE_MODE:
-            for (int m = 0; m < out->nb_samples; m++)
-                dst[m] = orig[m] - src[m];
+            switch (s->format) {
+            case AV_SAMPLE_FMT_FLTP:
+                for (int m = 0; m < out->nb_samples; m++)
+                    dst_flt[m] = orig_flt[m] - src[m];
+                break;
+            case AV_SAMPLE_FMT_DBLP:
+                for (int m = 0; m < out->nb_samples; m++)
+                    dst_dbl[m] = orig_dbl[m] - src[m];
+                break;
+            }
             break;
         default:
             if (in != out)
@@ -1129,6 +1250,7 @@ static int output_frame(AVFilterLink *inlink, AVFrame *in)
             av_frame_free(&out);
             return AVERROR_BUG;
         }
+
         memmove(src, src + s->sample_advance, (s->window_length - s->sample_advance) * sizeof(*src));
         memset(src + (s->window_length - s->sample_advance), 0, s->sample_advance * sizeof(*src));
     }
@@ -1251,7 +1373,7 @@ const AVFilter ff_af_afftdn = {
     .uninit          = uninit,
     FILTER_INPUTS(inputs),
     FILTER_OUTPUTS(outputs),
-    FILTER_SINGLE_SAMPLEFMT(AV_SAMPLE_FMT_FLTP),
+    FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_DBLP),
     .process_command = process_command,
     .flags           = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
                        AVFILTER_FLAG_SLICE_THREADS,