avfilter/af_atempo: switch to rdft from lavu/tx

Paul B Mahol 2022-02-06 12:22:40 +01:00
parent e8f439631f
commit 800dfd2eac
2 changed files with 64 additions and 65 deletions
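
Background on the API being adopted: lavu/tx replaces the avfft calls (av_rdft_init/av_rdft_calc/av_rdft_end) with av_tx_init(), a transform function pointer (av_tx_fn), and av_tx_uninit(). The sketch below is illustrative only and not part of the patch -- the function name, transform length, and buffer names are invented -- but it follows the init/compute/uninit sequence and the sizeof(float) stride that the patch uses with AV_TX_FLOAT_RDFT.

/* Illustrative sketch only: forward and inverse real DFT with the lavu/tx
 * API that af_atempo migrates to.  Length and buffer names are arbitrary. */
#include <libavutil/error.h>
#include <libavutil/mem.h>
#include <libavutil/tx.h>

static int rdft_roundtrip_example(void)
{
    const int len = 1 << 10;              /* transform length (power of two) */
    AVTXContext *r2c = NULL, *c2r = NULL;
    av_tx_fn r2c_fn, c2r_fn;
    float scale = 1.f, iscale = 1.f;
    float *re = av_calloc(len, sizeof(*re));                      /* real samples     */
    AVComplexFloat *cplx = av_calloc(len / 2 + 1, sizeof(*cplx)); /* len/2+1 bins     */
    int ret;

    if (!re || !cplx) {
        ret = AVERROR(ENOMEM);
        goto end;
    }

    /* 4th argument: 0 = forward (real-to-complex), 1 = inverse (complex-to-real) */
    ret = av_tx_init(&r2c, &r2c_fn, AV_TX_FLOAT_RDFT, 0, len, &scale, 0);
    if (ret < 0)
        goto end;
    ret = av_tx_init(&c2r, &c2r_fn, AV_TX_FLOAT_RDFT, 1, len, &iscale, 0);
    if (ret < 0)
        goto end;

    r2c_fn(r2c, cplx, re, sizeof(float)); /* len real floats -> len/2+1 complex bins  */
    c2r_fn(c2r, re, cplx, sizeof(float)); /* back to len real floats (see scale args) */
    ret = 0;

end:
    av_tx_uninit(&r2c);                   /* safe on NULL contexts */
    av_tx_uninit(&c2r);
    av_free(re);
    av_free(cplx);
    return ret;
}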

configure

@@ -3610,8 +3610,6 @@ amovie_filter_deps="avcodec avformat"
 aresample_filter_deps="swresample"
 asr_filter_deps="pocketsphinx"
 ass_filter_deps="libass"
-atempo_filter_deps="avcodec"
-atempo_filter_select="rdft"
 avgblur_opencl_filter_deps="opencl"
 avgblur_vulkan_filter_deps="vulkan spirv_compiler"
 azmq_filter_deps="libzmq"
@@ -7386,7 +7384,6 @@ enabled zlib && add_cppflags -DZLIB_CONST
 # conditional library dependencies, in any order
 enabled amovie_filter && prepend avfilter_deps "avformat avcodec"
 enabled aresample_filter && prepend avfilter_deps "swresample"
-enabled atempo_filter && prepend avfilter_deps "avcodec"
 enabled bm3d_filter && prepend avfilter_deps "avcodec"
 enabled cover_rect_filter && prepend avfilter_deps "avformat avcodec"
 enabled ebur128_filter && enabled swresample && prepend avfilter_deps "swresample"

libavfilter/af_atempo.c

@@ -39,13 +39,13 @@
  */
 
 #include <float.h>
-#include "libavcodec/avfft.h"
 #include "libavutil/avassert.h"
 #include "libavutil/avstring.h"
 #include "libavutil/channel_layout.h"
 #include "libavutil/eval.h"
 #include "libavutil/opt.h"
 #include "libavutil/samplefmt.h"
+#include "libavutil/tx.h"
 #include "avfilter.h"
 #include "audio.h"
 #include "internal.h"
@@ -67,7 +67,8 @@ typedef struct AudioFragment {
 
     // rDFT transform of the down-mixed mono fragment, used for
     // fast waveform alignment via correlation in frequency domain:
-    FFTSample *xdat;
+    float *xdat_in;
+    float *xdat;
 } AudioFragment;
 
 /**
@@ -140,9 +141,11 @@ typedef struct ATempoContext {
     FilterState state;
 
     // for fast correlation calculation in frequency domain:
-    RDFTContext *real_to_complex;
-    RDFTContext *complex_to_real;
-    FFTSample *correlation;
+    AVTXContext *real_to_complex;
+    AVTXContext *complex_to_real;
+    av_tx_fn r2c_fn, c2r_fn;
+    float *correlation_in;
+    float *correlation;
 
     // for managing AVFilterPad.request_frame and AVFilterPad.filter_frame
     AVFrame *dst_buffer;
@@ -228,18 +231,18 @@ static void yae_release_buffers(ATempoContext *atempo)
     av_freep(&atempo->frag[0].data);
     av_freep(&atempo->frag[1].data);
+    av_freep(&atempo->frag[0].xdat_in);
+    av_freep(&atempo->frag[1].xdat_in);
     av_freep(&atempo->frag[0].xdat);
     av_freep(&atempo->frag[1].xdat);
 
     av_freep(&atempo->buffer);
     av_freep(&atempo->hann);
+    av_freep(&atempo->correlation_in);
     av_freep(&atempo->correlation);
 
-    av_rdft_end(atempo->real_to_complex);
-    atempo->real_to_complex = NULL;
-
-    av_rdft_end(atempo->complex_to_real);
-    atempo->complex_to_real = NULL;
+    av_tx_uninit(&atempo->real_to_complex);
+    av_tx_uninit(&atempo->complex_to_real);
 }
 
 /* av_realloc is not aligned enough; fortunately, the data does not need to
  * be preserved */
@@ -247,7 +250,7 @@ static void yae_release_buffers(ATempoContext *atempo)
 #define RE_MALLOC_OR_FAIL(field, field_size) \
     do { \
         av_freep(&field); \
-        field = av_malloc(field_size); \
+        field = av_calloc(field_size, 1); \
         if (!field) { \
             yae_release_buffers(atempo); \
             return AVERROR(ENOMEM); \
@@ -265,6 +268,7 @@ static int yae_reset(ATempoContext *atempo,
 {
     const int sample_size = av_get_bytes_per_sample(format);
     uint32_t nlevels = 0;
+    float scale = 1.f, iscale = 1.f;
     uint32_t pot;
     int i;
 
@@ -288,29 +292,29 @@ static int yae_reset(ATempoContext *atempo,
     // initialize audio fragment buffers:
     RE_MALLOC_OR_FAIL(atempo->frag[0].data, atempo->window * atempo->stride);
     RE_MALLOC_OR_FAIL(atempo->frag[1].data, atempo->window * atempo->stride);
-    RE_MALLOC_OR_FAIL(atempo->frag[0].xdat, atempo->window * sizeof(FFTComplex));
-    RE_MALLOC_OR_FAIL(atempo->frag[1].xdat, atempo->window * sizeof(FFTComplex));
+    RE_MALLOC_OR_FAIL(atempo->frag[0].xdat_in, (atempo->window + 1) * sizeof(AVComplexFloat));
+    RE_MALLOC_OR_FAIL(atempo->frag[1].xdat_in, (atempo->window + 1) * sizeof(AVComplexFloat));
+    RE_MALLOC_OR_FAIL(atempo->frag[0].xdat, (atempo->window + 1) * sizeof(AVComplexFloat));
+    RE_MALLOC_OR_FAIL(atempo->frag[1].xdat, (atempo->window + 1) * sizeof(AVComplexFloat));
 
     // initialize rDFT contexts:
-    av_rdft_end(atempo->real_to_complex);
-    atempo->real_to_complex = NULL;
-
-    av_rdft_end(atempo->complex_to_real);
-    atempo->complex_to_real = NULL;
+    av_tx_uninit(&atempo->real_to_complex);
+    av_tx_uninit(&atempo->complex_to_real);
 
-    atempo->real_to_complex = av_rdft_init(nlevels + 1, DFT_R2C);
+    av_tx_init(&atempo->real_to_complex, &atempo->r2c_fn, AV_TX_FLOAT_RDFT, 0, 1 << (nlevels + 1), &scale, 0);
     if (!atempo->real_to_complex) {
         yae_release_buffers(atempo);
         return AVERROR(ENOMEM);
     }
 
-    atempo->complex_to_real = av_rdft_init(nlevels + 1, IDFT_C2R);
+    av_tx_init(&atempo->complex_to_real, &atempo->c2r_fn, AV_TX_FLOAT_RDFT, 1, 1 << (nlevels + 1), &iscale, 0);
     if (!atempo->complex_to_real) {
         yae_release_buffers(atempo);
         return AVERROR(ENOMEM);
     }
 
-    RE_MALLOC_OR_FAIL(atempo->correlation, atempo->window * sizeof(FFTComplex));
+    RE_MALLOC_OR_FAIL(atempo->correlation_in, (atempo->window + 1) * sizeof(AVComplexFloat));
+    RE_MALLOC_OR_FAIL(atempo->correlation, atempo->window * sizeof(AVComplexFloat));
 
     atempo->ring = atempo->window * 3;
     RE_MALLOC_OR_FAIL(atempo->buffer, atempo->ring * atempo->stride);
@@ -348,7 +352,7 @@ static int yae_update(AVFilterContext *ctx)
         const uint8_t *src_end = src + \
             frag->nsamples * atempo->channels * sizeof(scalar_type); \
 \
-        FFTSample *xdat = frag->xdat; \
+        float *xdat = frag->xdat_in; \
         scalar_type tmp; \
 \
         if (atempo->channels == 1) { \
@@ -356,27 +360,27 @@
                 tmp = *(const scalar_type *)src; \
                 src += sizeof(scalar_type); \
 \
-                *xdat = (FFTSample)tmp; \
+                *xdat = (float)tmp; \
             } \
         } else { \
-            FFTSample s, max, ti, si; \
+            float s, max, ti, si; \
             int i; \
 \
             for (; src < src_end; xdat++) { \
                 tmp = *(const scalar_type *)src; \
                 src += sizeof(scalar_type); \
 \
-                max = (FFTSample)tmp; \
-                s = FFMIN((FFTSample)scalar_max, \
-                          (FFTSample)fabsf(max)); \
+                max = (float)tmp; \
+                s = FFMIN((float)scalar_max, \
+                          (float)fabsf(max)); \
 \
                 for (i = 1; i < atempo->channels; i++) { \
                     tmp = *(const scalar_type *)src; \
                     src += sizeof(scalar_type); \
 \
-                    ti = (FFTSample)tmp; \
-                    si = FFMIN((FFTSample)scalar_max, \
-                               (FFTSample)fabsf(ti)); \
+                    ti = (float)tmp; \
+                    si = FFMIN((float)scalar_max, \
+                               (float)fabsf(ti)); \
 \
                     if (s < si) { \
                         s = si; \
@@ -399,7 +403,7 @@ static void yae_downmix(ATempoContext *atempo, AudioFragment *frag)
     const uint8_t *src = frag->data;
 
     // init complex data buffer used for FFT and Correlation:
-    memset(frag->xdat, 0, sizeof(FFTComplex) * atempo->window);
+    memset(frag->xdat_in, 0, sizeof(AVComplexFloat) * (atempo->window + 1));
 
     if (atempo->format == AV_SAMPLE_FMT_U8) {
         yae_init_xdat(uint8_t, 127);
@@ -598,32 +602,24 @@ static void yae_advance_to_next_frag(ATempoContext *atempo)
  * Multiply two vectors of complex numbers (result of real_to_complex rDFT)
  * and transform back via complex_to_real rDFT.
  */
-static void yae_xcorr_via_rdft(FFTSample *xcorr,
-                               RDFTContext *complex_to_real,
-                               const FFTComplex *xa,
-                               const FFTComplex *xb,
+static void yae_xcorr_via_rdft(float *xcorr_in,
+                               float *xcorr,
+                               AVTXContext *complex_to_real,
+                               av_tx_fn c2r_fn,
+                               const AVComplexFloat *xa,
+                               const AVComplexFloat *xb,
                                const int window)
 {
-    FFTComplex *xc = (FFTComplex *)xcorr;
+    AVComplexFloat *xc = (AVComplexFloat *)xcorr_in;
     int i;
 
-    // NOTE: first element requires special care -- Given Y = rDFT(X),
-    // Im(Y[0]) and Im(Y[N/2]) are always zero, therefore av_rdft_calc
-    // stores Re(Y[N/2]) in place of Im(Y[0]).
-
-    xc->re = xa->re * xb->re;
-    xc->im = xa->im * xb->im;
-    xa++;
-    xb++;
-    xc++;
-
-    for (i = 1; i < window; i++, xa++, xb++, xc++) {
+    for (i = 0; i <= window; i++, xa++, xb++, xc++) {
         xc->re = (xa->re * xb->re + xa->im * xb->im);
         xc->im = (xa->im * xb->re - xa->re * xb->im);
     }
 
     // apply inverse rDFT:
-    av_rdft_calc(complex_to_real, xcorr);
+    c2r_fn(complex_to_real, xcorr, xcorr_in, sizeof(float));
 }
 
 /**
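
The NOTE removed above documented av_rdft_calc()'s packed in-place spectrum (Re(Y[N/2]) stored in Im(Y[0])), which forced the special handling of the first bin. The lavu/tx RDFT instead writes window + 1 unpacked AVComplexFloat bins into a separate output buffer, so the loop now runs uniformly from 0 to window and the filter keeps distinct xdat_in/xdat and correlation_in/correlation arrays. A minimal sketch of the same correlation-theorem step, with invented names and assuming the spectra come from a forward AV_TX_FLOAT_RDFT of length 2*n:

/* Illustrative sketch (not from the patch): circular cross-correlation of two
 * real signals given their rDFT spectra xa, xb of n+1 bins each (transform
 * length 2*n).  Multiplying xa by the conjugate of xb and inverse-transforming
 * yields the correlation sequence -- the identity yae_xcorr_via_rdft relies on. */
static void xcorr_from_spectra(float *xcorr,             /* out: 2*n real values */
                               AVComplexFloat *scratch,  /* n+1 complex bins     */
                               const AVComplexFloat *xa,
                               const AVComplexFloat *xb,
                               int n,
                               AVTXContext *c2r, av_tx_fn c2r_fn)
{
    for (int i = 0; i <= n; i++) {
        /* scratch[i] = xa[i] * conj(xb[i]) */
        scratch[i].re = xa[i].re * xb[i].re + xa[i].im * xb[i].im;
        scratch[i].im = xa[i].im * xb[i].re - xa[i].re * xb[i].im;
    }
    /* complex-to-real transform back to the time domain */
    c2r_fn(c2r, xcorr, scratch, sizeof(float));
}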
@@ -637,21 +633,25 @@ static int yae_align(AudioFragment *frag,
                      const int window,
                      const int delta_max,
                      const int drift,
-                     FFTSample *correlation,
-                     RDFTContext *complex_to_real)
+                     float *correlation_in,
+                     float *correlation,
+                     AVTXContext *complex_to_real,
+                     av_tx_fn c2r_fn)
 {
     int best_offset = -drift;
-    FFTSample best_metric = -FLT_MAX;
-    FFTSample *xcorr;
+    float best_metric = -FLT_MAX;
+    float *xcorr;
 
     int i0;
     int i1;
     int i;
 
-    yae_xcorr_via_rdft(correlation,
+    yae_xcorr_via_rdft(correlation_in,
+                       correlation,
                        complex_to_real,
-                       (const FFTComplex *)prev->xdat,
-                       (const FFTComplex *)frag->xdat,
+                       c2r_fn,
+                       (const AVComplexFloat *)prev->xdat,
+                       (const AVComplexFloat *)frag->xdat,
                        window);
 
     // identify search window boundaries:
@@ -665,11 +665,11 @@ static int yae_align(AudioFragment *frag,
     xcorr = correlation + i0;
 
     for (i = i0; i < i1; i++, xcorr++) {
-        FFTSample metric = *xcorr;
+        float metric = *xcorr;
 
         // normalize:
-        FFTSample drifti = (FFTSample)(drift + i);
-        metric *= drifti * (FFTSample)(i - i0) * (FFTSample)(i1 - i);
+        float drifti = (float)(drift + i);
+        metric *= drifti * (float)(i - i0) * (float)(i1 - i);
 
         if (metric > best_metric) {
             best_metric = metric;
@@ -706,8 +706,10 @@ static int yae_adjust_position(ATempoContext *atempo)
                                      atempo->window,
                                      delta_max,
                                      drift,
+                                     atempo->correlation_in,
                                      atempo->correlation,
-                                     atempo->complex_to_real);
+                                     atempo->complex_to_real,
+                                     atempo->c2r_fn);
 
     if (correction) {
         // adjust fragment position:
@@ -833,7 +835,7 @@ yae_apply(ATempoContext *atempo,
         yae_downmix(atempo, yae_curr_frag(atempo));
 
         // apply rDFT:
-        av_rdft_calc(atempo->real_to_complex, yae_curr_frag(atempo)->xdat);
+        atempo->r2c_fn(atempo->real_to_complex, yae_curr_frag(atempo)->xdat, yae_curr_frag(atempo)->xdat_in, sizeof(float));
 
         // must load the second fragment before alignment can start:
         if (!atempo->nfrag) {
@@ -865,7 +867,7 @@ yae_apply(ATempoContext *atempo,
         yae_downmix(atempo, yae_curr_frag(atempo));
 
         // apply rDFT:
-        av_rdft_calc(atempo->real_to_complex, yae_curr_frag(atempo)->xdat);
+        atempo->r2c_fn(atempo->real_to_complex, yae_curr_frag(atempo)->xdat, yae_curr_frag(atempo)->xdat_in, sizeof(float));
 
         atempo->state = YAE_OUTPUT_OVERLAP_ADD;
     }
@@ -929,7 +931,7 @@ static int yae_flush(ATempoContext *atempo,
         yae_downmix(atempo, frag);
 
         // apply rDFT:
-        av_rdft_calc(atempo->real_to_complex, frag->xdat);
+        atempo->r2c_fn(atempo->real_to_complex, frag->xdat, frag->xdat_in, sizeof(float));
 
         // align current fragment to previous fragment:
         if (yae_adjust_position(atempo)) {