avcodec/fft_template: improve performance of ff_fft_init() in fft_template

Before patch:
init nbits = 17, get 10000 samples, average cost: 16175 us
After patch:
init nbits = 17, get 10000 samples, average cost: 14989 us

Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: Steven Liu <lq@chinaffmpeg.org>
This commit is contained in:
Steven Liu 2018-12-26 16:09:49 +08:00
parent 2780cd33d9
commit eb81fd792f
1 changed file with 35 additions and 11 deletions

View File

@@ -261,17 +261,41 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
if (s->fft_permutation == FF_FFT_PERM_AVX) {
fft_perm_avx(s);
} else {
for(i=0; i<n; i++) {
int k;
j = i;
if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS)
j = (j&~3) | ((j>>1)&1) | ((j<<1)&2);
k = -split_radix_permutation(i, n, s->inverse) & (n-1);
if (s->revtab)
s->revtab[k] = j;
if (s->revtab32)
s->revtab32[k] = j;
}
#define PROCESS_FFT_PERM_SWAP_LSBS(num) do {\
for(i = 0; i < n; i++) {\
int k;\
j = i;\
j = (j & ~3) | ((j >> 1) & 1) | ((j << 1) & 2);\
k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\
s->revtab##num[k] = j;\
} \
} while(0);
#define PROCESS_FFT_PERM_DEFAULT(num) do {\
for(i = 0; i < n; i++) {\
int k;\
j = i;\
k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\
s->revtab##num[k] = j;\
} \
} while(0);
#define SPLIT_RADIX_PERMUTATION(num) do { \
if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS) {\
PROCESS_FFT_PERM_SWAP_LSBS(num) \
} else {\
PROCESS_FFT_PERM_DEFAULT(num) \
}\
} while(0);
if (s->revtab)
SPLIT_RADIX_PERMUTATION()
if (s->revtab32)
SPLIT_RADIX_PERMUTATION(32)
#undef PROCESS_FFT_PERM_DEFAULT
#undef PROCESS_FFT_PERM_SWAP_LSBS
#undef SPLIT_RADIX_PERMUTATION
}
return 0;