diff --git a/libavcodec/Makefile b/libavcodec/Makefile index b440a00746..59029a853c 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -122,8 +122,7 @@ OBJS-$(CONFIG_QSV) += qsv.o OBJS-$(CONFIG_QSVDEC) += qsvdec.o OBJS-$(CONFIG_QSVENC) += qsvenc.o OBJS-$(CONFIG_RANGECODER) += rangecoder.o -RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o -OBJS-$(CONFIG_RDFT) += rdft.o $(RDFT-OBJS-yes) +OBJS-$(CONFIG_RDFT) += rdft.o OBJS-$(CONFIG_RV34DSP) += rv34dsp.o OBJS-$(CONFIG_SHARED) += log2_tab.o reverse.o OBJS-$(CONFIG_SINEWIN) += sinewin.o sinewin_fixed.o diff --git a/libavcodec/arm/rdft_neon.S b/libavcodec/arm/rdft_neon.S index 781d976354..eabb92b4bd 100644 --- a/libavcodec/arm/rdft_neon.S +++ b/libavcodec/arm/rdft_neon.S @@ -30,18 +30,21 @@ function ff_rdft_calc_neon, export=1 lsls r6, r6, #31 bne 1f - add r0, r4, #20 + add r0, r4, #24 bl X(ff_fft_permute_neon) - add r0, r4, #20 + add r0, r4, #24 mov r1, r5 bl X(ff_fft_calc_neon) 1: ldr r12, [r4, #0] @ nbits mov r2, #1 + ldr r8, [r4, #20] @ negative_sin lsl r12, r2, r12 add r0, r5, #8 + lsl r8, r8, #31 add r1, r5, r12, lsl #2 lsr r12, r12, #2 + vdup.32 d26, r8 ldr r2, [r4, #12] @ tcos sub r12, r12, #2 ldr r3, [r4, #16] @ tsin @@ -55,6 +58,7 @@ function ff_rdft_calc_neon, export=1 vld1.32 {d5}, [r3,:64]! @ tsin[i] vmov.f32 d18, #0.5 @ k1 vdup.32 d19, r6 + veor d5, d26, d5 pld [r0, #32] veor d19, d18, d19 @ k2 vmov.i32 d16, #0 @@ -90,6 +94,7 @@ function ff_rdft_calc_neon, export=1 vld1.32 {d5}, [r3,:64]! @ tsin[i] veor d24, d22, d17 @ ev.re,-ev.im vrev64.32 d3, d23 @ od.re, od.im + veor d5, d26, d5 pld [r2, #32] veor d2, d3, d16 @ -od.re, od.im pld [r3, #32] @@ -140,10 +145,10 @@ function ff_rdft_calc_neon, export=1 vmul.f32 d22, d22, d18 vst1.32 {d22}, [r5,:64] - add r0, r4, #20 + add r0, r4, #24 mov r1, r5 bl X(ff_fft_permute_neon) - add r0, r4, #20 + add r0, r4, #24 mov r1, r5 pop {r4-r8,lr} b X(ff_fft_calc_neon) diff --git a/libavcodec/rdft.c b/libavcodec/rdft.c index c318aa8394..194e0bc4ee 100644 --- a/libavcodec/rdft.c +++ b/libavcodec/rdft.c @@ -28,28 +28,6 @@ * (Inverse) Real Discrete Fourier Transforms. */ -/* sin(2*pi*x/n) for 0<=x>2); i++) { - i1 = 2*i; - i2 = n-i1; - /* Separate even and odd FFTs */ - ev.re = k1*(data[i1 ]+data[i2 ]); - od.im = -k2*(data[i1 ]-data[i2 ]); - ev.im = k1*(data[i1+1]-data[i2+1]); - od.re = k2*(data[i1+1]+data[i2+1]); - /* Apply twiddle factors to the odd FFT and add to the even FFT */ - data[i1 ] = ev.re + od.re*tcos[i] - od.im*tsin[i]; - data[i1+1] = ev.im + od.im*tcos[i] + od.re*tsin[i]; - data[i2 ] = ev.re - od.re*tcos[i] + od.im*tsin[i]; - data[i2+1] = -ev.im + od.im*tcos[i] + od.re*tsin[i]; + +#define RDFT_UNMANGLE(sign0, sign1) \ + for (i = 1; i < (n>>2); i++) { \ + i1 = 2*i; \ + i2 = n-i1; \ + /* Separate even and odd FFTs */ \ + ev.re = k1*(data[i1 ]+data[i2 ]); \ + od.im = -k2*(data[i1 ]-data[i2 ]); \ + ev.im = k1*(data[i1+1]-data[i2+1]); \ + od.re = k2*(data[i1+1]+data[i2+1]); \ + /* Apply twiddle factors to the odd FFT and add to the even FFT */ \ + data[i1 ] = ev.re + od.re*tcos[i] sign0 od.im*tsin[i]; \ + data[i1+1] = ev.im + od.im*tcos[i] sign1 od.re*tsin[i]; \ + data[i2 ] = ev.re - od.re*tcos[i] sign1 od.im*tsin[i]; \ + data[i2+1] = -ev.im + od.im*tcos[i] sign1 od.re*tsin[i]; \ } + + if (s->negative_sin) { + RDFT_UNMANGLE(+,-) + } else { + RDFT_UNMANGLE(-,+) + } + data[2*i+1]=s->sign_convention*data[2*i+1]; if (s->inverse) { data[0] *= k1; @@ -104,6 +91,7 @@ av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans) s->nbits = nbits; s->inverse = trans == IDFT_C2R || trans == DFT_C2R; s->sign_convention = trans == IDFT_R2C || trans == DFT_C2R ? 1 : -1; + s->negative_sin = trans == DFT_C2R || trans == DFT_R2C; if (nbits < 4 || nbits > 16) return AVERROR(EINVAL); @@ -113,15 +101,7 @@ av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans) ff_init_ff_cos_tabs(nbits); s->tcos = ff_cos_tabs[nbits]; - s->tsin = ff_sin_tabs[nbits]+(trans == DFT_R2C || trans == DFT_C2R)*(n>>2); -#if !CONFIG_HARDCODED_TABLES - { - int i; - const double theta = (trans == DFT_R2C || trans == DFT_C2R ? -1 : 1) * 2 * M_PI / n; - for (i = 0; i < (n >> 2); i++) - s->tsin[i] = sin(i * theta); - } -#endif + s->tsin = ff_cos_tabs[nbits] + (n >> 2); s->rdft_calc = rdft_calc_c; if (ARCH_ARM) ff_rdft_init_arm(s); diff --git a/libavcodec/rdft.h b/libavcodec/rdft.h index 37c40e7c80..ffafca7f24 100644 --- a/libavcodec/rdft.h +++ b/libavcodec/rdft.h @@ -25,29 +25,6 @@ #include "config.h" #include "fft.h" -#if CONFIG_HARDCODED_TABLES -# define SINTABLE_CONST const -#else -# define SINTABLE_CONST -#endif - -#define SINTABLE(size) \ - SINTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_sin_##size)[size/2] - -extern SINTABLE(16); -extern SINTABLE(32); -extern SINTABLE(64); -extern SINTABLE(128); -extern SINTABLE(256); -extern SINTABLE(512); -extern SINTABLE(1024); -extern SINTABLE(2048); -extern SINTABLE(4096); -extern SINTABLE(8192); -extern SINTABLE(16384); -extern SINTABLE(32768); -extern SINTABLE(65536); - struct RDFTContext { int nbits; int inverse; @@ -55,7 +32,8 @@ struct RDFTContext { /* pre/post rotation tables */ const FFTSample *tcos; - SINTABLE_CONST FFTSample *tsin; + const FFTSample *tsin; + int negative_sin; FFTContext fft; void (*rdft_calc)(struct RDFTContext *s, FFTSample *z); };