mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2025-01-27 09:43:34 +00:00
avcodec/rdft: remove sintable
It is redundant with costable. The first half of sintable is identical to the second half of costable. The second half of sintable is the negation of the first half of sintable. The computation is changed to handle the sign of the sin values, in both the C code and the ARM assembly code. Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
This commit is contained in:
parent
e7d977b446
commit
0780ad9c68
@ -122,8 +122,7 @@ OBJS-$(CONFIG_QSV) += qsv.o
|
|||||||
OBJS-$(CONFIG_QSVDEC) += qsvdec.o
|
OBJS-$(CONFIG_QSVDEC) += qsvdec.o
|
||||||
OBJS-$(CONFIG_QSVENC) += qsvenc.o
|
OBJS-$(CONFIG_QSVENC) += qsvenc.o
|
||||||
OBJS-$(CONFIG_RANGECODER) += rangecoder.o
|
OBJS-$(CONFIG_RANGECODER) += rangecoder.o
|
||||||
RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o
|
OBJS-$(CONFIG_RDFT) += rdft.o
|
||||||
OBJS-$(CONFIG_RDFT) += rdft.o $(RDFT-OBJS-yes)
|
|
||||||
OBJS-$(CONFIG_RV34DSP) += rv34dsp.o
|
OBJS-$(CONFIG_RV34DSP) += rv34dsp.o
|
||||||
OBJS-$(CONFIG_SHARED) += log2_tab.o reverse.o
|
OBJS-$(CONFIG_SHARED) += log2_tab.o reverse.o
|
||||||
OBJS-$(CONFIG_SINEWIN) += sinewin.o sinewin_fixed.o
|
OBJS-$(CONFIG_SINEWIN) += sinewin.o sinewin_fixed.o
|
||||||
|
@ -30,18 +30,21 @@ function ff_rdft_calc_neon, export=1
|
|||||||
|
|
||||||
lsls r6, r6, #31
|
lsls r6, r6, #31
|
||||||
bne 1f
|
bne 1f
|
||||||
add r0, r4, #20
|
add r0, r4, #24
|
||||||
bl X(ff_fft_permute_neon)
|
bl X(ff_fft_permute_neon)
|
||||||
add r0, r4, #20
|
add r0, r4, #24
|
||||||
mov r1, r5
|
mov r1, r5
|
||||||
bl X(ff_fft_calc_neon)
|
bl X(ff_fft_calc_neon)
|
||||||
1:
|
1:
|
||||||
ldr r12, [r4, #0] @ nbits
|
ldr r12, [r4, #0] @ nbits
|
||||||
mov r2, #1
|
mov r2, #1
|
||||||
|
ldr r8, [r4, #20] @ negative_sin
|
||||||
lsl r12, r2, r12
|
lsl r12, r2, r12
|
||||||
add r0, r5, #8
|
add r0, r5, #8
|
||||||
|
lsl r8, r8, #31
|
||||||
add r1, r5, r12, lsl #2
|
add r1, r5, r12, lsl #2
|
||||||
lsr r12, r12, #2
|
lsr r12, r12, #2
|
||||||
|
vdup.32 d26, r8
|
||||||
ldr r2, [r4, #12] @ tcos
|
ldr r2, [r4, #12] @ tcos
|
||||||
sub r12, r12, #2
|
sub r12, r12, #2
|
||||||
ldr r3, [r4, #16] @ tsin
|
ldr r3, [r4, #16] @ tsin
|
||||||
@ -55,6 +58,7 @@ function ff_rdft_calc_neon, export=1
|
|||||||
vld1.32 {d5}, [r3,:64]! @ tsin[i]
|
vld1.32 {d5}, [r3,:64]! @ tsin[i]
|
||||||
vmov.f32 d18, #0.5 @ k1
|
vmov.f32 d18, #0.5 @ k1
|
||||||
vdup.32 d19, r6
|
vdup.32 d19, r6
|
||||||
|
veor d5, d26, d5
|
||||||
pld [r0, #32]
|
pld [r0, #32]
|
||||||
veor d19, d18, d19 @ k2
|
veor d19, d18, d19 @ k2
|
||||||
vmov.i32 d16, #0
|
vmov.i32 d16, #0
|
||||||
@ -90,6 +94,7 @@ function ff_rdft_calc_neon, export=1
|
|||||||
vld1.32 {d5}, [r3,:64]! @ tsin[i]
|
vld1.32 {d5}, [r3,:64]! @ tsin[i]
|
||||||
veor d24, d22, d17 @ ev.re,-ev.im
|
veor d24, d22, d17 @ ev.re,-ev.im
|
||||||
vrev64.32 d3, d23 @ od.re, od.im
|
vrev64.32 d3, d23 @ od.re, od.im
|
||||||
|
veor d5, d26, d5
|
||||||
pld [r2, #32]
|
pld [r2, #32]
|
||||||
veor d2, d3, d16 @ -od.re, od.im
|
veor d2, d3, d16 @ -od.re, od.im
|
||||||
pld [r3, #32]
|
pld [r3, #32]
|
||||||
@ -140,10 +145,10 @@ function ff_rdft_calc_neon, export=1
|
|||||||
|
|
||||||
vmul.f32 d22, d22, d18
|
vmul.f32 d22, d22, d18
|
||||||
vst1.32 {d22}, [r5,:64]
|
vst1.32 {d22}, [r5,:64]
|
||||||
add r0, r4, #20
|
add r0, r4, #24
|
||||||
mov r1, r5
|
mov r1, r5
|
||||||
bl X(ff_fft_permute_neon)
|
bl X(ff_fft_permute_neon)
|
||||||
add r0, r4, #20
|
add r0, r4, #24
|
||||||
mov r1, r5
|
mov r1, r5
|
||||||
pop {r4-r8,lr}
|
pop {r4-r8,lr}
|
||||||
b X(ff_fft_calc_neon)
|
b X(ff_fft_calc_neon)
|
||||||
|
@ -28,28 +28,6 @@
|
|||||||
* (Inverse) Real Discrete Fourier Transforms.
|
* (Inverse) Real Discrete Fourier Transforms.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* sin(2*pi*x/n) for 0<=x<n/4, followed by n/2<=x<3n/4 */
|
|
||||||
#if !CONFIG_HARDCODED_TABLES
|
|
||||||
SINTABLE(16);
|
|
||||||
SINTABLE(32);
|
|
||||||
SINTABLE(64);
|
|
||||||
SINTABLE(128);
|
|
||||||
SINTABLE(256);
|
|
||||||
SINTABLE(512);
|
|
||||||
SINTABLE(1024);
|
|
||||||
SINTABLE(2048);
|
|
||||||
SINTABLE(4096);
|
|
||||||
SINTABLE(8192);
|
|
||||||
SINTABLE(16384);
|
|
||||||
SINTABLE(32768);
|
|
||||||
SINTABLE(65536);
|
|
||||||
#endif
|
|
||||||
static SINTABLE_CONST FFTSample * const ff_sin_tabs[] = {
|
|
||||||
NULL, NULL, NULL, NULL,
|
|
||||||
ff_sin_16, ff_sin_32, ff_sin_64, ff_sin_128, ff_sin_256, ff_sin_512, ff_sin_1024,
|
|
||||||
ff_sin_2048, ff_sin_4096, ff_sin_8192, ff_sin_16384, ff_sin_32768, ff_sin_65536,
|
|
||||||
};
|
|
||||||
|
|
||||||
/** Map one real FFT into two parallel real even and odd FFTs. Then interleave
|
/** Map one real FFT into two parallel real even and odd FFTs. Then interleave
|
||||||
* the two real FFTs into one complex FFT. Unmangle the results.
|
* the two real FFTs into one complex FFT. Unmangle the results.
|
||||||
* ref: http://www.engineeringproductivitytools.com/stuff/T0001/PT10.HTM
|
* ref: http://www.engineeringproductivitytools.com/stuff/T0001/PT10.HTM
|
||||||
@ -73,20 +51,29 @@ static void rdft_calc_c(RDFTContext *s, FFTSample *data)
|
|||||||
ev.re = data[0];
|
ev.re = data[0];
|
||||||
data[0] = ev.re+data[1];
|
data[0] = ev.re+data[1];
|
||||||
data[1] = ev.re-data[1];
|
data[1] = ev.re-data[1];
|
||||||
for (i = 1; i < (n>>2); i++) {
|
|
||||||
i1 = 2*i;
|
#define RDFT_UNMANGLE(sign0, sign1) \
|
||||||
i2 = n-i1;
|
for (i = 1; i < (n>>2); i++) { \
|
||||||
/* Separate even and odd FFTs */
|
i1 = 2*i; \
|
||||||
ev.re = k1*(data[i1 ]+data[i2 ]);
|
i2 = n-i1; \
|
||||||
od.im = -k2*(data[i1 ]-data[i2 ]);
|
/* Separate even and odd FFTs */ \
|
||||||
ev.im = k1*(data[i1+1]-data[i2+1]);
|
ev.re = k1*(data[i1 ]+data[i2 ]); \
|
||||||
od.re = k2*(data[i1+1]+data[i2+1]);
|
od.im = -k2*(data[i1 ]-data[i2 ]); \
|
||||||
/* Apply twiddle factors to the odd FFT and add to the even FFT */
|
ev.im = k1*(data[i1+1]-data[i2+1]); \
|
||||||
data[i1 ] = ev.re + od.re*tcos[i] - od.im*tsin[i];
|
od.re = k2*(data[i1+1]+data[i2+1]); \
|
||||||
data[i1+1] = ev.im + od.im*tcos[i] + od.re*tsin[i];
|
/* Apply twiddle factors to the odd FFT and add to the even FFT */ \
|
||||||
data[i2 ] = ev.re - od.re*tcos[i] + od.im*tsin[i];
|
data[i1 ] = ev.re + od.re*tcos[i] sign0 od.im*tsin[i]; \
|
||||||
data[i2+1] = -ev.im + od.im*tcos[i] + od.re*tsin[i];
|
data[i1+1] = ev.im + od.im*tcos[i] sign1 od.re*tsin[i]; \
|
||||||
|
data[i2 ] = ev.re - od.re*tcos[i] sign1 od.im*tsin[i]; \
|
||||||
|
data[i2+1] = -ev.im + od.im*tcos[i] sign1 od.re*tsin[i]; \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (s->negative_sin) {
|
||||||
|
RDFT_UNMANGLE(+,-)
|
||||||
|
} else {
|
||||||
|
RDFT_UNMANGLE(-,+)
|
||||||
|
}
|
||||||
|
|
||||||
data[2*i+1]=s->sign_convention*data[2*i+1];
|
data[2*i+1]=s->sign_convention*data[2*i+1];
|
||||||
if (s->inverse) {
|
if (s->inverse) {
|
||||||
data[0] *= k1;
|
data[0] *= k1;
|
||||||
@ -104,6 +91,7 @@ av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans)
|
|||||||
s->nbits = nbits;
|
s->nbits = nbits;
|
||||||
s->inverse = trans == IDFT_C2R || trans == DFT_C2R;
|
s->inverse = trans == IDFT_C2R || trans == DFT_C2R;
|
||||||
s->sign_convention = trans == IDFT_R2C || trans == DFT_C2R ? 1 : -1;
|
s->sign_convention = trans == IDFT_R2C || trans == DFT_C2R ? 1 : -1;
|
||||||
|
s->negative_sin = trans == DFT_C2R || trans == DFT_R2C;
|
||||||
|
|
||||||
if (nbits < 4 || nbits > 16)
|
if (nbits < 4 || nbits > 16)
|
||||||
return AVERROR(EINVAL);
|
return AVERROR(EINVAL);
|
||||||
@ -113,15 +101,7 @@ av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans)
|
|||||||
|
|
||||||
ff_init_ff_cos_tabs(nbits);
|
ff_init_ff_cos_tabs(nbits);
|
||||||
s->tcos = ff_cos_tabs[nbits];
|
s->tcos = ff_cos_tabs[nbits];
|
||||||
s->tsin = ff_sin_tabs[nbits]+(trans == DFT_R2C || trans == DFT_C2R)*(n>>2);
|
s->tsin = ff_cos_tabs[nbits] + (n >> 2);
|
||||||
#if !CONFIG_HARDCODED_TABLES
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
const double theta = (trans == DFT_R2C || trans == DFT_C2R ? -1 : 1) * 2 * M_PI / n;
|
|
||||||
for (i = 0; i < (n >> 2); i++)
|
|
||||||
s->tsin[i] = sin(i * theta);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
s->rdft_calc = rdft_calc_c;
|
s->rdft_calc = rdft_calc_c;
|
||||||
|
|
||||||
if (ARCH_ARM) ff_rdft_init_arm(s);
|
if (ARCH_ARM) ff_rdft_init_arm(s);
|
||||||
|
@ -25,29 +25,6 @@
|
|||||||
#include "config.h"
|
#include "config.h"
|
||||||
#include "fft.h"
|
#include "fft.h"
|
||||||
|
|
||||||
#if CONFIG_HARDCODED_TABLES
|
|
||||||
# define SINTABLE_CONST const
|
|
||||||
#else
|
|
||||||
# define SINTABLE_CONST
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define SINTABLE(size) \
|
|
||||||
SINTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_sin_##size)[size/2]
|
|
||||||
|
|
||||||
extern SINTABLE(16);
|
|
||||||
extern SINTABLE(32);
|
|
||||||
extern SINTABLE(64);
|
|
||||||
extern SINTABLE(128);
|
|
||||||
extern SINTABLE(256);
|
|
||||||
extern SINTABLE(512);
|
|
||||||
extern SINTABLE(1024);
|
|
||||||
extern SINTABLE(2048);
|
|
||||||
extern SINTABLE(4096);
|
|
||||||
extern SINTABLE(8192);
|
|
||||||
extern SINTABLE(16384);
|
|
||||||
extern SINTABLE(32768);
|
|
||||||
extern SINTABLE(65536);
|
|
||||||
|
|
||||||
struct RDFTContext {
|
struct RDFTContext {
|
||||||
int nbits;
|
int nbits;
|
||||||
int inverse;
|
int inverse;
|
||||||
@ -55,7 +32,8 @@ struct RDFTContext {
|
|||||||
|
|
||||||
/* pre/post rotation tables */
|
/* pre/post rotation tables */
|
||||||
const FFTSample *tcos;
|
const FFTSample *tcos;
|
||||||
SINTABLE_CONST FFTSample *tsin;
|
const FFTSample *tsin;
|
||||||
|
int negative_sin;
|
||||||
FFTContext fft;
|
FFTContext fft;
|
||||||
void (*rdft_calc)(struct RDFTContext *s, FFTSample *z);
|
void (*rdft_calc)(struct RDFTContext *s, FFTSample *z);
|
||||||
};
|
};
|
||||||
|
Loading…
Reference in New Issue
Block a user