mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2025-01-22 23:33:11 +00:00
lavu/tx: improve 3-point fixed precision
There's just no reason not to when it's so easy (albeit messy), and the lower-precision 3-point transform is also reducing the precision of all non-power-of-two transforms that use it.
This commit is contained in:
parent
34a6a36844
commit
e1c84856bb
@ -47,8 +47,6 @@ typedef void FFTComplex;
|
||||
|
||||
#if defined(TX_FLOAT) || defined(TX_DOUBLE)
|
||||
|
||||
#define MUL(x, y) ((x)*(y))
|
||||
|
||||
#define CMUL(dre, dim, are, aim, bre, bim) do { \
|
||||
(dre) = (are) * (bre) - (aim) * (bim); \
|
||||
(dim) = (are) * (bim) + (aim) * (bre); \
|
||||
@ -65,8 +63,6 @@ typedef void FFTComplex;
|
||||
|
||||
#elif defined(TX_INT32)
|
||||
|
||||
#define MUL(x, y) ((int32_t)(((int64_t)(x) * (int64_t)(y) + 0x40000000) >> 31))
|
||||
|
||||
/* Properly rounds the result */
|
||||
#define CMUL(dre, dim, are, aim, bre, bim) do { \
|
||||
int64_t accu; \
|
||||
|
@ -131,6 +131,9 @@ static av_always_inline void fft3(FFTComplex *out, FFTComplex *in,
|
||||
ptrdiff_t stride)
|
||||
{
|
||||
FFTComplex tmp[2];
|
||||
#ifdef TX_INT32
|
||||
int64_t mtmp[4];
|
||||
#endif
|
||||
|
||||
BF(tmp[0].re, tmp[1].im, in[1].im, in[2].im);
|
||||
BF(tmp[0].im, tmp[1].re, in[1].re, in[2].re);
|
||||
@ -138,15 +141,25 @@ static av_always_inline void fft3(FFTComplex *out, FFTComplex *in,
|
||||
out[0*stride].re = in[0].re + tmp[1].re;
|
||||
out[0*stride].im = in[0].im + tmp[1].im;
|
||||
|
||||
tmp[0].re = MUL(TX_NAME(ff_cos_53)[0].re, tmp[0].re);
|
||||
tmp[0].im = MUL(TX_NAME(ff_cos_53)[0].im, tmp[0].im);
|
||||
tmp[1].re = MUL(TX_NAME(ff_cos_53)[1].re, tmp[1].re);
|
||||
tmp[1].im = MUL(TX_NAME(ff_cos_53)[1].re, tmp[1].im);
|
||||
|
||||
#ifdef TX_INT32
|
||||
mtmp[0] = (int64_t)TX_NAME(ff_cos_53)[0].re * tmp[0].re;
|
||||
mtmp[1] = (int64_t)TX_NAME(ff_cos_53)[0].im * tmp[0].im;
|
||||
mtmp[2] = (int64_t)TX_NAME(ff_cos_53)[1].re * tmp[1].re;
|
||||
mtmp[3] = (int64_t)TX_NAME(ff_cos_53)[1].re * tmp[1].im;
|
||||
out[1*stride].re = in[0].re - (mtmp[2] + mtmp[0] + 0x40000000 >> 31);
|
||||
out[1*stride].im = in[0].im - (mtmp[3] - mtmp[1] + 0x40000000 >> 31);
|
||||
out[2*stride].re = in[0].re - (mtmp[2] - mtmp[0] + 0x40000000 >> 31);
|
||||
out[2*stride].im = in[0].im - (mtmp[3] + mtmp[1] + 0x40000000 >> 31);
|
||||
#else
|
||||
tmp[0].re = TX_NAME(ff_cos_53)[0].re * tmp[0].re;
|
||||
tmp[0].im = TX_NAME(ff_cos_53)[0].im * tmp[0].im;
|
||||
tmp[1].re = TX_NAME(ff_cos_53)[1].re * tmp[1].re;
|
||||
tmp[1].im = TX_NAME(ff_cos_53)[1].re * tmp[1].im;
|
||||
out[1*stride].re = in[0].re - tmp[1].re + tmp[0].re;
|
||||
out[1*stride].im = in[0].im - tmp[1].im - tmp[0].im;
|
||||
out[2*stride].re = in[0].re - tmp[1].re - tmp[0].re;
|
||||
out[2*stride].im = in[0].im - tmp[1].im + tmp[0].im;
|
||||
#endif
|
||||
}
|
||||
|
||||
#define DECL_FFT5(NAME, D0, D1, D2, D3, D4) \
|
||||
|
Loading…
Reference in New Issue
Block a user