mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2024-12-26 17:32:06 +00:00
lavu: support arbitrary-point FFTs and all even (i)MDCT transforms
This patch adds support for arbitrary-point FFTs and all even MDCT transforms. Odd MDCTs are not supported yet, as they are based on the DCT-II and DCT-III, which are very niche. With this change we can now write tests.
This commit is contained in:
parent
ca21cb1e36
commit
06a8596825
@ -51,6 +51,8 @@ enum AVTXType {
|
|||||||
* For inverse transforms, the stride specifies the spacing between each
|
* For inverse transforms, the stride specifies the spacing between each
|
||||||
* sample in the input array in bytes. The output will be a flat array.
|
* sample in the input array in bytes. The output will be a flat array.
|
||||||
* Stride must be a non-zero multiple of sizeof(float).
|
* Stride must be a non-zero multiple of sizeof(float).
|
||||||
|
* NOTE: the inverse transform is half-length, meaning the output will not
|
||||||
|
* contain redundant data. This is what most codecs work with.
|
||||||
*/
|
*/
|
||||||
AV_TX_FLOAT_MDCT = 1,
|
AV_TX_FLOAT_MDCT = 1,
|
||||||
/**
|
/**
|
||||||
@ -93,8 +95,7 @@ typedef void (*av_tx_fn)(AVTXContext *s, void *out, void *in, ptrdiff_t stride);
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Initialize a transform context with the given configuration
|
* Initialize a transform context with the given configuration
|
||||||
* Currently power of two lengths from 2 to 131072 are supported, along with
|
* (i)MDCTs with an odd length are currently not supported.
|
||||||
* any length decomposable to a power of two and either 3, 5 or 15.
|
|
||||||
*
|
*
|
||||||
* @param ctx the context to allocate, will be NULL on error
|
* @param ctx the context to allocate, will be NULL on error
|
||||||
* @param tx pointer to the transform function pointer to set
|
* @param tx pointer to the transform function pointer to set
|
||||||
|
@ -58,6 +58,7 @@ typedef void FFTComplex;
|
|||||||
(dim) = (are) * (bim) - (aim) * (bre); \
|
(dim) = (are) * (bim) - (aim) * (bre); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
#define UNSCALE(x) (x)
|
||||||
#define RESCALE(x) (x)
|
#define RESCALE(x) (x)
|
||||||
|
|
||||||
#define FOLD(a, b) ((a) + (b))
|
#define FOLD(a, b) ((a) + (b))
|
||||||
@ -85,6 +86,7 @@ typedef void FFTComplex;
|
|||||||
(dim) = (int)(((accu) + 0x40000000) >> 31); \
|
(dim) = (int)(((accu) + 0x40000000) >> 31); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
#define UNSCALE(x) ((double)x/2147483648.0)
|
||||||
#define RESCALE(x) (av_clip64(lrintf((x) * 2147483648.0), INT32_MIN, INT32_MAX))
|
#define RESCALE(x) (av_clip64(lrintf((x) * 2147483648.0), INT32_MIN, INT32_MAX))
|
||||||
|
|
||||||
#define FOLD(x, y) ((int)((x) + (unsigned)(y) + 32) >> 6)
|
#define FOLD(x, y) ((int)((x) + (unsigned)(y) + 32) >> 6)
|
||||||
@ -108,6 +110,7 @@ struct AVTXContext {
|
|||||||
int m; /* Ptwo part */
|
int m; /* Ptwo part */
|
||||||
int inv; /* Is inverted */
|
int inv; /* Is inverted */
|
||||||
int type; /* Type */
|
int type; /* Type */
|
||||||
|
double scale; /* Scale */
|
||||||
|
|
||||||
FFTComplex *exptab; /* MDCT exptab */
|
FFTComplex *exptab; /* MDCT exptab */
|
||||||
FFTComplex *tmp; /* Temporary buffer needed for all compound transforms */
|
FFTComplex *tmp; /* Temporary buffer needed for all compound transforms */
|
||||||
|
@ -397,6 +397,31 @@ static void monolithic_fft(AVTXContext *s, void *_out, void *_in,
|
|||||||
fft_dispatch[mb](out);
|
fft_dispatch[mb](out);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void naive_fft(AVTXContext *s, void *_out, void *_in,
|
||||||
|
ptrdiff_t stride)
|
||||||
|
{
|
||||||
|
FFTComplex *in = _in;
|
||||||
|
FFTComplex *out = _out;
|
||||||
|
const int n = s->n;
|
||||||
|
double phase = s->inv ? 2.0*M_PI/n : -2.0*M_PI/n;
|
||||||
|
|
||||||
|
for(int i = 0; i < n; i++) {
|
||||||
|
FFTComplex tmp = { 0 };
|
||||||
|
for(int j = 0; j < n; j++) {
|
||||||
|
const double factor = phase*i*j;
|
||||||
|
const FFTComplex mult = {
|
||||||
|
RESCALE(cos(factor)),
|
||||||
|
RESCALE(sin(factor)),
|
||||||
|
};
|
||||||
|
FFTComplex res;
|
||||||
|
CMUL3(res, in[j], mult);
|
||||||
|
tmp.re += res.re;
|
||||||
|
tmp.im += res.im;
|
||||||
|
}
|
||||||
|
out[i] = tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#define DECL_COMP_IMDCT(N) \
|
#define DECL_COMP_IMDCT(N) \
|
||||||
static void compound_imdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \
|
static void compound_imdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \
|
||||||
ptrdiff_t stride) \
|
ptrdiff_t stride) \
|
||||||
@ -553,6 +578,57 @@ static void monolithic_mdct(AVTXContext *s, void *_dst, void *_src,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void naive_imdct(AVTXContext *s, void *_dst, void *_src,
|
||||||
|
ptrdiff_t stride)
|
||||||
|
{
|
||||||
|
int len = s->n;
|
||||||
|
int len2 = len*2;
|
||||||
|
FFTSample *src = _src;
|
||||||
|
FFTSample *dst = _dst;
|
||||||
|
double scale = s->scale;
|
||||||
|
const double phase = M_PI/(4.0*len2);
|
||||||
|
|
||||||
|
stride /= sizeof(*src);
|
||||||
|
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
|
double sum_d = 0.0;
|
||||||
|
double sum_u = 0.0;
|
||||||
|
double i_d = phase * (4*len - 2*i - 1);
|
||||||
|
double i_u = phase * (3*len2 + 2*i + 1);
|
||||||
|
for (int j = 0; j < len2; j++) {
|
||||||
|
double a = (2 * j + 1);
|
||||||
|
double a_d = cos(a * i_d);
|
||||||
|
double a_u = cos(a * i_u);
|
||||||
|
double val = UNSCALE(src[j*stride]);
|
||||||
|
sum_d += a_d * val;
|
||||||
|
sum_u += a_u * val;
|
||||||
|
}
|
||||||
|
dst[i + 0] = RESCALE( sum_d*scale);
|
||||||
|
dst[i + len] = RESCALE(-sum_u*scale);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void naive_mdct(AVTXContext *s, void *_dst, void *_src,
|
||||||
|
ptrdiff_t stride)
|
||||||
|
{
|
||||||
|
int len = s->n*2;
|
||||||
|
FFTSample *src = _src;
|
||||||
|
FFTSample *dst = _dst;
|
||||||
|
double scale = s->scale;
|
||||||
|
const double phase = M_PI/(4.0*len);
|
||||||
|
|
||||||
|
stride /= sizeof(*dst);
|
||||||
|
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
|
double sum = 0.0;
|
||||||
|
for (int j = 0; j < len*2; j++) {
|
||||||
|
int a = (2*j + 1 + len) * (2*i + 1);
|
||||||
|
sum += UNSCALE(src[j]) * cos(a * phase);
|
||||||
|
}
|
||||||
|
dst[i*stride] = RESCALE(sum*scale);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static int gen_mdct_exptab(AVTXContext *s, int len4, double scale)
|
static int gen_mdct_exptab(AVTXContext *s, int len4, double scale)
|
||||||
{
|
{
|
||||||
const double theta = (scale < 0 ? len4 : 0) + 1.0/8.0;
|
const double theta = (scale < 0 ? len4 : 0) + 1.0/8.0;
|
||||||
@ -575,11 +651,13 @@ int TX_NAME(ff_tx_init_mdct_fft)(AVTXContext *s, av_tx_fn *tx,
|
|||||||
const void *scale, uint64_t flags)
|
const void *scale, uint64_t flags)
|
||||||
{
|
{
|
||||||
const int is_mdct = ff_tx_type_is_mdct(type);
|
const int is_mdct = ff_tx_type_is_mdct(type);
|
||||||
int err, n = 1, m = 1, max_ptwo = 1 << (FF_ARRAY_ELEMS(fft_dispatch) - 1);
|
int err, l, n = 1, m = 1, max_ptwo = 1 << (FF_ARRAY_ELEMS(fft_dispatch) - 1);
|
||||||
|
|
||||||
if (is_mdct)
|
if (is_mdct)
|
||||||
len >>= 1;
|
len >>= 1;
|
||||||
|
|
||||||
|
l = len;
|
||||||
|
|
||||||
#define CHECK_FACTOR(DST, FACTOR, SRC) \
|
#define CHECK_FACTOR(DST, FACTOR, SRC) \
|
||||||
if (DST == 1 && !(SRC % FACTOR)) { \
|
if (DST == 1 && !(SRC % FACTOR)) { \
|
||||||
DST = FACTOR; \
|
DST = FACTOR; \
|
||||||
@ -601,12 +679,23 @@ int TX_NAME(ff_tx_init_mdct_fft)(AVTXContext *s, av_tx_fn *tx,
|
|||||||
s->inv = inv;
|
s->inv = inv;
|
||||||
s->type = type;
|
s->type = type;
|
||||||
|
|
||||||
/* Filter out direct 3, 5 and 15 transforms, too niche */
|
/* If we weren't able to split the length into factors we can handle,
|
||||||
|
* resort to using the naive and slow FT. This also filters out
|
||||||
|
* direct 3, 5 and 15 transforms as they're too niche. */
|
||||||
if (len > 1 || m == 1) {
|
if (len > 1 || m == 1) {
|
||||||
av_log(NULL, AV_LOG_ERROR, "Unsupported transform size: n = %i, "
|
if (is_mdct && (l & 1)) /* Odd (i)MDCTs are not supported yet */
|
||||||
"m = %i, residual = %i!\n", n, m, len);
|
return AVERROR(ENOTSUP);
|
||||||
return AVERROR(EINVAL);
|
s->n = l;
|
||||||
} else if (n > 1 && m > 1) { /* 2D transform case */
|
s->m = 1;
|
||||||
|
*tx = naive_fft;
|
||||||
|
if (is_mdct) {
|
||||||
|
s->scale = *((SCALE_TYPE *)scale);
|
||||||
|
*tx = inv ? naive_imdct : naive_mdct;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (n > 1 && m > 1) { /* 2D transform case */
|
||||||
if ((err = ff_tx_gen_compound_mapping(s)))
|
if ((err = ff_tx_gen_compound_mapping(s)))
|
||||||
return err;
|
return err;
|
||||||
if (!(s->tmp = av_malloc(n*m*sizeof(*s->tmp))))
|
if (!(s->tmp = av_malloc(n*m*sizeof(*s->tmp))))
|
||||||
|
@ -80,7 +80,7 @@
|
|||||||
|
|
||||||
#define LIBAVUTIL_VERSION_MAJOR 56
|
#define LIBAVUTIL_VERSION_MAJOR 56
|
||||||
#define LIBAVUTIL_VERSION_MINOR 63
|
#define LIBAVUTIL_VERSION_MINOR 63
|
||||||
#define LIBAVUTIL_VERSION_MICRO 100
|
#define LIBAVUTIL_VERSION_MICRO 101
|
||||||
|
|
||||||
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
|
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
|
||||||
LIBAVUTIL_VERSION_MINOR, \
|
LIBAVUTIL_VERSION_MINOR, \
|
||||||
|
Loading…
Reference in New Issue
Block a user