mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2024-12-28 18:32:22 +00:00
8e94b7cff0
out[lut[i]] = in[i] lookups were 4.04 times(!) slower than out[i] = in[lut[i]] lookups for an out-of-place FFT of length 4096. The permutes remain unchanged for anything but out-of-place monolithic FFT, as those benefit quite a lot from the current order (it means there's only 1 lookup necessary to add to an offset, rather than a full gather). The code was based around non-power-of-two FFTs, so this wasn't benchmarked early on.
198 lines
5.1 KiB
C
198 lines
5.1 KiB
C
/*
|
|
* This file is part of FFmpeg.
|
|
*
|
|
* FFmpeg is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* FFmpeg is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with FFmpeg; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
#include "tx_priv.h"
|
|
|
|
int ff_tx_type_is_mdct(enum AVTXType type)
|
|
{
|
|
switch (type) {
|
|
case AV_TX_FLOAT_MDCT:
|
|
case AV_TX_DOUBLE_MDCT:
|
|
case AV_TX_INT32_MDCT:
|
|
return 1;
|
|
default:
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
/* Calculates the modular multiplicative inverse, not fast, replace */
|
|
static av_always_inline int mulinv(int n, int m)
|
|
{
|
|
n = n % m;
|
|
for (int x = 1; x < m; x++)
|
|
if (((n * x) % m) == 1)
|
|
return x;
|
|
av_assert0(0); /* Never reached */
|
|
}
|
|
|
|
/* Guaranteed to work for any n, m where gcd(n, m) == 1 */
|
|
int ff_tx_gen_compound_mapping(AVTXContext *s)
|
|
{
|
|
int *in_map, *out_map;
|
|
const int n = s->n;
|
|
const int m = s->m;
|
|
const int inv = s->inv;
|
|
const int len = n*m;
|
|
const int m_inv = mulinv(m, n);
|
|
const int n_inv = mulinv(n, m);
|
|
const int mdct = ff_tx_type_is_mdct(s->type);
|
|
|
|
if (!(s->pfatab = av_malloc(2*len*sizeof(*s->pfatab))))
|
|
return AVERROR(ENOMEM);
|
|
|
|
in_map = s->pfatab;
|
|
out_map = s->pfatab + n*m;
|
|
|
|
/* Ruritanian map for input, CRT map for output, can be swapped */
|
|
for (int j = 0; j < m; j++) {
|
|
for (int i = 0; i < n; i++) {
|
|
/* Shifted by 1 to simplify MDCTs */
|
|
in_map[j*n + i] = ((i*m + j*n) % len) << mdct;
|
|
out_map[(i*m*m_inv + j*n*n_inv) % len] = i*m + j;
|
|
}
|
|
}
|
|
|
|
/* Change transform direction by reversing all ACs */
|
|
if (inv) {
|
|
for (int i = 0; i < m; i++) {
|
|
int *in = &in_map[i*n + 1]; /* Skip the DC */
|
|
for (int j = 0; j < ((n - 1) >> 1); j++)
|
|
FFSWAP(int, in[j], in[n - j - 2]);
|
|
}
|
|
}
|
|
|
|
/* Our 15-point transform is also a compound one, so embed its input map */
|
|
if (n == 15) {
|
|
for (int k = 0; k < m; k++) {
|
|
int tmp[15];
|
|
memcpy(tmp, &in_map[k*15], 15*sizeof(*tmp));
|
|
for (int i = 0; i < 5; i++) {
|
|
for (int j = 0; j < 3; j++)
|
|
in_map[k*15 + i*3 + j] = tmp[(i*3 + j*5) % 15];
|
|
}
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Allocates s->revtab with s->m entries and fills it from
 * split_radix_permutation(), negated and masked with (s->m - 1).
 *
 * For each index i with permuted value k:
 *   invert_lookup != 0  ->  revtab[i] = k
 *   invert_lookup == 0  ->  revtab[k] = i
 *
 * Returns 0 on success, AVERROR(ENOMEM) if the allocation fails.
 */
int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup)
{
    const int len  = s->m;
    const int inv  = s->inv;
    const int mask = len - 1;

    s->revtab = av_malloc(len*sizeof(*s->revtab));
    if (!s->revtab)
        return AVERROR(ENOMEM);

    for (int idx = 0; idx < len; idx++) {
        const int perm = -split_radix_permutation(idx, len, inv) & mask;
        const int slot = invert_lookup ? idx  : perm;
        const int val  = invert_lookup ? perm : idx;

        s->revtab[slot] = val;
    }

    return 0;
}
|
|
|
|
/*
 * Allocates s->inplace_idx (s->m entries) and fills it with a list of
 * starting indices derived from s->revtab, terminated by a 0 entry.
 *
 * For every index from 1 to s->m - 1 whose revtab entry is larger than the
 * index itself, the revtab chain is followed until it returns to the start
 * or reaches an index that is already in the list; only in the former case
 * is the start index appended.
 * NOTE(review): presumably these are the cycle leaders used for in-place
 * permutation - confirm against the users of inplace_idx.
 *
 * Returns 0 on success, AVERROR(ENOMEM) if the allocation fails.
 */
int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s)
{
    int count = 0;

    s->inplace_idx = av_malloc(s->m*sizeof(*s->inplace_idx));
    if (!s->inplace_idx)
        return AVERROR(ENOMEM);

    for (int start = 1; start < s->m; start++) {
        int cur  = s->revtab[start];
        int seen = 0;

        if (cur <= start)
            continue;

        do {
            /* Stop scanning the list as soon as the current index is found */
            for (int k = 0; k < count && !seen; k++)
                seen = (cur == s->inplace_idx[k]);
            cur = s->revtab[cur];
        } while (cur != start && !seen);

        if (!seen)
            s->inplace_idx[count++] = start;
    }

    /* Zero entry marks the end of the list */
    s->inplace_idx[count] = 0;

    return 0;
}
|
|
|
|
/*
 * Frees a transform context together with the tables it owns (pfatab,
 * exptab, revtab, inplace_idx, tmp), releasing the context through
 * av_freep(ctx). Does nothing when *ctx is NULL.
 */
av_cold void av_tx_uninit(AVTXContext **ctx)
{
    AVTXContext *s = *ctx;

    if (s) {
        av_free(s->pfatab);
        av_free(s->exptab);
        av_free(s->revtab);
        av_free(s->inplace_idx);
        av_free(s->tmp);

        av_freep(ctx);
    }
}
|
|
|
|
/*
 * Allocates a zeroed transform context and hands it to the float, double or
 * int32 initializer that matches the requested type.
 *
 * On success the new context is stored in *ctx and 0 is returned.
 * On failure the context is freed, *tx is set to NULL, *ctx is left
 * untouched and the error code is returned: AVERROR(ENOMEM) if the context
 * cannot be allocated, AVERROR(EINVAL) for an unrecognized type, otherwise
 * whatever the type-specific initializer returned.
 */
av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type,
                       int inv, int len, const void *scale, uint64_t flags)
{
    int err;
    AVTXContext *s = av_mallocz(sizeof(*s));

    if (!s)
        return AVERROR(ENOMEM);

    switch (type) {
    case AV_TX_FLOAT_FFT:
    case AV_TX_FLOAT_MDCT:
        err = ff_tx_init_mdct_fft_float(s, tx, type, inv, len, scale, flags);
        break;
    case AV_TX_DOUBLE_FFT:
    case AV_TX_DOUBLE_MDCT:
        err = ff_tx_init_mdct_fft_double(s, tx, type, inv, len, scale, flags);
        break;
    case AV_TX_INT32_FFT:
    case AV_TX_INT32_MDCT:
        err = ff_tx_init_mdct_fft_int32(s, tx, type, inv, len, scale, flags);
        break;
    default:
        err = AVERROR(EINVAL);
        break;
    }

    /* Any non-zero result is a failure: tear down the partial context */
    if (err) {
        av_tx_uninit(&s);
        *tx = NULL;
        return err;
    }

    *ctx = s;

    return 0;
}
|