mirror of https://git.ffmpeg.org/ffmpeg.git
checkasm: add av_tx FFT SIMD testing code
This sadly required making changes to the transform code itself, because checkasm has to reuse the same context for both the C and the SIMD versions of a transform. The lookup table (revtab) therefore had to be duplicated, with one copy for each version.
This commit is contained in:
parent
ff71671d88
commit
1978b143eb
|
@ -106,22 +106,24 @@ int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup)
|
|||
{
|
||||
const int m = s->m, inv = s->inv;
|
||||
|
||||
if (!(s->revtab = av_malloc(m*sizeof(*s->revtab))))
|
||||
if (!(s->revtab = av_malloc(s->m*sizeof(*s->revtab))))
|
||||
return AVERROR(ENOMEM);
|
||||
if (!(s->revtab_c = av_malloc(m*sizeof(*s->revtab_c))))
|
||||
return AVERROR(ENOMEM);
|
||||
|
||||
/* Default */
|
||||
for (int i = 0; i < m; i++) {
|
||||
int k = -split_radix_permutation(i, m, inv) & (m - 1);
|
||||
if (invert_lookup)
|
||||
s->revtab[i] = k;
|
||||
s->revtab[i] = s->revtab_c[i] = k;
|
||||
else
|
||||
s->revtab[k] = i;
|
||||
s->revtab[i] = s->revtab_c[k] = i;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s)
|
||||
int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s, int *revtab)
|
||||
{
|
||||
int nb_inplace_idx = 0;
|
||||
|
||||
|
@ -130,7 +132,7 @@ int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s)
|
|||
|
||||
/* The first coefficient is always already in-place */
|
||||
for (int src = 1; src < s->m; src++) {
|
||||
int dst = s->revtab[src];
|
||||
int dst = revtab[src];
|
||||
int found = 0;
|
||||
|
||||
if (dst <= src)
|
||||
|
@ -146,7 +148,7 @@ int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s)
|
|||
break;
|
||||
}
|
||||
}
|
||||
dst = s->revtab[dst];
|
||||
dst = revtab[dst];
|
||||
} while (dst != src && !found);
|
||||
|
||||
if (!found)
|
||||
|
@ -215,6 +217,7 @@ av_cold void av_tx_uninit(AVTXContext **ctx)
|
|||
av_free((*ctx)->pfatab);
|
||||
av_free((*ctx)->exptab);
|
||||
av_free((*ctx)->revtab);
|
||||
av_free((*ctx)->revtab_c);
|
||||
av_free((*ctx)->inplace_idx);
|
||||
av_free((*ctx)->tmp);
|
||||
|
||||
|
|
|
@ -122,6 +122,9 @@ struct AVTXContext {
|
|||
int *revtab; /* Input mapping for power of two transforms */
|
||||
int *inplace_idx; /* Required indices to revtab for in-place transforms */
|
||||
|
||||
int *revtab_c; /* Revtab for only the C transforms, needed because
|
||||
* checkasm makes us reuse the same context. */
|
||||
|
||||
av_tx_fn top_tx; /* Used for computing transforms derived from other
|
||||
* transforms, like full-length iMDCTs and RDFTs.
|
||||
* NOTE: Do NOT use this to mix assembly with C code. */
|
||||
|
@ -147,7 +150,7 @@ int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup);
|
|||
 * specific order, allows the revtab to be done in-place. The table passed as revtab
|
||||
* must already exist.
|
||||
*/
|
||||
int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s);
|
||||
int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s, int *revtab);
|
||||
|
||||
/*
|
||||
* This generates a parity-based revtab of length len and direction inv.
|
||||
|
|
|
@ -593,7 +593,7 @@ static void compound_fft_##N##xM(AVTXContext *s, void *_out, \
|
|||
for (int i = 0; i < m; i++) { \
|
||||
for (int j = 0; j < N; j++) \
|
||||
fft##N##in[j] = in[in_map[i*N + j]]; \
|
||||
fft##N(s->tmp + s->revtab[i], fft##N##in, m); \
|
||||
fft##N(s->tmp + s->revtab_c[i], fft##N##in, m); \
|
||||
} \
|
||||
\
|
||||
for (int i = 0; i < N; i++) \
|
||||
|
@ -624,16 +624,16 @@ static void split_radix_fft(AVTXContext *s, void *_out, void *_in,
|
|||
|
||||
do {
|
||||
tmp = out[src];
|
||||
dst = s->revtab[src];
|
||||
dst = s->revtab_c[src];
|
||||
do {
|
||||
FFSWAP(FFTComplex, tmp, out[dst]);
|
||||
dst = s->revtab[dst];
|
||||
dst = s->revtab_c[dst];
|
||||
} while (dst != src); /* Can be > as well, but is less predictable */
|
||||
out[dst] = tmp;
|
||||
} while ((src = *inplace_idx++));
|
||||
} else {
|
||||
for (int i = 0; i < m; i++)
|
||||
out[i] = in[s->revtab[i]];
|
||||
out[i] = in[s->revtab_c[i]];
|
||||
}
|
||||
|
||||
fft_dispatch[mb](out);
|
||||
|
@ -685,7 +685,7 @@ static void compound_imdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \
|
|||
FFTComplex tmp = { in2[-k*stride], in1[k*stride] }; \
|
||||
CMUL3(fft##N##in[j], tmp, exp[k >> 1]); \
|
||||
} \
|
||||
fft##N(s->tmp + s->revtab[i], fft##N##in, m); \
|
||||
fft##N(s->tmp + s->revtab_c[i], fft##N##in, m); \
|
||||
} \
|
||||
\
|
||||
for (int i = 0; i < N; i++) \
|
||||
|
@ -733,7 +733,7 @@ static void compound_mdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \
|
|||
CMUL(fft##N##in[j].im, fft##N##in[j].re, tmp.re, tmp.im, \
|
||||
exp[k >> 1].re, exp[k >> 1].im); \
|
||||
} \
|
||||
fft##N(s->tmp + s->revtab[i], fft##N##in, m); \
|
||||
fft##N(s->tmp + s->revtab_c[i], fft##N##in, m); \
|
||||
} \
|
||||
\
|
||||
for (int i = 0; i < N; i++) \
|
||||
|
@ -772,7 +772,7 @@ static void monolithic_imdct(AVTXContext *s, void *_dst, void *_src,
|
|||
|
||||
for (int i = 0; i < m; i++) {
|
||||
FFTComplex tmp = { in2[-2*i*stride], in1[2*i*stride] };
|
||||
CMUL3(z[s->revtab[i]], tmp, exp[i]);
|
||||
CMUL3(z[s->revtab_c[i]], tmp, exp[i]);
|
||||
}
|
||||
|
||||
fftp(z);
|
||||
|
@ -806,7 +806,7 @@ static void monolithic_mdct(AVTXContext *s, void *_dst, void *_src,
|
|||
tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]);
|
||||
tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]);
|
||||
}
|
||||
CMUL(z[s->revtab[i]].im, z[s->revtab[i]].re, tmp.re, tmp.im,
|
||||
CMUL(z[s->revtab_c[i]].im, z[s->revtab_c[i]].re, tmp.re, tmp.im,
|
||||
exp[i].re, exp[i].im);
|
||||
}
|
||||
|
||||
|
@ -1005,7 +1005,7 @@ int TX_NAME(ff_tx_init_mdct_fft)(AVTXContext *s, av_tx_fn *tx,
|
|||
if (flags & AV_TX_INPLACE) {
|
||||
if (is_mdct) /* In-place MDCTs are not supported yet */
|
||||
return AVERROR(ENOSYS);
|
||||
if ((err = ff_tx_gen_ptwo_inplace_revtab_idx(s)))
|
||||
if ((err = ff_tx_gen_ptwo_inplace_revtab_idx(s, s->revtab_c)))
|
||||
return err;
|
||||
}
|
||||
for (int i = 4; i <= av_log2(m); i++)
|
||||
|
|
|
@ -50,6 +50,7 @@ SWSCALEOBJS += sw_rgb.o sw_scale.o
|
|||
CHECKASMOBJS-$(CONFIG_SWSCALE) += $(SWSCALEOBJS)
|
||||
|
||||
# libavutil tests
|
||||
AVUTILOBJS += av_tx.o
|
||||
AVUTILOBJS += fixed_dsp.o
|
||||
AVUTILOBJS += float_dsp.o
|
||||
|
||||
|
|
|
@ -0,0 +1,108 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include "libavutil/mem_internal.h"
|
||||
#include "libavutil/tx.h"
|
||||
#include "libavutil/error.h"
|
||||
|
||||
#include "checkasm.h"
|
||||
|
||||
/* Absolute tolerance handed to the *_near_abs_eps_array() comparisons. */
#define EPS 0.00005

/* Input scaling helpers for randomize_complex(). */

/* Pass the random value through unchanged. */
#define SCALE_NOOP(x) (x)

/*
 * Scale x by 2^31, round to the nearest integer, clamp to the int32_t range
 * and drop the low 12 bits. llrint() is used so the rounding happens in double
 * precision and in a 64-bit integer: lrintf() would first narrow the product
 * to float and returns a long, which cannot hold 2^31 where long is 32 bits.
 */
#define SCALE_INT20(x) (av_clip64(llrint((x) * 2147483648.0), INT32_MIN, INT32_MAX) >> 12)
|
||||
|
||||
/*
 * Fill the first LEN elements of BUF, viewed as an array of TYPE, with random
 * complex values. TYPE must be a two-member aggregate (real part first, then
 * imaginary part). Each component is rnd() / UINT_MAX converted to double
 * (presumably in [0, 1], assuming rnd() yields an unsigned int) and then
 * passed through the SCALE(x) macro.
 *
 * Arguments are parenthesized and LEN is evaluated once. The internal names
 * carry an rc_ prefix so that a caller argument called "buf" or "i" is not
 * captured by the macro's own locals.
 */
#define randomize_complex(BUF, LEN, TYPE, SCALE)                       \
    do {                                                               \
        TYPE *rc_buf_ = (TYPE *)(BUF);                                 \
        const int rc_len_ = (LEN);                                     \
        for (int rc_i_ = 0; rc_i_ < rc_len_; rc_i_++) {                \
            double rc_re_ = (double)rnd() / UINT_MAX;                  \
            double rc_im_ = (double)rnd() / UINT_MAX;                  \
            rc_buf_[rc_i_] = (TYPE){ SCALE(rc_re_), SCALE(rc_im_) };   \
        }                                                              \
    } while (0)
|
||||
|
||||
/*
 * Transform lengths (in complex samples) exercised by the checks. The largest
 * entry, 16384, is also the element count the test buffers are sized for.
 */
static const int check_lens[] = {
    2, 4, 8, 16, 32, 64, 1024, 16384,
};
|
||||
|
||||
/*
 * Check one transform type over every length in LENGTHS.
 *
 *   PREFIX            string literal used to build the reported function name
 *   TYPE              enum AVTXType value handed to av_tx_init()
 *   DATA_TYPE         element type; its size is passed as the stride argument
 *   SCALE             lvalue holding the scale factor for av_tx_init()
 *   LENGTHS           array of transform lengths
 *   CHECK_EXPRESSION  expression that is non-zero when out_ref and out_new
 *                     differ; it may use the loop-local variable "len"
 *
 * The macro expects "in", "out_ref" and "out_new" buffers plus the checkasm
 * declare_func() declaration to be in scope at the expansion site.
 *
 * NOTE: if av_tx_init() fails, the macro executes "return;" and therefore
 * leaves the *enclosing function* immediately.
 */
#define CHECK_TEMPLATE(PREFIX, TYPE, DATA_TYPE, SCALE, LENGTHS, CHECK_EXPRESSION)       \
    do {                                                                                \
        int err;                                                                        \
        AVTXContext *tx = NULL;                                                         \
        av_tx_fn fn = NULL;                                                             \
        int num_checks = 0;                                                             \
        int last_check = 0;                                                             \
        const void *scale = &SCALE;                                                     \
                                                                                        \
        for (int i = 0; i < FF_ARRAY_ELEMS(LENGTHS); i++) {                             \
            int len = LENGTHS[i];                                                       \
                                                                                        \
            /* Pass the pointer to the scale value itself, not the address */          \
            /* of the local pointer variable that holds it.                */          \
            if ((err = av_tx_init(&tx, &fn, TYPE, 0, len, scale, 0x0)) < 0) {           \
                fprintf(stderr, "av_tx: %s\n", av_err2str(err));                        \
                return;                                                                 \
            }                                                                           \
                                                                                        \
            if (check_func(fn, PREFIX "_%i", len)) {                                    \
                num_checks++;                                                           \
                last_check = len;                                                       \
                call_ref(tx, out_ref, in, sizeof(DATA_TYPE));                           \
                call_new(tx, out_new, in, sizeof(DATA_TYPE));                           \
                if (CHECK_EXPRESSION) {                                                 \
                    fail();                                                             \
                    break;                                                              \
                }                                                                       \
                bench_new(tx, out_new, in, sizeof(DATA_TYPE));                          \
            }                                                                           \
                                                                                        \
            av_tx_uninit(&tx);                                                          \
            fn = NULL;                                                                  \
        }                                                                               \
                                                                                        \
        /* Releases the context still alive when the loop was left through */          \
        /* "break"; otherwise this repeats the in-loop uninit (presumably   */          \
        /* harmless if av_tx_uninit() resets the pointer -- confirm).       */          \
        av_tx_uninit(&tx);                                                              \
        fn = NULL;                                                                      \
                                                                                        \
        if (num_checks == 1)                                                            \
            report(PREFIX "_%i", last_check);                                           \
        else if (num_checks)                                                            \
            report(PREFIX);                                                             \
    } while (0)
|
||||
|
||||
void checkasm_check_av_tx(void)
|
||||
{
|
||||
const float scale_float = 1.0f;
|
||||
const double scale_double = 1.0f;
|
||||
|
||||
declare_func(void, AVTXContext *tx, void *out, void *in, ptrdiff_t stride);
|
||||
|
||||
void *in = av_malloc(16384*2*8);
|
||||
void *out_ref = av_malloc(16384*2*8);
|
||||
void *out_new = av_malloc(16384*2*8);
|
||||
|
||||
randomize_complex(in, 16384, AVComplexFloat, SCALE_NOOP);
|
||||
CHECK_TEMPLATE("float_fft", AV_TX_FLOAT_FFT, AVComplexFloat, scale_float, check_lens,
|
||||
!float_near_abs_eps_array(out_ref, out_new, EPS, len*2));
|
||||
|
||||
randomize_complex(in, 16384, AVComplexDouble, SCALE_NOOP);
|
||||
CHECK_TEMPLATE("double_fft", AV_TX_DOUBLE_FFT, AVComplexDouble, scale_double, check_lens,
|
||||
!double_near_abs_eps_array(out_ref, out_new, EPS, len*2));
|
||||
|
||||
av_free(in);
|
||||
av_free(out_ref);
|
||||
av_free(out_new);
|
||||
}
|
|
@ -198,6 +198,7 @@ static const struct {
|
|||
#if CONFIG_AVUTIL
|
||||
{ "fixed_dsp", checkasm_check_fixed_dsp },
|
||||
{ "float_dsp", checkasm_check_float_dsp },
|
||||
{ "av_tx", checkasm_check_av_tx },
|
||||
#endif
|
||||
{ NULL }
|
||||
};
|
||||
|
|
|
@ -43,6 +43,7 @@ void checkasm_check_aacpsdsp(void);
|
|||
void checkasm_check_afir(void);
|
||||
void checkasm_check_alacdsp(void);
|
||||
void checkasm_check_audiodsp(void);
|
||||
void checkasm_check_av_tx(void);
|
||||
void checkasm_check_blend(void);
|
||||
void checkasm_check_blockdsp(void);
|
||||
void checkasm_check_bswapdsp(void);
|
||||
|
|
|
@ -2,6 +2,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp \
|
|||
fate-checkasm-af_afir \
|
||||
fate-checkasm-alacdsp \
|
||||
fate-checkasm-audiodsp \
|
||||
fate-checkasm-av_tx \
|
||||
fate-checkasm-blockdsp \
|
||||
fate-checkasm-bswapdsp \
|
||||
fate-checkasm-exrdsp \
|
||||
|
|
Loading…
Reference in New Issue