diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c index 542c15e480..1d4c4d294b 100644 --- a/libavutil/tx_template.c +++ b/libavutil/tx_template.c @@ -940,12 +940,12 @@ static const FFTXCodelet TX_NAME(ff_tx_mdct_naive_inv_def) = { .prio = FF_TX_PRIO_MIN, }; -static av_cold int TX_NAME(ff_tx_mdct_sr_init)(AVTXContext *s, - const FFTXCodelet *cd, - uint64_t flags, - FFTXCodeletOptions *opts, - int len, int inv, - const void *scale) +static av_cold int TX_NAME(ff_tx_mdct_init)(AVTXContext *s, + const FFTXCodelet *cd, + uint64_t flags, + FFTXCodeletOptions *opts, + int len, int inv, + const void *scale) { int ret; FFTXCodeletOptions sub_opts = { .invert_lookup = inv }; @@ -955,32 +955,49 @@ static av_cold int TX_NAME(ff_tx_mdct_sr_init)(AVTXContext *s, flags &= ~FF_TX_OUT_OF_PLACE; /* We want the subtransform to be */ flags |= AV_TX_INPLACE; /* in-place */ - flags |= FF_TX_PRESHUFFLE; /* This function handles the permute step */ + flags |= FF_TX_PRESHUFFLE; /* First try with an in-place transform */ if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts, len >> 1, - inv, scale))) - return ret; + inv, scale))) { + flags &= ~FF_TX_PRESHUFFLE; /* Now try with a generic FFT */ + if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts, len >> 1, + inv, scale))) + return ret; + } - if ((ret = TX_TAB(ff_tx_mdct_gen_exp)(s, inv ? s->sub->map : NULL))) + /* If we need to preshuffle just steal the map from the subcontext */ + if (s->sub[0].flags & FF_TX_PRESHUFFLE) { + s->map = s->sub[0].map; + s->sub[0].map = NULL; + } else { + s->map = av_malloc((len >> 1)*sizeof(*s->map)); + if (!s->map) + return AVERROR(ENOMEM); + + for (int i = 0; i < len >> 1; i++) + s->map[i] = i; + } + + if ((ret = TX_TAB(ff_tx_mdct_gen_exp)(s, inv ? s->map : NULL))) return ret; /* Saves a multiply in a hot path. */ if (inv) for (int i = 0; i < (s->len >> 1); i++) - s->sub->map[i] <<= 1; + s->map[i] <<= 1; return 0; } -static void TX_NAME(ff_tx_mdct_sr_fwd)(AVTXContext *s, void *_dst, void *_src, - ptrdiff_t stride) +static void TX_NAME(ff_tx_mdct_fwd)(AVTXContext *s, void *_dst, void *_src, + ptrdiff_t stride) { TXSample *src = _src, *dst = _dst; TXComplex *exp = s->exp, tmp, *z = _dst; const int len2 = s->len >> 1; const int len4 = s->len >> 2; const int len3 = len2 * 3; - const int *sub_map = s->sub->map; + const int *sub_map = s->map; stride /= sizeof(*dst); @@ -1011,14 +1028,14 @@ static void TX_NAME(ff_tx_mdct_sr_fwd)(AVTXContext *s, void *_dst, void *_src, } } -static void TX_NAME(ff_tx_mdct_sr_inv)(AVTXContext *s, void *_dst, void *_src, - ptrdiff_t stride) +static void TX_NAME(ff_tx_mdct_inv)(AVTXContext *s, void *_dst, void *_src, + ptrdiff_t stride) { TXComplex *z = _dst, *exp = s->exp; const TXSample *src = _src, *in1, *in2; const int len2 = s->len >> 1; const int len4 = s->len >> 2; - const int *sub_map = s->sub->map; + const int *sub_map = s->map; stride /= sizeof(*src); in1 = src; @@ -1043,28 +1060,28 @@ static void TX_NAME(ff_tx_mdct_sr_inv)(AVTXContext *s, void *_dst, void *_src, } } -static const FFTXCodelet TX_NAME(ff_tx_mdct_sr_fwd_def) = { - .name = TX_NAME_STR("mdct_sr_fwd"), - .function = TX_NAME(ff_tx_mdct_sr_fwd), +static const FFTXCodelet TX_NAME(ff_tx_mdct_fwd_def) = { + .name = TX_NAME_STR("mdct_fwd"), + .function = TX_NAME(ff_tx_mdct_fwd), .type = TX_TYPE(MDCT), .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY, - .factors[0] = 2, + .factors = { 2, TX_FACTOR_ANY }, .min_len = 2, .max_len = TX_LEN_UNLIMITED, - .init = TX_NAME(ff_tx_mdct_sr_init), + .init = TX_NAME(ff_tx_mdct_init), .cpu_flags = FF_TX_CPU_FLAGS_ALL, .prio = FF_TX_PRIO_BASE, }; -static const FFTXCodelet TX_NAME(ff_tx_mdct_sr_inv_def) = { - .name = TX_NAME_STR("mdct_sr_inv"), - .function = TX_NAME(ff_tx_mdct_sr_inv), +static const FFTXCodelet TX_NAME(ff_tx_mdct_inv_def) = { + .name = TX_NAME_STR("mdct_inv"), + .function = TX_NAME(ff_tx_mdct_inv), .type = TX_TYPE(MDCT), .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY, - .factors[0] = 2, + .factors = { 2, TX_FACTOR_ANY }, .min_len = 2, .max_len = TX_LEN_UNLIMITED, - .init = TX_NAME(ff_tx_mdct_sr_init), + .init = TX_NAME(ff_tx_mdct_init), .cpu_flags = FF_TX_CPU_FLAGS_ALL, .prio = FF_TX_PRIO_BASE, }; @@ -1477,8 +1494,8 @@ const FFTXCodelet * const TX_NAME(ff_tx_codelet_list)[] = { &TX_NAME(ff_tx_fft_pfa_9xM_def), &TX_NAME(ff_tx_fft_pfa_15xM_def), &TX_NAME(ff_tx_fft_naive_def), - &TX_NAME(ff_tx_mdct_sr_fwd_def), - &TX_NAME(ff_tx_mdct_sr_inv_def), + &TX_NAME(ff_tx_mdct_fwd_def), + &TX_NAME(ff_tx_mdct_inv_def), &TX_NAME(ff_tx_mdct_pfa_3xM_fwd_def), &TX_NAME(ff_tx_mdct_pfa_5xM_fwd_def), &TX_NAME(ff_tx_mdct_pfa_7xM_fwd_def),