x86/tx_float: build and register the split-radix FFT as FMA3 instead of AVX

Summary of what the hunks below do:
  * tx_float.asm: FFT_SPLIT_RADIX_FN is instantiated for fma3 (float and
    ns_float) where it was previously instantiated for avx.
  * tx_float_init.c: the fft_sr / fft_sr_ns declarations and codelet entries
    switch from avx/AVX to fma3/FMA3, with priorities 256 -> 288 and
    320 -> 352.
  * tx_float_init.c: "#if HAVE_AVX2_EXTERNAL" moves down so that it only
    guards the avx2 entries; the fma3 entries are no longer inside it.
  * tx_float_init.c: the avx2 entries' priorities go from 288 -> 320 and
    352 -> 384.

NOTE(review): this patch was recovered from a copy whose line breaks and runs
of whitespace had been collapsed. The line structure was rebuilt from the hunk
headers (old/new line counts and three lines of context all agree), but the
indentation and column alignment of the C lines is a best guess. Apply with
"git apply --ignore-whitespace" (or "patch -l") and check the result against
the tree.

diff --git a/libavutil/x86/tx_float.asm b/libavutil/x86/tx_float.asm
index 511d8d6fa3..21f99d3945 100644
--- a/libavutil/x86/tx_float.asm
+++ b/libavutil/x86/tx_float.asm
@@ -1285,8 +1285,8 @@ FFT_SPLIT_RADIX_DEF 131072
 %endmacro
 
 %if ARCH_X86_64
-FFT_SPLIT_RADIX_FN avx, float, 0
-FFT_SPLIT_RADIX_FN avx, ns_float, 1
+FFT_SPLIT_RADIX_FN fma3, float, 0
+FFT_SPLIT_RADIX_FN fma3, ns_float, 1
 %if HAVE_AVX2_EXTERNAL
 FFT_SPLIT_RADIX_FN avx2, float, 0
 FFT_SPLIT_RADIX_FN avx2, ns_float, 1
diff --git a/libavutil/x86/tx_float_init.c b/libavutil/x86/tx_float_init.c
index 108f9b4b04..5db0b57d13 100644
--- a/libavutil/x86/tx_float_init.c
+++ b/libavutil/x86/tx_float_init.c
@@ -38,8 +38,8 @@ TX_DECL_FN(fft32, avx)
 TX_DECL_FN(fft32_ns, avx)
 TX_DECL_FN(fft32, fma3)
 TX_DECL_FN(fft32_ns, fma3)
-TX_DECL_FN(fft_sr, avx)
-TX_DECL_FN(fft_sr_ns, avx)
+TX_DECL_FN(fft_sr, fma3)
+TX_DECL_FN(fft_sr_ns, fma3)
 TX_DECL_FN(fft_sr, avx2)
 TX_DECL_FN(fft_sr_ns, avx2)
 
@@ -88,13 +88,13 @@ const FFTXCodelet * const ff_tx_codelet_list_float_x86[] = {
     TX_DEF(fft32, FFT, 32, 32, 2, 0, 288, b8_i2, fma3, FMA3, AV_TX_INPLACE, AV_CPU_FLAG_AVXSLOW),
     TX_DEF(fft32_ns, FFT, 32, 32, 2, 0, 352, b8_i2, fma3, FMA3, AV_TX_INPLACE | FF_TX_PRESHUFFLE,
            AV_CPU_FLAG_AVXSLOW),
-#if HAVE_AVX2_EXTERNAL
-    TX_DEF(fft_sr, FFT, 64, 131072, 2, 0, 256, b8_i2, avx, AVX, 0, AV_CPU_FLAG_AVXSLOW),
-    TX_DEF(fft_sr_ns, FFT, 64, 131072, 2, 0, 320, b8_i2, avx, AVX, AV_TX_INPLACE | FF_TX_PRESHUFFLE,
+    TX_DEF(fft_sr, FFT, 64, 131072, 2, 0, 288, b8_i2, fma3, FMA3, 0, AV_CPU_FLAG_AVXSLOW),
+    TX_DEF(fft_sr_ns, FFT, 64, 131072, 2, 0, 352, b8_i2, fma3, FMA3, AV_TX_INPLACE | FF_TX_PRESHUFFLE,
            AV_CPU_FLAG_AVXSLOW),
-    TX_DEF(fft_sr, FFT, 64, 131072, 2, 0, 288, b8_i2, avx2, AVX2, 0,
+#if HAVE_AVX2_EXTERNAL
+    TX_DEF(fft_sr, FFT, 64, 131072, 2, 0, 320, b8_i2, avx2, AVX2, 0,
            AV_CPU_FLAG_AVXSLOW | AV_CPU_FLAG_SLOW_GATHER),
-    TX_DEF(fft_sr_ns, FFT, 64, 131072, 2, 0, 352, b8_i2, avx2, AVX2, AV_TX_INPLACE | FF_TX_PRESHUFFLE,
+    TX_DEF(fft_sr_ns, FFT, 64, 131072, 2, 0, 384, b8_i2, avx2, AVX2, AV_TX_INPLACE | FF_TX_PRESHUFFLE,
            AV_CPU_FLAG_AVXSLOW | AV_CPU_FLAG_SLOW_GATHER),
 #endif
 #endif