mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2025-01-29 10:43:15 +00:00
avutil/cpu: add AVX512 Icelake flag
Signed-off-by: Wu Jianhua <jianhua.wu@intel.com>
Reviewed-by: Henrik Gramner <henrik@gramner.com>
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
parent
e6e3aae294
commit
f629ea2e18
13
configure
vendored
13
configure
vendored
@ -443,6 +443,7 @@ Optimization options (experts only):
|
||||
--disable-fma4 disable FMA4 optimizations
|
||||
--disable-avx2 disable AVX2 optimizations
|
||||
--disable-avx512 disable AVX-512 optimizations
|
||||
--disable-avx512icl disable AVX-512ICL optimizations
|
||||
--disable-aesni disable AESNI optimizations
|
||||
--disable-armv5te disable armv5te optimizations
|
||||
--disable-armv6 disable armv6 optimizations
|
||||
@ -2096,6 +2097,7 @@ ARCH_EXT_LIST_X86_SIMD="
|
||||
avx
|
||||
avx2
|
||||
avx512
|
||||
avx512icl
|
||||
fma3
|
||||
fma4
|
||||
mmx
|
||||
@ -2665,6 +2667,7 @@ fma3_deps="avx"
|
||||
fma4_deps="avx"
|
||||
avx2_deps="avx"
|
||||
avx512_deps="avx2"
|
||||
avx512icl_deps="avx512"
|
||||
|
||||
mmx_external_deps="x86asm"
|
||||
mmx_inline_deps="inline_asm x86"
|
||||
@ -6126,10 +6129,11 @@ EOF
|
||||
elf*) enabled debug && append X86ASMFLAGS $x86asm_debug ;;
|
||||
esac
|
||||
|
||||
enabled avx512 && check_x86asm avx512_external "vmovdqa32 [eax]{k1}{z}, zmm0"
|
||||
enabled avx2 && check_x86asm avx2_external "vextracti128 xmm0, ymm0, 0"
|
||||
enabled xop && check_x86asm xop_external "vpmacsdd xmm0, xmm1, xmm2, xmm3"
|
||||
enabled fma4 && check_x86asm fma4_external "vfmaddps ymm0, ymm1, ymm2, ymm3"
|
||||
enabled avx512 && check_x86asm avx512_external "vmovdqa32 [eax]{k1}{z}, zmm0"
|
||||
enabled avx512icl && check_x86asm avx512icl_external "vpdpwssds zmm31{k1}{z}, zmm29, zmm28"
|
||||
enabled avx2 && check_x86asm avx2_external "vextracti128 xmm0, ymm0, 0"
|
||||
enabled xop && check_x86asm xop_external "vpmacsdd xmm0, xmm1, xmm2, xmm3"
|
||||
enabled fma4 && check_x86asm fma4_external "vfmaddps ymm0, ymm1, ymm2, ymm3"
|
||||
check_x86asm cpunop "CPU amdnop"
|
||||
fi
|
||||
|
||||
@ -7469,6 +7473,7 @@ if enabled x86; then
|
||||
echo "AVX enabled ${avx-no}"
|
||||
echo "AVX2 enabled ${avx2-no}"
|
||||
echo "AVX-512 enabled ${avx512-no}"
|
||||
echo "AVX-512ICL enabled ${avx512icl-no}"
|
||||
echo "XOP enabled ${xop-no}"
|
||||
echo "FMA3 enabled ${fma3-no}"
|
||||
echo "FMA4 enabled ${fma4-no}"
|
||||
|
@ -14,6 +14,9 @@ libavutil: 2021-04-27
|
||||
|
||||
API changes, most recent first:
|
||||
|
||||
2022-03-10 - xxxxxxxxxx - lavu 57.23.100 - cpu.h
|
||||
Add AV_CPU_FLAG_AVX512ICL.
|
||||
|
||||
2022-02-07 - xxxxxxxxxx - lavu 57.21.100 - fifo.h
|
||||
Deprecate AVFifoBuffer and the API around it, namely av_fifo_alloc(),
|
||||
av_fifo_alloc_array(), av_fifo_free(), av_fifo_freep(), av_fifo_reset(),
|
||||
|
@ -137,6 +137,7 @@ int av_parse_cpu_caps(unsigned *flags, const char *s)
|
||||
{ "cmov", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_CMOV }, .unit = "flags" },
|
||||
{ "aesni", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AESNI }, .unit = "flags" },
|
||||
{ "avx512" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX512 }, .unit = "flags" },
|
||||
{ "avx512icl", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX512ICL }, .unit = "flags" },
|
||||
{ "slowgather", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SLOW_GATHER }, .unit = "flags" },
|
||||
|
||||
#define CPU_FLAG_P2 AV_CPU_FLAG_CMOV | AV_CPU_FLAG_MMX
|
||||
|
@ -54,6 +54,7 @@
|
||||
#define AV_CPU_FLAG_BMI1 0x20000 ///< Bit Manipulation Instruction Set 1
|
||||
#define AV_CPU_FLAG_BMI2 0x40000 ///< Bit Manipulation Instruction Set 2
|
||||
#define AV_CPU_FLAG_AVX512 0x100000 ///< AVX-512 functions: requires OS support even if YMM/ZMM registers aren't used
|
||||
#define AV_CPU_FLAG_AVX512ICL 0x200000 ///< F/CD/BW/DQ/VL/VNNI/IFMA/VBMI/VBMI2/VPOPCNTDQ/BITALG/GFNI/VAES/VPCLMULQDQ
|
||||
#define AV_CPU_FLAG_SLOW_GATHER 0x2000000 ///< CPU has slow gathers.
|
||||
|
||||
#define AV_CPU_FLAG_ALTIVEC 0x0001 ///< standard
|
||||
|
@ -79,7 +79,7 @@
|
||||
*/
|
||||
|
||||
#define LIBAVUTIL_VERSION_MAJOR 57
|
||||
#define LIBAVUTIL_VERSION_MINOR 22
|
||||
#define LIBAVUTIL_VERSION_MINOR 23
|
||||
#define LIBAVUTIL_VERSION_MICRO 100
|
||||
|
||||
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
|
||||
|
@ -150,9 +150,13 @@ int ff_get_cpu_flags_x86(void)
|
||||
rval |= AV_CPU_FLAG_AVX2;
|
||||
#if HAVE_AVX512 /* F, CD, BW, DQ, VL */
|
||||
if ((xcr0_lo & 0xe0) == 0xe0) { /* OPMASK/ZMM state */
|
||||
if ((rval & AV_CPU_FLAG_AVX2) && (ebx & 0xd0030000) == 0xd0030000)
|
||||
if ((rval & AV_CPU_FLAG_AVX2) && (ebx & 0xd0030000) == 0xd0030000) {
|
||||
rval |= AV_CPU_FLAG_AVX512;
|
||||
|
||||
#if HAVE_AVX512ICL
|
||||
if ((ebx & 0xd0200000) == 0xd0200000 && (ecx & 0x5f42) == 0x5f42)
|
||||
rval |= AV_CPU_FLAG_AVX512ICL;
|
||||
#endif /* HAVE_AVX512ICL */
|
||||
}
|
||||
}
|
||||
#endif /* HAVE_AVX512 */
|
||||
#endif /* HAVE_AVX2 */
|
||||
|
@ -80,6 +80,7 @@
|
||||
#define EXTERNAL_AVX2_SLOW(flags) CPUEXT_SUFFIX_SLOW2(flags, _EXTERNAL, AVX2, AVX)
|
||||
#define EXTERNAL_AESNI(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AESNI)
|
||||
#define EXTERNAL_AVX512(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX512)
|
||||
#define EXTERNAL_AVX512ICL(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX512ICL)
|
||||
|
||||
#define INLINE_AMD3DNOW(flags) CPUEXT_SUFFIX(flags, _INLINE, AMD3DNOW)
|
||||
#define INLINE_AMD3DNOWEXT(flags) CPUEXT_SUFFIX(flags, _INLINE, AMD3DNOWEXT)
|
||||
|
@ -817,32 +817,33 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
|
||||
|
||||
; cpuflags
|
||||
|
||||
%assign cpuflags_mmx (1<<0)
|
||||
%assign cpuflags_mmx2 (1<<1) | cpuflags_mmx
|
||||
%assign cpuflags_3dnow (1<<2) | cpuflags_mmx
|
||||
%assign cpuflags_3dnowext (1<<3) | cpuflags_3dnow
|
||||
%assign cpuflags_sse (1<<4) | cpuflags_mmx2
|
||||
%assign cpuflags_sse2 (1<<5) | cpuflags_sse
|
||||
%assign cpuflags_sse2slow (1<<6) | cpuflags_sse2
|
||||
%assign cpuflags_lzcnt (1<<7) | cpuflags_sse2
|
||||
%assign cpuflags_sse3 (1<<8) | cpuflags_sse2
|
||||
%assign cpuflags_ssse3 (1<<9) | cpuflags_sse3
|
||||
%assign cpuflags_sse4 (1<<10)| cpuflags_ssse3
|
||||
%assign cpuflags_sse42 (1<<11)| cpuflags_sse4
|
||||
%assign cpuflags_aesni (1<<12)| cpuflags_sse42
|
||||
%assign cpuflags_avx (1<<13)| cpuflags_sse42
|
||||
%assign cpuflags_xop (1<<14)| cpuflags_avx
|
||||
%assign cpuflags_fma4 (1<<15)| cpuflags_avx
|
||||
%assign cpuflags_fma3 (1<<16)| cpuflags_avx
|
||||
%assign cpuflags_bmi1 (1<<17)| cpuflags_avx|cpuflags_lzcnt
|
||||
%assign cpuflags_bmi2 (1<<18)| cpuflags_bmi1
|
||||
%assign cpuflags_avx2 (1<<19)| cpuflags_fma3|cpuflags_bmi2
|
||||
%assign cpuflags_avx512 (1<<20)| cpuflags_avx2 ; F, CD, BW, DQ, VL
|
||||
%assign cpuflags_mmx (1<<0)
|
||||
%assign cpuflags_mmx2 (1<<1) | cpuflags_mmx
|
||||
%assign cpuflags_3dnow (1<<2) | cpuflags_mmx
|
||||
%assign cpuflags_3dnowext (1<<3) | cpuflags_3dnow
|
||||
%assign cpuflags_sse (1<<4) | cpuflags_mmx2
|
||||
%assign cpuflags_sse2 (1<<5) | cpuflags_sse
|
||||
%assign cpuflags_sse2slow (1<<6) | cpuflags_sse2
|
||||
%assign cpuflags_lzcnt (1<<7) | cpuflags_sse2
|
||||
%assign cpuflags_sse3 (1<<8) | cpuflags_sse2
|
||||
%assign cpuflags_ssse3 (1<<9) | cpuflags_sse3
|
||||
%assign cpuflags_sse4 (1<<10)| cpuflags_ssse3
|
||||
%assign cpuflags_sse42 (1<<11)| cpuflags_sse4
|
||||
%assign cpuflags_aesni (1<<12)| cpuflags_sse42
|
||||
%assign cpuflags_avx (1<<13)| cpuflags_sse42
|
||||
%assign cpuflags_xop (1<<14)| cpuflags_avx
|
||||
%assign cpuflags_fma4 (1<<15)| cpuflags_avx
|
||||
%assign cpuflags_fma3 (1<<16)| cpuflags_avx
|
||||
%assign cpuflags_bmi1 (1<<17)| cpuflags_avx|cpuflags_lzcnt
|
||||
%assign cpuflags_bmi2 (1<<18)| cpuflags_bmi1
|
||||
%assign cpuflags_avx2 (1<<19)| cpuflags_fma3|cpuflags_bmi2
|
||||
%assign cpuflags_avx512 (1<<20)| cpuflags_avx2 ; F, CD, BW, DQ, VL
|
||||
%assign cpuflags_avx512icl (1<<25)| cpuflags_avx512
|
||||
|
||||
%assign cpuflags_cache32 (1<<21)
|
||||
%assign cpuflags_cache64 (1<<22)
|
||||
%assign cpuflags_aligned (1<<23) ; not a cpu feature, but a function variant
|
||||
%assign cpuflags_atom (1<<24)
|
||||
%assign cpuflags_cache32 (1<<21)
|
||||
%assign cpuflags_cache64 (1<<22)
|
||||
%assign cpuflags_aligned (1<<23) ; not a cpu feature, but a function variant
|
||||
%assign cpuflags_atom (1<<24)
|
||||
|
||||
; Returns a boolean value expressing whether or not the specified cpuflag is enabled.
|
||||
%define cpuflag(x) (((((cpuflags & (cpuflags_ %+ x)) ^ (cpuflags_ %+ x)) - 1) >> 31) & 1)
|
||||
|
@ -220,23 +220,24 @@ static const struct {
|
||||
{ "MMI", "mmi", AV_CPU_FLAG_MMI },
|
||||
{ "MSA", "msa", AV_CPU_FLAG_MSA },
|
||||
#elif ARCH_X86
|
||||
{ "MMX", "mmx", AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV },
|
||||
{ "MMXEXT", "mmxext", AV_CPU_FLAG_MMXEXT },
|
||||
{ "3DNOW", "3dnow", AV_CPU_FLAG_3DNOW },
|
||||
{ "3DNOWEXT", "3dnowext", AV_CPU_FLAG_3DNOWEXT },
|
||||
{ "SSE", "sse", AV_CPU_FLAG_SSE },
|
||||
{ "SSE2", "sse2", AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW },
|
||||
{ "SSE3", "sse3", AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW },
|
||||
{ "SSSE3", "ssse3", AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM },
|
||||
{ "SSE4.1", "sse4", AV_CPU_FLAG_SSE4 },
|
||||
{ "SSE4.2", "sse42", AV_CPU_FLAG_SSE42 },
|
||||
{ "AES-NI", "aesni", AV_CPU_FLAG_AESNI },
|
||||
{ "AVX", "avx", AV_CPU_FLAG_AVX },
|
||||
{ "XOP", "xop", AV_CPU_FLAG_XOP },
|
||||
{ "FMA3", "fma3", AV_CPU_FLAG_FMA3 },
|
||||
{ "FMA4", "fma4", AV_CPU_FLAG_FMA4 },
|
||||
{ "AVX2", "avx2", AV_CPU_FLAG_AVX2 },
|
||||
{ "AVX-512", "avx512", AV_CPU_FLAG_AVX512 },
|
||||
{ "MMX", "mmx", AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV },
|
||||
{ "MMXEXT", "mmxext", AV_CPU_FLAG_MMXEXT },
|
||||
{ "3DNOW", "3dnow", AV_CPU_FLAG_3DNOW },
|
||||
{ "3DNOWEXT", "3dnowext", AV_CPU_FLAG_3DNOWEXT },
|
||||
{ "SSE", "sse", AV_CPU_FLAG_SSE },
|
||||
{ "SSE2", "sse2", AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW },
|
||||
{ "SSE3", "sse3", AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW },
|
||||
{ "SSSE3", "ssse3", AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM },
|
||||
{ "SSE4.1", "sse4", AV_CPU_FLAG_SSE4 },
|
||||
{ "SSE4.2", "sse42", AV_CPU_FLAG_SSE42 },
|
||||
{ "AES-NI", "aesni", AV_CPU_FLAG_AESNI },
|
||||
{ "AVX", "avx", AV_CPU_FLAG_AVX },
|
||||
{ "XOP", "xop", AV_CPU_FLAG_XOP },
|
||||
{ "FMA3", "fma3", AV_CPU_FLAG_FMA3 },
|
||||
{ "FMA4", "fma4", AV_CPU_FLAG_FMA4 },
|
||||
{ "AVX2", "avx2", AV_CPU_FLAG_AVX2 },
|
||||
{ "AVX-512", "avx512", AV_CPU_FLAG_AVX512 },
|
||||
{ "AVX-512ICL", "avx512icl", AV_CPU_FLAG_AVX512ICL },
|
||||
#elif ARCH_LOONGARCH
|
||||
{ "LSX", "lsx", AV_CPU_FLAG_LSX },
|
||||
{ "LASX", "lasx", AV_CPU_FLAG_LASX },
|
||||
|
Loading…
Reference in New Issue
Block a user