diff --git a/configure b/configure index 650b52fcc5..7d22c2a345 100755 --- a/configure +++ b/configure @@ -443,6 +443,7 @@ Optimization options (experts only): --disable-fma4 disable FMA4 optimizations --disable-avx2 disable AVX2 optimizations --disable-avx512 disable AVX-512 optimizations + --disable-avx512icl disable AVX-512ICL optimizations --disable-aesni disable AESNI optimizations --disable-armv5te disable armv5te optimizations --disable-armv6 disable armv6 optimizations @@ -2096,6 +2097,7 @@ ARCH_EXT_LIST_X86_SIMD=" avx avx2 avx512 + avx512icl fma3 fma4 mmx @@ -2665,6 +2667,7 @@ fma3_deps="avx" fma4_deps="avx" avx2_deps="avx" avx512_deps="avx2" +avx512icl_deps="avx512" mmx_external_deps="x86asm" mmx_inline_deps="inline_asm x86" @@ -6126,10 +6129,11 @@ EOF elf*) enabled debug && append X86ASMFLAGS $x86asm_debug ;; esac - enabled avx512 && check_x86asm avx512_external "vmovdqa32 [eax]{k1}{z}, zmm0" - enabled avx2 && check_x86asm avx2_external "vextracti128 xmm0, ymm0, 0" - enabled xop && check_x86asm xop_external "vpmacsdd xmm0, xmm1, xmm2, xmm3" - enabled fma4 && check_x86asm fma4_external "vfmaddps ymm0, ymm1, ymm2, ymm3" + enabled avx512 && check_x86asm avx512_external "vmovdqa32 [eax]{k1}{z}, zmm0" + enabled avx512icl && check_x86asm avx512icl_external "vpdpwssds zmm31{k1}{z}, zmm29, zmm28" + enabled avx2 && check_x86asm avx2_external "vextracti128 xmm0, ymm0, 0" + enabled xop && check_x86asm xop_external "vpmacsdd xmm0, xmm1, xmm2, xmm3" + enabled fma4 && check_x86asm fma4_external "vfmaddps ymm0, ymm1, ymm2, ymm3" check_x86asm cpunop "CPU amdnop" fi @@ -7469,6 +7473,7 @@ if enabled x86; then echo "AVX enabled ${avx-no}" echo "AVX2 enabled ${avx2-no}" echo "AVX-512 enabled ${avx512-no}" + echo "AVX-512ICL enabled ${avx512icl-no}" echo "XOP enabled ${xop-no}" echo "FMA3 enabled ${fma3-no}" echo "FMA4 enabled ${fma4-no}" diff --git a/doc/APIchanges b/doc/APIchanges index ea402f6118..ccc4f24b28 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -14,6 +14,9 @@ libavutil: 2021-04-27 API changes, most recent first: +2022-03-10 - xxxxxxxxxx - lavu 57.23.100 - cpu.h + Add AV_CPU_FLAG_AVX512ICL. + 2022-02-07 - xxxxxxxxxx - lavu 57.21.100 - fifo.h Deprecate AVFifoBuffer and the API around it, namely av_fifo_alloc(), av_fifo_alloc_array(), av_fifo_free(), av_fifo_freep(), av_fifo_reset(), diff --git a/libavutil/cpu.c b/libavutil/cpu.c index 1368502245..833c220192 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -137,6 +137,7 @@ int av_parse_cpu_caps(unsigned *flags, const char *s) { "cmov", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_CMOV }, .unit = "flags" }, { "aesni", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AESNI }, .unit = "flags" }, { "avx512" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX512 }, .unit = "flags" }, + { "avx512icl", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX512ICL }, .unit = "flags" }, { "slowgather", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SLOW_GATHER }, .unit = "flags" }, #define CPU_FLAG_P2 AV_CPU_FLAG_CMOV | AV_CPU_FLAG_MMX diff --git a/libavutil/cpu.h b/libavutil/cpu.h index ce9bf14bf7..9711e574c5 100644 --- a/libavutil/cpu.h +++ b/libavutil/cpu.h @@ -54,6 +54,7 @@ #define AV_CPU_FLAG_BMI1 0x20000 ///< Bit Manipulation Instruction Set 1 #define AV_CPU_FLAG_BMI2 0x40000 ///< Bit Manipulation Instruction Set 2 #define AV_CPU_FLAG_AVX512 0x100000 ///< AVX-512 functions: requires OS support even if YMM/ZMM registers aren't used +#define AV_CPU_FLAG_AVX512ICL 0x200000 ///< F/CD/BW/DQ/VL/VNNI/IFMA/VBMI/VBMI2/VPOPCNTDQ/BITALG/GFNI/VAES/VPCLMULQDQ #define AV_CPU_FLAG_SLOW_GATHER 0x2000000 ///< CPU has slow gathers. #define AV_CPU_FLAG_ALTIVEC 0x0001 ///< standard diff --git a/libavutil/version.h b/libavutil/version.h index c7004601c8..6c5d90507c 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -79,7 +79,7 @@ */ #define LIBAVUTIL_VERSION_MAJOR 57 -#define LIBAVUTIL_VERSION_MINOR 22 +#define LIBAVUTIL_VERSION_MINOR 23 #define LIBAVUTIL_VERSION_MICRO 100 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index 7b13fcae91..d6cd4fab9c 100644 --- a/libavutil/x86/cpu.c +++ b/libavutil/x86/cpu.c @@ -150,9 +150,13 @@ int ff_get_cpu_flags_x86(void) rval |= AV_CPU_FLAG_AVX2; #if HAVE_AVX512 /* F, CD, BW, DQ, VL */ if ((xcr0_lo & 0xe0) == 0xe0) { /* OPMASK/ZMM state */ - if ((rval & AV_CPU_FLAG_AVX2) && (ebx & 0xd0030000) == 0xd0030000) + if ((rval & AV_CPU_FLAG_AVX2) && (ebx & 0xd0030000) == 0xd0030000) { rval |= AV_CPU_FLAG_AVX512; - +#if HAVE_AVX512ICL + if ((ebx & 0xd0200000) == 0xd0200000 && (ecx & 0x5f42) == 0x5f42) + rval |= AV_CPU_FLAG_AVX512ICL; +#endif /* HAVE_AVX512ICL */ + } } #endif /* HAVE_AVX512 */ #endif /* HAVE_AVX2 */ diff --git a/libavutil/x86/cpu.h b/libavutil/x86/cpu.h index 937c697fa0..40a1eef0ab 100644 --- a/libavutil/x86/cpu.h +++ b/libavutil/x86/cpu.h @@ -80,6 +80,7 @@ #define EXTERNAL_AVX2_SLOW(flags) CPUEXT_SUFFIX_SLOW2(flags, _EXTERNAL, AVX2, AVX) #define EXTERNAL_AESNI(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AESNI) #define EXTERNAL_AVX512(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX512) +#define EXTERNAL_AVX512ICL(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX512ICL) #define INLINE_AMD3DNOW(flags) CPUEXT_SUFFIX(flags, _INLINE, AMD3DNOW) #define INLINE_AMD3DNOWEXT(flags) CPUEXT_SUFFIX(flags, _INLINE, AMD3DNOWEXT) diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm index 01c35e3a4b..251ee797de 100644 --- a/libavutil/x86/x86inc.asm +++ b/libavutil/x86/x86inc.asm @@ -817,32 +817,33 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, ; cpuflags -%assign cpuflags_mmx (1<<0) -%assign cpuflags_mmx2 (1<<1) | cpuflags_mmx -%assign cpuflags_3dnow (1<<2) | cpuflags_mmx -%assign cpuflags_3dnowext (1<<3) | cpuflags_3dnow -%assign cpuflags_sse (1<<4) | cpuflags_mmx2 -%assign cpuflags_sse2 (1<<5) | cpuflags_sse -%assign cpuflags_sse2slow (1<<6) | cpuflags_sse2 -%assign cpuflags_lzcnt (1<<7) | cpuflags_sse2 -%assign cpuflags_sse3 (1<<8) | cpuflags_sse2 -%assign cpuflags_ssse3 (1<<9) | cpuflags_sse3 -%assign cpuflags_sse4 (1<<10)| cpuflags_ssse3 -%assign cpuflags_sse42 (1<<11)| cpuflags_sse4 -%assign cpuflags_aesni (1<<12)| cpuflags_sse42 -%assign cpuflags_avx (1<<13)| cpuflags_sse42 -%assign cpuflags_xop (1<<14)| cpuflags_avx -%assign cpuflags_fma4 (1<<15)| cpuflags_avx -%assign cpuflags_fma3 (1<<16)| cpuflags_avx -%assign cpuflags_bmi1 (1<<17)| cpuflags_avx|cpuflags_lzcnt -%assign cpuflags_bmi2 (1<<18)| cpuflags_bmi1 -%assign cpuflags_avx2 (1<<19)| cpuflags_fma3|cpuflags_bmi2 -%assign cpuflags_avx512 (1<<20)| cpuflags_avx2 ; F, CD, BW, DQ, VL +%assign cpuflags_mmx (1<<0) +%assign cpuflags_mmx2 (1<<1) | cpuflags_mmx +%assign cpuflags_3dnow (1<<2) | cpuflags_mmx +%assign cpuflags_3dnowext (1<<3) | cpuflags_3dnow +%assign cpuflags_sse (1<<4) | cpuflags_mmx2 +%assign cpuflags_sse2 (1<<5) | cpuflags_sse +%assign cpuflags_sse2slow (1<<6) | cpuflags_sse2 +%assign cpuflags_lzcnt (1<<7) | cpuflags_sse2 +%assign cpuflags_sse3 (1<<8) | cpuflags_sse2 +%assign cpuflags_ssse3 (1<<9) | cpuflags_sse3 +%assign cpuflags_sse4 (1<<10)| cpuflags_ssse3 +%assign cpuflags_sse42 (1<<11)| cpuflags_sse4 +%assign cpuflags_aesni (1<<12)| cpuflags_sse42 +%assign cpuflags_avx (1<<13)| cpuflags_sse42 +%assign cpuflags_xop (1<<14)| cpuflags_avx +%assign cpuflags_fma4 (1<<15)| cpuflags_avx +%assign cpuflags_fma3 (1<<16)| cpuflags_avx +%assign cpuflags_bmi1 (1<<17)| cpuflags_avx|cpuflags_lzcnt +%assign cpuflags_bmi2 (1<<18)| cpuflags_bmi1 +%assign cpuflags_avx2 (1<<19)| cpuflags_fma3|cpuflags_bmi2 +%assign cpuflags_avx512 (1<<20)| cpuflags_avx2 ; F, CD, BW, DQ, VL +%assign cpuflags_avx512icl (1<<25)| cpuflags_avx512 -%assign cpuflags_cache32 (1<<21) -%assign cpuflags_cache64 (1<<22) -%assign cpuflags_aligned (1<<23) ; not a cpu feature, but a function variant -%assign cpuflags_atom (1<<24) +%assign cpuflags_cache32 (1<<21) +%assign cpuflags_cache64 (1<<22) +%assign cpuflags_aligned (1<<23) ; not a cpu feature, but a function variant +%assign cpuflags_atom (1<<24) ; Returns a boolean value expressing whether or not the specified cpuflag is enabled. %define cpuflag(x) (((((cpuflags & (cpuflags_ %+ x)) ^ (cpuflags_ %+ x)) - 1) >> 31) & 1) diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index f74125e810..e77b4ec20f 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -220,23 +220,24 @@ static const struct { { "MMI", "mmi", AV_CPU_FLAG_MMI }, { "MSA", "msa", AV_CPU_FLAG_MSA }, #elif ARCH_X86 - { "MMX", "mmx", AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV }, - { "MMXEXT", "mmxext", AV_CPU_FLAG_MMXEXT }, - { "3DNOW", "3dnow", AV_CPU_FLAG_3DNOW }, - { "3DNOWEXT", "3dnowext", AV_CPU_FLAG_3DNOWEXT }, - { "SSE", "sse", AV_CPU_FLAG_SSE }, - { "SSE2", "sse2", AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW }, - { "SSE3", "sse3", AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW }, - { "SSSE3", "ssse3", AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM }, - { "SSE4.1", "sse4", AV_CPU_FLAG_SSE4 }, - { "SSE4.2", "sse42", AV_CPU_FLAG_SSE42 }, - { "AES-NI", "aesni", AV_CPU_FLAG_AESNI }, - { "AVX", "avx", AV_CPU_FLAG_AVX }, - { "XOP", "xop", AV_CPU_FLAG_XOP }, - { "FMA3", "fma3", AV_CPU_FLAG_FMA3 }, - { "FMA4", "fma4", AV_CPU_FLAG_FMA4 }, - { "AVX2", "avx2", AV_CPU_FLAG_AVX2 }, - { "AVX-512", "avx512", AV_CPU_FLAG_AVX512 }, + { "MMX", "mmx", AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV }, + { "MMXEXT", "mmxext", AV_CPU_FLAG_MMXEXT }, + { "3DNOW", "3dnow", AV_CPU_FLAG_3DNOW }, + { "3DNOWEXT", "3dnowext", AV_CPU_FLAG_3DNOWEXT }, + { "SSE", "sse", AV_CPU_FLAG_SSE }, + { "SSE2", "sse2", AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW }, + { "SSE3", "sse3", AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW }, + { "SSSE3", "ssse3", AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM }, + { "SSE4.1", "sse4", AV_CPU_FLAG_SSE4 }, + { "SSE4.2", "sse42", AV_CPU_FLAG_SSE42 }, + { "AES-NI", "aesni", AV_CPU_FLAG_AESNI }, + { "AVX", "avx", AV_CPU_FLAG_AVX }, + { "XOP", "xop", AV_CPU_FLAG_XOP }, + { "FMA3", "fma3", AV_CPU_FLAG_FMA3 }, + { "FMA4", "fma4", AV_CPU_FLAG_FMA4 }, + { "AVX2", "avx2", AV_CPU_FLAG_AVX2 }, + { "AVX-512", "avx512", AV_CPU_FLAG_AVX512 }, + { "AVX-512ICL", "avx512icl", AV_CPU_FLAG_AVX512ICL }, #elif ARCH_LOONGARCH { "LSX", "lsx", AV_CPU_FLAG_LSX }, { "LASX", "lasx", AV_CPU_FLAG_LASX },