From 0bc3de19ffe296254f214dc7615e624d8e401bcb Mon Sep 17 00:00:00 2001
From: James Almer
Date: Sat, 22 Feb 2014 02:47:02 -0300
Subject: [PATCH] x86: add detection for Bit Manipulation Instruction sets

Based on x264 code

Signed-off-by: James Almer
Signed-off-by: Michael Niedermayer
---
 libavutil/cpu.c | 8 ++++++++
 libavutil/cpu.h | 2 ++
 libavutil/x86/cpu.c | 22 ++++++++++++++--------
 3 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 74de61e0b7..256bd237d5 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -94,6 +94,8 @@ int av_parse_cpu_flags(const char *s)
 #define CPUFLAG_FMA3 (AV_CPU_FLAG_FMA3 | CPUFLAG_AVX)
 #define CPUFLAG_FMA4 (AV_CPU_FLAG_FMA4 | CPUFLAG_AVX)
 #define CPUFLAG_AVX2 (AV_CPU_FLAG_AVX2 | CPUFLAG_AVX)
+#define CPUFLAG_BMI1 (AV_CPU_FLAG_BMI1)
+#define CPUFLAG_BMI2 (AV_CPU_FLAG_BMI2 | CPUFLAG_BMI1)
     static const AVOption cpuflags_opts[] = {
         { "flags" , NULL, 0, AV_OPT_TYPE_FLAGS, { .i64 = 0 }, INT64_MIN, INT64_MAX, .unit = "flags" },
 #if ARCH_PPC
@@ -115,6 +117,8 @@ int av_parse_cpu_flags(const char *s)
         { "fma3" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA3 }, .unit = "flags" },
         { "fma4" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA4 }, .unit = "flags" },
         { "avx2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX2 }, .unit = "flags" },
+        { "bmi1" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_BMI1 }, .unit = "flags" },
+        { "bmi2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_BMI2 }, .unit = "flags" },
         { "3dnow" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_3DNOW }, .unit = "flags" },
         { "3dnowext", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_3DNOWEXT }, .unit = "flags" },
         { "cmov", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_CMOV }, .unit = "flags" },
@@ -171,6 +175,8 @@ int av_parse_cpu_caps(unsigned *flags, const char *s)
         { "fma3" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_FMA3 }, .unit = "flags" },
         { "fma4" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_FMA4 }, .unit = "flags" },
         { "avx2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX2 }, .unit = "flags" },
+        { "bmi1" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_BMI1 }, .unit = "flags" },
+        { "bmi2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_BMI2 }, .unit = "flags" },
         { "3dnow" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_3DNOW }, .unit = "flags" },
         { "3dnowext", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_3DNOWEXT }, .unit = "flags" },
         { "cmov", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_CMOV }, .unit = "flags" },
@@ -288,6 +294,8 @@ static const struct {
     { AV_CPU_FLAG_3DNOWEXT, "3dnowext" },
     { AV_CPU_FLAG_CMOV, "cmov" },
     { AV_CPU_FLAG_AVX2, "avx2" },
+    { AV_CPU_FLAG_BMI1, "bmi1" },
+    { AV_CPU_FLAG_BMI2, "bmi2" },
 #endif
     { 0 }
 };
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index 1d0293fed3..0ad400fefb 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -52,6 +52,8 @@
 // #endif
 #define AV_CPU_FLAG_AVX2 0x8000 ///< AVX2 functions: requires OS support even if YMM registers aren't used
 #define AV_CPU_FLAG_FMA3 0x10000 ///< Haswell FMA3 functions
+#define AV_CPU_FLAG_BMI1 0x20000 ///< Bit Manipulation Instruction Set 1
+#define AV_CPU_FLAG_BMI2 0x40000 ///< Bit Manipulation Instruction Set 2
 
 #define AV_CPU_FLAG_ALTIVEC 0x0001 ///< standard
 
diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
index 333b0f805f..e0b4c89062 100644
--- a/libavutil/x86/cpu.c
+++ b/libavutil/x86/cpu.c
@@ -137,16 +137,22 @@ int ff_get_cpu_flags_x86(void)
                     rval |= AV_CPU_FLAG_FMA3;
             }
         }
-#if HAVE_AVX2
-        if (max_std_level >= 7) {
-            cpuid(7, eax, ebx, ecx, edx);
-            if (ebx&0x00000020)
-                rval |= AV_CPU_FLAG_AVX2;
-            /* TODO: BMI1/2 */
-        }
-#endif /* HAVE_AVX2 */
 #endif /* HAVE_AVX */
 #endif /* HAVE_SSE */
+        if (max_std_level >= 7) {
+            cpuid(7, eax, ebx, ecx, edx);
+#if HAVE_AVX2
+            if (ebx&0x00000020)
+                rval |= AV_CPU_FLAG_AVX2;
+#endif /* HAVE_AVX2 */
+            /* BMI1/2 don't need OS support */
+            if (ebx&0x00000008)
+            {
+                rval |= AV_CPU_FLAG_BMI1;
+                if (ebx&0x00000100)
+                    rval |= AV_CPU_FLAG_BMI2;
+            }
+        }
     }
 
     cpuid(0x80000000, max_ext_level, ebx, ecx, edx);