x86: add AV_CPU_FLAG_AVXSLOW flag

Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
This commit is contained in:
James Almer 2015-05-26 14:29:06 -03:00 committed by Luca Barbato
parent d0bf20a4f2
commit f7cafb5d02
5 changed files with 23 additions and 5 deletions

View File

@ -13,6 +13,9 @@ libavutil: 2014-08-09
API changes, most recent first: API changes, most recent first:
2015-xx-xx - xxxxxxx - lavu 54.14.0 - cpu.h
Add AV_CPU_FLAG_AVXSLOW.
2015-xx-xx - xxxxxxx - lavc 56.23.0 2015-xx-xx - xxxxxxx - lavc 56.23.0
Add av_vda_default_init2. Add av_vda_default_init2.

View File

@ -86,6 +86,7 @@ int av_parse_cpu_flags(const char *s)
#define CPUFLAG_SSE4 (AV_CPU_FLAG_SSE4 | CPUFLAG_SSSE3) #define CPUFLAG_SSE4 (AV_CPU_FLAG_SSE4 | CPUFLAG_SSSE3)
#define CPUFLAG_SSE42 (AV_CPU_FLAG_SSE42 | CPUFLAG_SSE4) #define CPUFLAG_SSE42 (AV_CPU_FLAG_SSE42 | CPUFLAG_SSE4)
#define CPUFLAG_AVX (AV_CPU_FLAG_AVX | CPUFLAG_SSE42) #define CPUFLAG_AVX (AV_CPU_FLAG_AVX | CPUFLAG_SSE42)
#define CPUFLAG_AVXSLOW (AV_CPU_FLAG_AVXSLOW | CPUFLAG_AVX)
#define CPUFLAG_XOP (AV_CPU_FLAG_XOP | CPUFLAG_AVX) #define CPUFLAG_XOP (AV_CPU_FLAG_XOP | CPUFLAG_AVX)
#define CPUFLAG_FMA3 (AV_CPU_FLAG_FMA3 | CPUFLAG_AVX) #define CPUFLAG_FMA3 (AV_CPU_FLAG_FMA3 | CPUFLAG_AVX)
#define CPUFLAG_FMA4 (AV_CPU_FLAG_FMA4 | CPUFLAG_AVX) #define CPUFLAG_FMA4 (AV_CPU_FLAG_FMA4 | CPUFLAG_AVX)
@ -108,6 +109,7 @@ int av_parse_cpu_flags(const char *s)
{ "sse4.1" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_SSE4 }, .unit = "flags" }, { "sse4.1" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_SSE4 }, .unit = "flags" },
{ "sse4.2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_SSE42 }, .unit = "flags" }, { "sse4.2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_SSE42 }, .unit = "flags" },
{ "avx" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX }, .unit = "flags" }, { "avx" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX }, .unit = "flags" },
{ "avxslow" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVXSLOW }, .unit = "flags" },
{ "xop" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_XOP }, .unit = "flags" }, { "xop" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_XOP }, .unit = "flags" },
{ "fma3" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA3 }, .unit = "flags" }, { "fma3" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA3 }, .unit = "flags" },
{ "fma4" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA4 }, .unit = "flags" }, { "fma4" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA4 }, .unit = "flags" },
@ -219,6 +221,7 @@ static const struct {
{ AV_CPU_FLAG_SSE4, "sse4.1" }, { AV_CPU_FLAG_SSE4, "sse4.1" },
{ AV_CPU_FLAG_SSE42, "sse4.2" }, { AV_CPU_FLAG_SSE42, "sse4.2" },
{ AV_CPU_FLAG_AVX, "avx" }, { AV_CPU_FLAG_AVX, "avx" },
{ AV_CPU_FLAG_AVXSLOW, "avxslow" },
{ AV_CPU_FLAG_XOP, "xop" }, { AV_CPU_FLAG_XOP, "xop" },
{ AV_CPU_FLAG_FMA3, "fma3" }, { AV_CPU_FLAG_FMA3, "fma3" },
{ AV_CPU_FLAG_FMA4, "fma4" }, { AV_CPU_FLAG_FMA4, "fma4" },

View File

@ -45,6 +45,7 @@
#define AV_CPU_FLAG_SSE4 0x0100 ///< Penryn SSE4.1 functions #define AV_CPU_FLAG_SSE4 0x0100 ///< Penryn SSE4.1 functions
#define AV_CPU_FLAG_SSE42 0x0200 ///< Nehalem SSE4.2 functions #define AV_CPU_FLAG_SSE42 0x0200 ///< Nehalem SSE4.2 functions
#define AV_CPU_FLAG_AVX 0x4000 ///< AVX functions: requires OS support even if YMM registers aren't used #define AV_CPU_FLAG_AVX 0x4000 ///< AVX functions: requires OS support even if YMM registers aren't used
#define AV_CPU_FLAG_AVXSLOW 0x8000000 ///< AVX supported, but slow when using YMM registers (e.g. Bulldozer)
#define AV_CPU_FLAG_XOP 0x0400 ///< Bulldozer XOP functions #define AV_CPU_FLAG_XOP 0x0400 ///< Bulldozer XOP functions
#define AV_CPU_FLAG_FMA4 0x0800 ///< Bulldozer FMA4 functions #define AV_CPU_FLAG_FMA4 0x0800 ///< Bulldozer FMA4 functions
#define AV_CPU_FLAG_CMOV 0x1000 ///< i686 cmov #define AV_CPU_FLAG_CMOV 0x1000 ///< i686 cmov

View File

@ -54,8 +54,8 @@
*/ */
#define LIBAVUTIL_VERSION_MAJOR 54 #define LIBAVUTIL_VERSION_MAJOR 54
#define LIBAVUTIL_VERSION_MINOR 13 #define LIBAVUTIL_VERSION_MINOR 14
#define LIBAVUTIL_VERSION_MICRO 1 #define LIBAVUTIL_VERSION_MICRO 0
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
LIBAVUTIL_VERSION_MINOR, \ LIBAVUTIL_VERSION_MINOR, \

View File

@ -167,6 +167,7 @@ int ff_get_cpu_flags_x86(void)
if (ext_caps & (1 << 22)) if (ext_caps & (1 << 22))
rval |= AV_CPU_FLAG_MMXEXT; rval |= AV_CPU_FLAG_MMXEXT;
if (!strncmp(vendor.c, "AuthenticAMD", 12)) {
/* Allow for selectively disabling SSE2 functions on AMD processors /* Allow for selectively disabling SSE2 functions on AMD processors
with SSE2 support but not SSE4a. This includes Athlon64, some with SSE2 support but not SSE4a. This includes Athlon64, some
Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
@ -174,9 +175,19 @@ int ff_get_cpu_flags_x86(void)
AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
so that SSE2 is used unless explicitly disabled by checking so that SSE2 is used unless explicitly disabled by checking
AV_CPU_FLAG_SSE2SLOW. */ AV_CPU_FLAG_SSE2SLOW. */
if (!strncmp(vendor.c, "AuthenticAMD", 12) && if (rval & AV_CPU_FLAG_SSE2 && !(ecx & 0x00000040))
rval & AV_CPU_FLAG_SSE2 && !(ecx & 0x00000040)) { rval |= AV_CPU_FLAG_SSE2SLOW;
rval |= AV_CPU_FLAG_SSE2SLOW;
/* Similar to the above but for AVX functions on AMD processors.
This is necessary only for functions using YMM registers on Bulldozer
based CPUs as they lack 256-bits execution units. SSE/AVX functions
using XMM registers are always faster on them.
AV_CPU_FLAG_AVX and AV_CPU_FLAG_AVXSLOW are both set so that AVX is
used unless explicitly disabled by checking AV_CPU_FLAG_AVXSLOW.
TODO: Confirm if Excavator is affected or not by this once it's
released, and update the check if necessary. Same for btver2. */
if (family == 0x15 && (rval & AV_CPU_FLAG_AVX))
rval |= AV_CPU_FLAG_AVXSLOW;
} }
/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be