btrfs-progs: detect PCLMUL CPU support for accelerated crc32c

The accelerated crc32c implementation depends on two CPU features: the
crc32c instruction is part of SSE 4.2, while 'pclmulqdq' is a separate
feature. There is still old hardware in use that does not have the PCLMUL
instructions. Detect PCLMUL support and make it the condition for
selecting the PCLMUL-based implementation.

Detecting 'pclmul' is not supported by old compilers, so also add a
configure-time check and keep the SSE 4.2-only implementation as the
accelerated one where possible.

Issue: #676
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
David Sterba 2023-09-12 23:32:38 +02:00
parent b40943dea4
commit 03f41ac508
7 changed files with 13 additions and 8 deletions

View File

@ -54,6 +54,7 @@ void cpu_print_flags(void) {
FLAG(SSE2); FLAG(SSE2);
FLAG(SSSE3); FLAG(SSSE3);
FLAG(SSE41); FLAG(SSE41);
FLAG(PCLMUL);
FLAG(SSE42); FLAG(SSE42);
FLAG(SHA); FLAG(SHA);
FLAG(AVX); FLAG(AVX);
@ -76,6 +77,10 @@ void cpu_detect_flags(void)
__cpu_flags |= CPU_FLAG_SSSE3; __cpu_flags |= CPU_FLAG_SSSE3;
if (__builtin_cpu_supports("sse4.1")) if (__builtin_cpu_supports("sse4.1"))
__cpu_flags |= CPU_FLAG_SSE41; __cpu_flags |= CPU_FLAG_SSE41;
#if HAVE___BUILTIN_CPU_SUPPORTS__PCLMUL
if (__builtin_cpu_supports("pclmul"))
__cpu_flags |= CPU_FLAG_PCLMUL;
#endif
if (__builtin_cpu_supports("sse4.2")) if (__builtin_cpu_supports("sse4.2"))
__cpu_flags |= CPU_FLAG_SSE42; __cpu_flags |= CPU_FLAG_SSE42;
if (__builtin_cpu_supports("avx")) if (__builtin_cpu_supports("avx"))

View File

@ -33,6 +33,7 @@ enum cpu_feature {
ENUM_CPU_BIT(CPU_FLAG_SSE2), ENUM_CPU_BIT(CPU_FLAG_SSE2),
ENUM_CPU_BIT(CPU_FLAG_SSSE3), ENUM_CPU_BIT(CPU_FLAG_SSSE3),
ENUM_CPU_BIT(CPU_FLAG_SSE41), ENUM_CPU_BIT(CPU_FLAG_SSE41),
ENUM_CPU_BIT(CPU_FLAG_PCLMUL),
ENUM_CPU_BIT(CPU_FLAG_SSE42), ENUM_CPU_BIT(CPU_FLAG_SSE42),
ENUM_CPU_BIT(CPU_FLAG_SHA), ENUM_CPU_BIT(CPU_FLAG_SHA),
ENUM_CPU_BIT(CPU_FLAG_AVX), ENUM_CPU_BIT(CPU_FLAG_AVX),

View File

@ -124,6 +124,7 @@ AC_DEFUN([AX_GCC_BUILTIN], [
[__builtin_cpu_init], [$1()], [__builtin_cpu_init], [$1()],
[__builtin_cpu_is], [$1("intel")], [__builtin_cpu_is], [$1("intel")],
[__builtin_cpu_supports], [$1("sse")], [__builtin_cpu_supports], [$1("sse")],
[__builtin_cpu_supports__pclmul], [__builtin_cpu_supports("pclmul")],
[__builtin_ctz], [$1(0)], [__builtin_ctz], [$1(0)],
[__builtin_ctzl], [$1(0)], [__builtin_ctzl], [$1(0)],
[__builtin_ctzll], [$1(0)], [__builtin_ctzll], [$1(0)],

View File

@ -86,6 +86,7 @@ AC_SUBST([HAVE_GLIBC])
AX_GCC_BUILTIN([__builtin_add_overflow]) AX_GCC_BUILTIN([__builtin_add_overflow])
AX_GCC_BUILTIN([__builtin_sub_overflow]) AX_GCC_BUILTIN([__builtin_sub_overflow])
AX_GCC_BUILTIN([__builtin_mul_overflow]) AX_GCC_BUILTIN([__builtin_mul_overflow])
AX_GCC_BUILTIN([__builtin_cpu_supports__pclmul])
AC_CHECK_HEADERS([linux/perf_event.h]) AC_CHECK_HEADERS([linux/perf_event.h])
AC_CHECK_HEADERS([linux/hw_breakpoint.h]) AC_CHECK_HEADERS([linux/hw_breakpoint.h])

View File

@ -25,7 +25,7 @@ static unsigned int crc32c_pcl(uint32_t crc, unsigned char const *data, uint32_t
return crc_pcl(data, len, crc); return crc_pcl(data, len, crc);
} }
#else #endif
/* /*
* Based on a posting to lkml by Austin Zhang <austin.zhang@intel.com> * Based on a posting to lkml by Austin Zhang <austin.zhang@intel.com>
@ -86,8 +86,6 @@ static uint32_t crc32c_intel(uint32_t crc, unsigned char const *data, uint32_t l
return crc; return crc;
} }
#endif
void crc32c_init_accel(void) void crc32c_init_accel(void)
{ {
/* /*
@ -96,14 +94,13 @@ void crc32c_init_accel(void)
*/ */
if (0) { if (0) {
#ifdef __GLIBC__ #ifdef __GLIBC__
} else if (cpu_has_feature(CPU_FLAG_SSE42)) { } else if (cpu_has_feature(CPU_FLAG_PCLMUL)) {
/* printf("CRC32C: pcl\n"); */ /* printf("CRC32C: pcl\n"); */
crc_function = crc32c_pcl; crc_function = crc32c_pcl;
#else #endif
} else if (cpu_has_feature(CPU_FLAG_SSE42)) { } else if (cpu_has_feature(CPU_FLAG_SSE42)) {
/* printf("CRC32c: intel\n"); */ /* printf("CRC32c: intel\n"); */
crc_function = crc32c_intel; crc_function = crc32c_intel;
#endif
} else { } else {
/* printf("CRC32c: fallback\n"); */ /* printf("CRC32c: fallback\n"); */
crc_function = __crc32c_le; crc_function = __crc32c_le;

View File

@ -190,7 +190,7 @@ int main(int argc, char **argv) {
{ .name = "CRC32C-ref", .digest = hash_crc32c, .digest_size = 4, { .name = "CRC32C-ref", .digest = hash_crc32c, .digest_size = 4,
.cpu_flag = CPU_FLAG_NONE }, .cpu_flag = CPU_FLAG_NONE },
{ .name = "CRC32C-NI", .digest = hash_crc32c, .digest_size = 4, { .name = "CRC32C-NI", .digest = hash_crc32c, .digest_size = 4,
.cpu_flag = CPU_FLAG_SSE42 }, .cpu_flag = CPU_FLAG_PCLMUL },
{ .name = "XXHASH", .digest = hash_xxhash, .digest_size = 8 }, { .name = "XXHASH", .digest = hash_xxhash, .digest_size = 8 },
{ .name = "SHA256-ref", .digest = hash_sha256, .digest_size = 32, { .name = "SHA256-ref", .digest = hash_sha256, .digest_size = 32,
.cpu_flag = CPU_FLAG_NONE, .backend = CRYPTOPROVIDER_BUILTIN + 1 }, .cpu_flag = CPU_FLAG_NONE, .backend = CRYPTOPROVIDER_BUILTIN + 1 },

View File

@ -442,7 +442,7 @@ static const struct hash_testspec test_spec[] = {
.digest_size = 4, .digest_size = 4,
.testvec = crc32c_tv, .testvec = crc32c_tv,
.count = ARRAY_SIZE(crc32c_tv), .count = ARRAY_SIZE(crc32c_tv),
.cpu_flag = CPU_FLAG_SSE42, .cpu_flag = CPU_FLAG_PCLMUL,
.hash = hash_crc32c .hash = hash_crc32c
}, { }, {
.name = "XXHASH", .name = "XXHASH",