From 4fc291a46516a46c1750ce01d212ccbe195d72cf Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 1 Mar 2023 01:32:26 +0100 Subject: [PATCH] btrfs-progs: fix detection of accelerated implementation. The build fails with crypto backends other than builtin, the initializers cannot be reached as they're ifdef-ed out. Move hash_init_accel under the right condition and delete the algorithm-specific initializers as they're used only by the hash test and that can simply call hash_init_accel to set the implementation. All the -m flags need to be detected at configure time and the flag used for ifdef (HAVE_CFLAG_m*), not the actual feature defined by compiler as the dispatcher function is not built with the -m flags. The uname check for x86_64 must be dropped so on i386/i586 we can still build accelerated version. Signed-off-by: David Sterba --- Makefile | 8 ++++++-- Makefile.inc.in | 3 +++ configure.ac | 12 ++++++++++++ crypto/blake2b-ref.c | 6 +++--- crypto/hash-speedtest.c | 19 +++++++++---------- crypto/hash.c | 39 ++++++++++++++++++++++----------------- crypto/hash.h | 2 -- crypto/sha224-256.c | 4 ++-- 8 files changed, 57 insertions(+), 36 deletions(-) diff --git a/Makefile b/Makefile index 2021bc4a..4b0a869b 100644 --- a/Makefile +++ b/Makefile @@ -130,14 +130,18 @@ LIBBTRFSUTIL_LDFLAGS = $(SUBST_LDFLAGS) \ # Default implementation CRYPTO_OBJECTS = -ifeq ($(shell uname -m),x86_64) +ifeq ($(HAVE_CFLAG_msse2),1) crypto_blake2b_sse2_cflags = -msse2 +endif +ifeq ($(HAVE_CFLAG_msse41),1) crypto_blake2b_sse41_cflags = -msse4.1 +endif +ifeq ($(HAVE_CFLAG_mavx2),1) crypto_blake2b_avx2_cflags = -mavx2 +endif ifeq ($(HAVE_CFLAG_msha),1) crypto_sha256_x86_cflags = -msse4.1 -msha endif -endif LIBS = $(LIBS_BASE) $(LIBS_CRYPTO) LIBBTRFS_LIBS = $(LIBS_BASE) $(LIBS_CRYPTO) diff --git a/Makefile.inc.in b/Makefile.inc.in index 213e1a98..e7da4ff1 100644 --- a/Makefile.inc.in +++ b/Makefile.inc.in @@ -24,6 +24,9 @@ PYTHON_CFLAGS = @PYTHON_CFLAGS@ CRYPTOPROVIDER_BUILTIN = 
@CRYPTOPROVIDER_BUILTIN@ CRYPTO_CFLAGS = @GCRYPT_CFLAGS@ @SODIUM_CFLAGS@ @KCAPI_CFLAGS@ +HAVE_CFLAG_msse2 = @HAVE_CFLAG_msse2@ +HAVE_CFLAG_msse41 = @HAVE_CFLAG_msse41@ +HAVE_CFLAG_mavx2 = @HAVE_CFLAG_mavx2@ HAVE_CFLAG_msha = @HAVE_CFLAG_msha@ SUBST_CFLAGS = @CFLAGS@ diff --git a/configure.ac b/configure.ac index 7a27c233..25bb901d 100644 --- a/configure.ac +++ b/configure.ac @@ -43,6 +43,18 @@ AC_C_CONST AC_C_VOLATILE AC_C_BIGENDIAN +AX_CHECK_COMPILE_FLAG([-msse2], [HAVE_CFLAG_msse2=1], [HAVE_CFLAG_msse2=0]) +AC_SUBST([HAVE_CFLAG_msse2]) +AC_DEFINE_UNQUOTED([HAVE_CFLAG_msse2], [$HAVE_CFLAG_msse2], [Compiler supports -msse2]) + +AX_CHECK_COMPILE_FLAG([-msse4.1], [HAVE_CFLAG_msse41=1], [HAVE_CFLAG_msse41=0]) +AC_SUBST([HAVE_CFLAG_msse41]) +AC_DEFINE_UNQUOTED([HAVE_CFLAG_msse41], [$HAVE_CFLAG_msse41], [Compiler supports -msse4.1]) + +AX_CHECK_COMPILE_FLAG([-mavx2], [HAVE_CFLAG_mavx2=1], [HAVE_CFLAG_mavx2=0]) +AC_SUBST([HAVE_CFLAG_mavx2]) +AC_DEFINE_UNQUOTED([HAVE_CFLAG_mavx2], [$HAVE_CFLAG_mavx2], [Compiler supports -mavx2]) + AX_CHECK_COMPILE_FLAG([-msha], [HAVE_CFLAG_msha=1], [HAVE_CFLAG_msha=0]) AC_SUBST([HAVE_CFLAG_msha]) AC_DEFINE_UNQUOTED([HAVE_CFLAG_msha], [$HAVE_CFLAG_msha], [Compiler supports -msha]) diff --git a/crypto/blake2b-ref.c b/crypto/blake2b-ref.c index b39cd59d..eac4cf0c 100644 --- a/crypto/blake2b-ref.c +++ b/crypto/blake2b-ref.c @@ -229,15 +229,15 @@ static void (*blake2b_compress)( blake2b_state *S, const uint8_t block[BLAKE2B_B void blake2_init_accel(void) { if (0); -#if HAVE_AVX2 +#if HAVE_CFLAG_mavx2 == 1 else if (cpu_has_feature(CPU_FLAG_AVX2)) blake2b_compress = blake2b_compress_avx2; #endif -#if HAVE_SSE41 +#if HAVE_CFLAG_msse41 == 1 else if (cpu_has_feature(CPU_FLAG_SSE41)) blake2b_compress = blake2b_compress_sse41; #endif -#if HAVE_SSE2 +#if HAVE_CFLAG_msse2 == 1 else if (cpu_has_feature(CPU_FLAG_SSE2)) blake2b_compress = blake2b_compress_sse2; #endif diff --git a/crypto/hash-speedtest.c b/crypto/hash-speedtest.c index f8ee4b68..c7ad1130 
100644 --- a/crypto/hash-speedtest.c +++ b/crypto/hash-speedtest.c @@ -183,27 +183,26 @@ int main(int argc, char **argv) { u64 cycles; u64 time; unsigned long cpu_flag; - void (*init_accel)(void); } contestants[] = { { .name = "NULL-NOP", .digest = hash_null_nop, .digest_size = 32 }, { .name = "NULL-MEMCPY", .digest = hash_null_memcpy, .digest_size = 32 }, { .name = "CRC32C-ref", .digest = hash_crc32c, .digest_size = 4, - .cpu_flag = CPU_FLAG_NONE, .init_accel = hash_init_crc32c }, + .cpu_flag = CPU_FLAG_NONE }, { .name = "CRC32C-NI", .digest = hash_crc32c, .digest_size = 4, - .cpu_flag = CPU_FLAG_SSE42, .init_accel = hash_init_crc32c }, + .cpu_flag = CPU_FLAG_SSE42 }, { .name = "XXHASH", .digest = hash_xxhash, .digest_size = 8 }, { .name = "SHA256-ref", .digest = hash_sha256, .digest_size = 32, - .cpu_flag = CPU_FLAG_NONE, .init_accel = hash_init_sha256 }, + .cpu_flag = CPU_FLAG_NONE }, { .name = "SHA256-NI", .digest = hash_sha256, .digest_size = 32, - .cpu_flag = CPU_FLAG_SHA, .init_accel = hash_init_sha256 }, + .cpu_flag = CPU_FLAG_SHA }, { .name = "BLAKE2-ref", .digest = hash_blake2b, .digest_size = 32, - .cpu_flag = CPU_FLAG_NONE, .init_accel = hash_init_blake2 }, + .cpu_flag = CPU_FLAG_NONE }, { .name = "BLAKE2-SSE2", .digest = hash_blake2b, .digest_size = 32, - .cpu_flag = CPU_FLAG_SSE2, .init_accel = hash_init_blake2 }, + .cpu_flag = CPU_FLAG_SSE2 }, { .name = "BLAKE2-SSE41", .digest = hash_blake2b, .digest_size = 32, - .cpu_flag = CPU_FLAG_SSE41, .init_accel = hash_init_blake2 }, + .cpu_flag = CPU_FLAG_SSE41 }, { .name = "BLAKE2-AVX2", .digest = hash_blake2b, .digest_size = 32, - .cpu_flag = CPU_FLAG_AVX2, .init_accel = hash_init_blake2 }, + .cpu_flag = CPU_FLAG_AVX2 }, }; int units = UNITS_CYCLES; @@ -278,7 +277,7 @@ int main(int argc, char **argv) { if (c->cpu_flag) { cpu_set_level(c->cpu_flag); - c->init_accel(); + hash_init_accel(); } tstart = get_time(); start = get_cycles(units); diff --git a/crypto/hash.c b/crypto/hash.c index e8ca18c9..f18dbcb6 
100644 --- a/crypto/hash.c +++ b/crypto/hash.c @@ -20,28 +20,11 @@ #include "crypto/sha.h" #include "crypto/blake2.h" -void hash_init_accel(void) -{ - crc32c_init_accel(); - blake2_init_accel(); - sha256_init_accel(); -} - void hash_init_crc32c(void) { crc32c_init_accel(); } -void hash_init_blake2(void) -{ - blake2_init_accel(); -} - -void hash_init_sha256(void) -{ - sha256_init_accel(); -} - /* * Default builtin implementations */ @@ -70,6 +53,13 @@ int hash_xxhash(const u8 *buf, size_t length, u8 *out) */ #if CRYPTOPROVIDER_BUILTIN == 1 +void hash_init_accel(void) +{ + crc32c_init_accel(); + blake2_init_accel(); + sha256_init_accel(); +} + int hash_sha256(const u8 *buf, size_t len, u8 *out) { SHA256Context context; @@ -98,6 +88,11 @@ int hash_blake2b(const u8 *buf, size_t len, u8 *out) #include <gcrypt.h> +void hash_init_accel(void) +{ + crc32c_init_accel(); +} + int hash_sha256(const u8 *buf, size_t len, u8 *out) { gcry_md_hash_buffer(GCRY_MD_SHA256, out, buf, len); @@ -117,6 +112,11 @@ int hash_blake2b(const u8 *buf, size_t len, u8 *out) #include <sodium/crypto_hash_sha256.h> #include <sodium/crypto_generichash_blake2b.h> +void hash_init_accel(void) +{ + crc32c_init_accel(); +} + int hash_sha256(const u8 *buf, size_t len, u8 *out) { return crypto_hash_sha256(out, buf, len); @@ -134,6 +134,11 @@ int hash_blake2b(const u8 *buf, size_t len, u8 *out) #include <kcapi.h> +void hash_init_accel(void) +{ + crc32c_init_accel(); +} + int hash_sha256(const u8 *buf, size_t len, u8 *out) { static struct kcapi_handle *handle = NULL; diff --git a/crypto/hash.h b/crypto/hash.h index 8a6deed1..fda89e50 100644 --- a/crypto/hash.h +++ b/crypto/hash.h @@ -28,7 +28,5 @@ int hash_blake2b(const u8 *buf, size_t length, u8 *out); void hash_init_accel(void); void hash_init_crc32c(void); -void hash_init_blake2(void); -void hash_init_sha256(void); #endif diff --git a/crypto/sha224-256.c b/crypto/sha224-256.c index 207014cb..5b15a5c4 100644 --- a/crypto/sha224-256.c +++ b/crypto/sha224-256.c @@ -97,7 +97,7 @@ static uint32_t SHA256_H0[SHA256HashSize/4] = { static void
(*sha256_process_message_block)(SHA256Context *context) = SHA224_256ProcessMessageBlock; -#ifdef __SHA__ +#if HAVE_CFLAG_msha == 1 void sha256_process_x86(uint32_t state[8], const uint8_t data[], uint32_t length); static void sha256_process_x86_dispatch(SHA256Context *context) @@ -110,7 +110,7 @@ static void sha256_process_x86_dispatch(SHA256Context *context) void sha256_init_accel(void) { -#ifdef __SHA__ +#if HAVE_CFLAG_msha == 1 if (cpu_has_feature(CPU_FLAG_SHA)) sha256_process_message_block = sha256_process_x86_dispatch; else