From b154422db10bee52050668d7679c140ed27800cd Mon Sep 17 00:00:00 2001
From: Willy Tarreau
Date: Fri, 3 Dec 2021 17:38:42 +0100
Subject: [PATCH] IMPORT: slz: use the correct CRC32 instruction when running
 in 32-bit mode

Many ARMv8 processors also support Aarch32 and can run armv7 and even
thumb2 code. While armv8 compilers will not emit these instructions,
armv7 compilers that are aware of these processors will do so. For
example, using gcc built for an armv7 target and passing it
"-mcpu=cortex-a72" or "-march=armv8-a+crc" will result in the CRC32
instruction being used. In this case the current assembly code fails
because with the ARM and Thumb2 instruction sets there are no "%wX"
half-registers. We need to use "%X" instead as the native 32-bit
register when running with a 32-bit instruction set, and "%wX" when
using the 64-bit instruction set (A64).

This is slz upstream commit fab83248612a1e8ee942963fe916a9cdbf085097
---
 src/slz.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/src/slz.c b/src/slz.c
index 23912da6a2..0ca9d279c3 100644
--- a/src/slz.c
+++ b/src/slz.c
@@ -374,7 +374,13 @@ static void copy_lit_huff(struct slz_stream *strm, const unsigned char *buf, uin
 static inline uint32_t slz_hash(uint32_t a)
 {
 #if defined(__ARM_FEATURE_CRC32)
+# if defined(__ARM_ARCH_ISA_A64)
+	// 64 bit mode
 	__asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(a) : "r"(0));
+# else
+	// 32 bit mode (e.g. armv7 compiler building for armv8)
+	__asm__ volatile("crc32w %0,%0,%1" : "+r"(a) : "r"(0));
+# endif
 	return a >> (32 - HASH_BITS);
 #else
 	return ((a << 19) + (a << 6) - a) >> (32 - HASH_BITS);
@@ -870,7 +876,13 @@ static inline uint32_t crc32_char(uint32_t crc, uint8_t x)
 {
 #if defined(__ARM_FEATURE_CRC32)
 	crc = ~crc;
+# if defined(__ARM_ARCH_ISA_A64)
+	// 64 bit mode
 	__asm__ volatile("crc32b %w0,%w0,%w1" : "+r"(crc) : "r"(x));
+# else
+	// 32 bit mode (e.g. armv7 compiler building for armv8)
+	__asm__ volatile("crc32b %0,%0,%1" : "+r"(crc) : "r"(x));
+# endif
 	crc = ~crc;
 #else
 	crc = crc32_fast[0][(crc ^ x) & 0xff] ^ (crc >> 8);
@@ -881,7 +893,13 @@ static inline uint32_t crc32_char(uint32_t crc, uint8_t x)
 static inline uint32_t crc32_uint32(uint32_t data)
 {
 #if defined(__ARM_FEATURE_CRC32)
+# if defined(__ARM_ARCH_ISA_A64)
+	// 64 bit mode
 	__asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(data) : "r"(~0UL));
+# else
+	// 32 bit mode (e.g. armv7 compiler building for armv8)
+	__asm__ volatile("crc32w %0,%0,%1" : "+r"(data) : "r"(~0UL));
+# endif
 	data = ~data;
 #else
 	data = crc32_fast[3][(data >> 0) & 0xff] ^
@@ -913,10 +931,19 @@ uint32_t slz_crc32_by4(uint32_t crc, const unsigned char *buf, int len)
 #ifdef UNALIGNED_LE_OK
 #if defined(__ARM_FEATURE_CRC32)
 	crc = ~crc;
+# if defined(__ARM_ARCH_ISA_A64)
+	// 64 bit mode
 	__asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(crc) : "r"(*(uint32_t*)(buf)));
 	__asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(crc) : "r"(*(uint32_t*)(buf + 4)));
 	__asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(crc) : "r"(*(uint32_t*)(buf + 8)));
 	__asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(crc) : "r"(*(uint32_t*)(buf + 12)));
+# else
+	// 32 bit mode (e.g. armv7 compiler building for armv8)
+	__asm__ volatile("crc32w %0,%0,%1" : "+r"(crc) : "r"(*(uint32_t*)(buf)));
+	__asm__ volatile("crc32w %0,%0,%1" : "+r"(crc) : "r"(*(uint32_t*)(buf + 4)));
+	__asm__ volatile("crc32w %0,%0,%1" : "+r"(crc) : "r"(*(uint32_t*)(buf + 8)));
+	__asm__ volatile("crc32w %0,%0,%1" : "+r"(crc) : "r"(*(uint32_t*)(buf + 12)));
+# endif
 	crc = ~crc;
 #else
 	crc ^= *(uint32_t *)buf;
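
As a side note, the operand-modifier selection can be exercised outside of
slz with a small standalone program. The sketch below is illustrative only:
the file name crc32_demo.c and the crc32w_step() wrapper are made up for the
example, while the __ARM_FEATURE_CRC32 / __ARM_ARCH_ISA_A64 tests and the two
asm forms are the ones used in the patch above.

/* crc32_demo.c - standalone sketch of the %w0 vs %0 operand selection
 * (file name and wrapper are illustrative, not part of slz).
 *
 * Example builds:
 *   aarch64-linux-gnu-gcc   -march=armv8-a+crc -O2 crc32_demo.c  (A64, %w0)
 *   arm-linux-gnueabihf-gcc -march=armv8-a+crc -O2 crc32_demo.c  (A32, %0)
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static inline uint32_t crc32w_step(uint32_t crc, uint32_t data)
{
#if defined(__ARM_FEATURE_CRC32)
# if defined(__ARM_ARCH_ISA_A64)
	/* A64: 32-bit operands are the "w" views of the x registers,
	 * hence the "w" operand modifier.
	 */
	__asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(crc) : "r"(data));
# else
	/* A32/T32: registers are natively 32 bits wide and no "%wX"
	 * half-register exists, hence the plain operand.
	 */
	__asm__ volatile("crc32w %0,%0,%1" : "+r"(crc) : "r"(data));
# endif
#else
	/* No CRC32 extension: slz would use its table-based fallback,
	 * elided here to keep the sketch short.
	 */
	(void)data;
#endif
	return crc;
}

int main(void)
{
	printf("%08" PRIx32 "\n", crc32w_step(~0U, 0x12345678));
	return 0;
}

Building the same file once for aarch64 and once with an armv7 toolchain
passed "-march=armv8-a+crc" assembles both branches, which is exactly the
pair of cases the patch distinguishes.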