Merge pull request #3604 from yghannam/yazen-testing

crc32c: add aarch64 optimized crc32c implementation

Reviewed-by: Sage Weil <sage@redhat.com>
This commit is contained in:
Sage Weil 2015-03-25 10:06:46 -07:00
commit c639ce51ac
10 changed files with 133 additions and 3 deletions

View File

@ -708,6 +708,7 @@ fi
# Find SIMD / NEON / SSE extensions supported by the compiler
AX_ARM_FEATURES()
AM_CONDITIONAL(HAVE_NEON, [ test "x$ax_cv_support_neon_ext" = "xyes"])
AM_CONDITIONAL(HAVE_ARMV8_CRC, [ test "x$ax_cv_support_crc_ext" = "xyes"])
AX_INTEL_FEATURES()
AM_CONDITIONAL(HAVE_SSSE3, [ test "x$ax_cv_support_ssse3_ext" = "xyes"])
AM_CONDITIONAL(HAVE_SSE4_PCLMUL, [ test "x$ax_cv_support_pclmuldq_ext" = "xyes"])

View File

@ -13,13 +13,27 @@ AC_DEFUN([AX_ARM_FEATURES],
fi
;;
aarch64*)
AX_CHECK_COMPILE_FLAG(-march=armv8-a, ax_cv_support_armv8=yes, [])
if test x"$ax_cv_support_armv8" = x"yes"; then
ARM_ARCH_FLAGS="-march=armv8-a"
ARM_DEFINE_FLAGS="-DARCH_AARCH64"
fi
AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd, ax_cv_support_neon_ext=yes, [])
if test x"$ax_cv_support_neon_ext" = x"yes"; then
ARM_ARCH_FLAGS="$ARM_ARCH_FLAGS+simd"
ARM_DEFINE_FLAGS="$ARM_DEFINE_FLAGS -DARM_NEON"
ARM_NEON_FLAGS="-march=armv8-a+simd -DARCH_AARCH64 -DARM_NEON"
AC_SUBST(ARM_NEON_FLAGS)
ARM_FLAGS="$ARM_FLAGS $ARM_NEON_FLAGS"
AC_DEFINE(HAVE_NEON,,[Support NEON instructions])
AC_SUBST(ARM_NEON_FLAGS)
fi
AX_CHECK_COMPILE_FLAG(-march=armv8-a+crc, ax_cv_support_crc_ext=yes, [])
if test x"$ax_cv_support_crc_ext" = x"yes"; then
ARM_ARCH_FLAGS="$ARM_ARCH_FLAGS+crc"
ARM_CRC_FLAGS="-march=armv8-a+crc -DARCH_AARCH64"
AC_DEFINE(HAVE_ARMV8_CRC,,[Support ARMv8 CRC instructions])
AC_SUBST(ARM_CRC_FLAGS)
fi
ARM_FLAGS="$ARM_ARCH_FLAGS $ARM_DEFINE_FLAGS"
;;
esac

View File

@ -2,6 +2,7 @@
/* flags we export */
int ceph_arch_neon = 0;
int ceph_arch_aarch64_crc32 = 0;
#include <stdio.h>
@ -47,6 +48,7 @@ int ceph_arch_arm_probe(void)
ceph_arch_neon = (get_hwcap() & HWCAP_NEON) == HWCAP_NEON;
#elif __aarch64__ && __linux__
ceph_arch_neon = (get_hwcap() & HWCAP_ASIMD) == HWCAP_ASIMD;
ceph_arch_aarch64_crc32 = (get_hwcap() & HWCAP_CRC32) == HWCAP_CRC32;
#else
if (0)
get_hwcap(); // make compiler shut up

View File

@ -6,6 +6,7 @@ extern "C" {
#endif
extern int ceph_arch_neon; /* true if we have ARM NEON or ASIMD abilities */
extern int ceph_arch_aarch64_crc32; /* true if we have AArch64 CRC32/CRC32C abilities */
extern int ceph_arch_arm_probe(void);

View File

@ -115,11 +115,19 @@ endif
LIBCOMMON_DEPS += libcommon_crc.la
noinst_LTLIBRARIES += libcommon_crc.la
# AArch64 CRC32C implementation, built only when HAVE_ARMV8_CRC is set.
# It is a separate convenience library so that ARM_CRC_FLAGS can be passed
# through its own per-library CFLAGS.
if HAVE_ARMV8_CRC
libcommon_crc_aarch64_la_SOURCES = common/crc32c_aarch64.c
libcommon_crc_aarch64_la_CFLAGS = $(AM_CFLAGS) $(ARM_CRC_FLAGS)
LIBCOMMON_DEPS += libcommon_crc_aarch64.la
noinst_LTLIBRARIES += libcommon_crc_aarch64.la
endif
noinst_HEADERS += \
common/bloom_filter.hpp \
common/sctp_crc32.h \
common/crc32c_intel_baseline.h \
common/crc32c_intel_fast.h
common/crc32c_intel_fast.h \
common/crc32c_aarch64.h
# important; libmsg before libauth!

View File

@ -5,9 +5,11 @@
#include "arch/probe.h"
#include "arch/intel.h"
#include "arch/arm.h"
#include "common/sctp_crc32.h"
#include "common/crc32c_intel_baseline.h"
#include "common/crc32c_intel_fast.h"
#include "common/crc32c_aarch64.h"
/*
* choose best implementation based on the CPU architecture.
@ -24,6 +26,10 @@ ceph_crc32c_func_t ceph_choose_crc32(void)
return ceph_crc32c_intel_fast;
}
if (ceph_arch_aarch64_crc32){
return ceph_crc32c_aarch64;
}
// default
return ceph_crc32c_sctp;
}

View File

@ -0,0 +1,47 @@
#include "acconfig.h"
#include "include/int_types.h"
#include "common/crc32c_aarch64.h"

#include <string.h>
/*
 * Thin wrappers around the AArch64 CRC32C instructions, issued through
 * GCC-style inline assembly.  Each macro updates `crc` in place: the "+r"
 * constraint makes it a read-write register operand, and `value` is a
 * read-only register input.
 *
 * CRC32CX names its data operand as a 64-bit register (%x); CRC32CW,
 * CRC32CH and CRC32CB name it as a 32-bit register (%w), from which the
 * instruction consumes a word, halfword or byte respectively.
 */
#define CRC32CX(crc, value) __asm__("crc32cx %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32CW(crc, value) __asm__("crc32cw %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32CH(crc, value) __asm__("crc32ch %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32CB(crc, value) __asm__("crc32cb %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
/*
 * Update a CRC32C (Castagnoli) value using the ARMv8 CRC32C instructions.
 *
 * crc:    running CRC to continue from.  It is used as-is; no pre- or
 *         post-inversion is applied here.
 * buffer: bytes to checksum, or NULL to checksum `len` zero bytes.
 * len:    number of bytes to process.
 *
 * Returns the updated CRC value.
 *
 * Data is fetched with fixed-size memcpy() calls rather than by casting
 * `buffer` to a wider pointer type: the buffer carries no alignment
 * guarantee, and reading unsigned char storage through uint64_t/uint32_t/
 * uint16_t lvalues violates the strict-aliasing rules.
 */
uint32_t ceph_crc32c_aarch64(uint32_t crc, unsigned char const *buffer, unsigned len)
{
  int64_t length = len;

  if (!buffer) {
    /* Typed 64-bit zero so the %x operand of CRC32CX is a full X register. */
    const uint64_t zero64 = 0;

    while ((length -= sizeof(uint64_t)) >= 0)
      CRC32CX(crc, zero64);

    /*
     * The following is more efficient than the straight loop.
     * length is now in [-8, -1]; only multiples of 8 were subtracted, so
     * its low three bits still equal len % 8 and each set bit selects one
     * 4-, 2- or 1-byte step.
     */
    if (length & sizeof(uint32_t))
      CRC32CW(crc, 0);

    if (length & sizeof(uint16_t))
      CRC32CH(crc, 0);

    if (length & sizeof(uint8_t))
      CRC32CB(crc, 0);
  } else {
    uint64_t v64;
    uint32_t v32;
    uint16_t v16;

    while ((length -= sizeof(uint64_t)) >= 0) {
      memcpy(&v64, buffer, sizeof(v64));
      CRC32CX(crc, v64);
      buffer += sizeof(uint64_t);
    }

    /*
     * The following is more efficient than the straight loop.
     * Same tail handling as above: the low three bits of length equal
     * len % 8.
     */
    if (length & sizeof(uint32_t)) {
      memcpy(&v32, buffer, sizeof(v32));
      CRC32CW(crc, v32);
      buffer += sizeof(uint32_t);
    }

    if (length & sizeof(uint16_t)) {
      memcpy(&v16, buffer, sizeof(v16));
      CRC32CH(crc, v16);
      buffer += sizeof(uint16_t);
    }

    if (length & sizeof(uint8_t))
      CRC32CB(crc, *buffer);
  }
  return crc;
}

View File

@ -0,0 +1,27 @@
#ifndef CEPH_COMMON_CRC32C_AARCH64_H
#define CEPH_COMMON_CRC32C_AARCH64_H
#include "arch/arm.h"
#ifdef __cplusplus
extern "C" {
#endif
#ifdef HAVE_ARMV8_CRC
extern uint32_t ceph_crc32c_aarch64(uint32_t crc, unsigned char const *buffer, unsigned len);
#else
static inline uint32_t ceph_crc32c_aarch64(uint32_t crc, unsigned char const *buffer, unsigned len)
{
return 0;
}
#endif
#ifdef __cplusplus
}
#endif
#endif

View File

@ -13,6 +13,7 @@
#include "common/sctp_crc32.h"
#include "common/crc32c_intel_baseline.h"
#include "common/crc32c_aarch64.h"
TEST(Crc32c, Small) {
const char *a = "foo bar baz";
@ -80,6 +81,15 @@ TEST(Crc32c, Performance) {
std::cout << "intel baseline = " << rate << " MB/sec" << std::endl;
ASSERT_EQ(261108528u, val);
}
if (ceph_arch_aarch64_crc32) // Skip if CRC32C instructions are not defined.
{
utime_t start = ceph_clock_now(NULL);
unsigned val = ceph_crc32c_aarch64(0, (unsigned char *)a, len);
utime_t end = ceph_clock_now(NULL);
float rate = (float)len / (float)(1024*1024) / (float)(end - start);
std::cout << "aarch64 = " << rate << " MB/sec" << std::endl;
ASSERT_EQ(261108528u, val);
}
}

View File

@ -47,9 +47,20 @@ TEST(Arch, all)
int expected;
#if (__arm__ || __aarch64__)
expected = (strstr(flags, " neon ") || strstr(flags, " asimd ")) ? 1 : 0;
EXPECT_EQ(expected, ceph_arch_neon);
#endif
#if (__aarch64__)
expected = strstr(flags, " crc32 ") ? 1 : 0;
EXPECT_EQ(expected, ceph_arch_aarch64_crc32);
#endif
#if (__x86_64__)
expected = strstr(flags, " pclmulqdq ") ? 1 : 0;
EXPECT_EQ(expected, ceph_arch_intel_pclmul);
@ -67,6 +78,9 @@ TEST(Arch, all)
expected = strstr(flags, " sse2 ") ? 1 : 0;
EXPECT_EQ(expected, ceph_arch_intel_sse2);
#endif
#endif
}