From 7d1353fa01da000945f747bd8d26673e2745487c Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 9 Feb 2023 02:54:35 +0100 Subject: [PATCH] btrfs-progs: hash-speedtest: add accelerated BLAKE2 implementations Benchmark all accelerated implementations if the CPU supports them. Set the level before each test, expecting that the BLAKE2 code switches the implementation dynamically. Signed-off-by: David Sterba --- crypto/hash-speedtest.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/crypto/hash-speedtest.c b/crypto/hash-speedtest.c index 21e7697e..83217b17 100644 --- a/crypto/hash-speedtest.c +++ b/crypto/hash-speedtest.c @@ -29,6 +29,7 @@ #include "crypto/sha.h" #include "crypto/blake2.h" #include "common/messages.h" +#include "common/cpu-utils.h" #ifdef __x86_64__ static const int cycles_supported = 1; @@ -181,16 +182,26 @@ int main(int argc, char **argv) { int digest_size; u64 cycles; u64 time; + unsigned long cpu_flag; } contestants[] = { { .name = "NULL-NOP", .digest = hash_null_nop, .digest_size = 32 }, { .name = "NULL-MEMCPY", .digest = hash_null_memcpy, .digest_size = 32 }, { .name = "CRC32C", .digest = hash_crc32c, .digest_size = 4 }, { .name = "XXHASH", .digest = hash_xxhash, .digest_size = 8 }, { .name = "SHA256", .digest = hash_sha256, .digest_size = 32 }, - { .name = "BLAKE2", .digest = hash_blake2b, .digest_size = 32 }, + { .name = "BLAKE2-ref", .digest = hash_blake2b, .digest_size = 32 }, + { .name = "BLAKE2-SSE2", .digest = hash_blake2b, .digest_size = 32, + .cpu_flag = CPU_FLAG_SSE2 }, + { .name = "BLAKE2-SSE41", .digest = hash_blake2b, .digest_size = 32, + .cpu_flag = CPU_FLAG_SSE41 }, + { .name = "BLAKE2-AVX2", .digest = hash_blake2b, .digest_size = 32, + .cpu_flag = CPU_FLAG_AVX2 }, }; int units = UNITS_CYCLES; + cpu_detect_flags(); + cpu_print_flags(); + optind = 0; while (1) { static const struct option long_options[] = { @@ -250,9 +261,14 @@ int main(int argc, char **argv) { u64 tstart, tend; u64 total 
= 0; + if (c->cpu_flag != 0 && !cpu_has_feature(c->cpu_flag)) { + printf("%12s: no CPU support\n", c->name); + continue; + } printf("%12s: ", c->name); fflush(stdout); + cpu_set_level(c->cpu_flag); tstart = get_time(); start = get_cycles(units); for (iter = 0; iter < iterations; iter++) { @@ -264,6 +280,7 @@ int main(int argc, char **argv) { tend = get_time(); c->cycles = end - start; c->time = tend - tstart; + cpu_reset_level(); if (units == UNITS_CYCLES || units == UNITS_PERF) total = c->cycles;