btrfs-progs: hash-speedtest: select implementation by features

Now put all the recent changes into action. Add a callback that will
reinitialize the implementation pointers according to the desired
feature. Reference implementations use the NONE CPU flag to distinguish
them from the rest.

Example results:

$ hash-speedtest
CPU flags: 0xff
CPU features: SSE2 SSSE3 SSE41 SSE42 SHA AVX AVX2
Block size:     4096
Iterations:     1000000
Implementation: builtin
Units:          CPU cycles

    NULL-NOP: cycles:     67129026, cycles/i       67
 NULL-MEMCPY: cycles:    231303654, cycles/i      231,    60792.500 MiB/s
  CRC32C-ref: cycles:  23982698042, cycles/i    23982,      586.322 MiB/s
   CRC32C-NI: cycles:   1168017624, cycles/i     1168,    12038.828 MiB/s
      XXHASH: cycles:    838434468, cycles/i      838,    16771.152 MiB/s
  SHA256-ref: cycles:  68296865380, cycles/i    68296,      205.889 MiB/s
   SHA256-NI: cycles:  29748853920, cycles/i    29748,      472.676 MiB/s
  BLAKE2-ref: cycles:  14532177414, cycles/i    14532,      967.617 MiB/s
 BLAKE2-SSE2: cycles:  17762215810, cycles/i    17762,      791.657 MiB/s
BLAKE2-SSE41: cycles:  12370044656, cycles/i    12370,     1136.744 MiB/s
 BLAKE2-AVX2: cycles:   9472823338, cycles/i     9472,     1484.412 MiB/s

Previously:

Block size:     4096
Iterations:     1000000
Implementation: builtin
Units:          CPU cycles

    NULL-NOP: cycles:     67714016, cycles/i       67
 NULL-MEMCPY: cycles:    234140818, cycles/i      234,    60055.762 MiB/s
      CRC32C: cycles:   1187358432, cycles/i     1187,    11842.733 MiB/s
      XXHASH: cycles:   1897530684, cycles/i     1897,     7410.448 MiB/s
      SHA256: cycles:  69855340702, cycles/i    69855,      201.296 MiB/s
      BLAKE2: cycles:  14713130972, cycles/i    14713,      955.716 MiB/s

The CPU is i7-11700 3.60GHz and not the same as previous results
mentioned in changelogs so the results are incomparable. Otherwise, the
updated xxhash implementation is twice as fast, no significant changes
for the rest.

Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
David Sterba 2023-02-16 04:41:27 +01:00
parent 8a60fde969
commit 3d9217f7ab

View File

@ -183,21 +183,27 @@ int main(int argc, char **argv) {
u64 cycles;
u64 time;
unsigned long cpu_flag;
void (*init_accel)(void);
} contestants[] = {
{ .name = "NULL-NOP", .digest = hash_null_nop, .digest_size = 32 },
{ .name = "NULL-MEMCPY", .digest = hash_null_memcpy, .digest_size = 32 },
{ .name = "CRC32C", .digest = hash_crc32c, .digest_size = 4 },
{ .name = "CRC32C-ref", .digest = hash_crc32c, .digest_size = 4,
.cpu_flag = CPU_FLAG_NONE, .init_accel = hash_init_crc32c },
{ .name = "CRC32C-NI", .digest = hash_crc32c, .digest_size = 4,
.cpu_flag = CPU_FLAG_SSE42, .init_accel = hash_init_crc32c },
{ .name = "XXHASH", .digest = hash_xxhash, .digest_size = 8 },
{ .name = "SHA256-ref", .digest = hash_sha256, .digest_size = 32 },
{ .name = "SHA256-ref", .digest = hash_sha256, .digest_size = 32,
.cpu_flag = CPU_FLAG_NONE, .init_accel = hash_init_sha256 },
{ .name = "SHA256-NI", .digest = hash_sha256, .digest_size = 32,
.cpu_flag = CPU_FLAG_SHA },
{ .name = "BLAKE2-ref", .digest = hash_blake2b, .digest_size = 32 },
.cpu_flag = CPU_FLAG_SHA, .init_accel = hash_init_sha256 },
{ .name = "BLAKE2-ref", .digest = hash_blake2b, .digest_size = 32,
.cpu_flag = CPU_FLAG_NONE, .init_accel = hash_init_blake2 },
{ .name = "BLAKE2-SSE2", .digest = hash_blake2b, .digest_size = 32,
.cpu_flag = CPU_FLAG_SSE2 },
.cpu_flag = CPU_FLAG_SSE2, .init_accel = hash_init_blake2 },
{ .name = "BLAKE2-SSE41", .digest = hash_blake2b, .digest_size = 32,
.cpu_flag = CPU_FLAG_SSE41 },
.cpu_flag = CPU_FLAG_SSE41, .init_accel = hash_init_blake2 },
{ .name = "BLAKE2-AVX2", .digest = hash_blake2b, .digest_size = 32,
.cpu_flag = CPU_FLAG_AVX2 },
.cpu_flag = CPU_FLAG_AVX2, .init_accel = hash_init_blake2 },
};
int units = UNITS_CYCLES;
@ -270,7 +276,10 @@ int main(int argc, char **argv) {
printf("%12s: ", c->name);
fflush(stdout);
cpu_set_level(c->cpu_flag);
if (c->cpu_flag) {
cpu_set_level(c->cpu_flag);
c->init_accel();
}
tstart = get_time();
start = get_cycles(units);
for (iter = 0; iter < iterations; iter++) {