btrfs-progs: crypto: add perf support to speed test
Use perf events to read the cycle count; this should work on all architectures. The mode is enabled by the option --perf, and the sysctl kernel.perf_event_paranoid must be 0 or 1.

The results are roughly the same as for raw cycles on x86_64 but worse because of the additional overhead (read, context switch):

Block size: 4096
Iterations: 100000
Implementation: builtin
Units: CPU cycles

NULL-NOP: cycles: 42719688, cycles/i 427
NULL-MEMCPY: cycles: 72941208, cycles/i 729, 18670.314 MiB/s
CRC32C: cycles: 183709926, cycles/i 1837, 7413.009 MiB/s
XXHASH: cycles: 136727614, cycles/i 1367, 9960.264 MiB/s
SHA256: cycles: 10711594532, cycles/i 107115, 127.137 MiB/s
BLAKE2: cycles: 2256957529, cycles/i 22569, 603.398 MiB/s

Block size: 4096
Iterations: 100000
Implementation: builtin
Units: perf event: CPU cycles

NULL-NOP: perf_c: 29649530, perf_c/i 296
NULL-MEMCPY: perf_c: 59954062, perf_c/i 599, 15137.464 MiB/s
CRC32C: perf_c: 179009071, perf_c/i 1790, 6929.460 MiB/s
XXHASH: perf_c: 136413509, perf_c/i 1364, 9982.950 MiB/s
SHA256: perf_c: 10997356664, perf_c/i 109973, 127.046 MiB/s
BLAKE2: perf_c: 2379077576, perf_c/i 23790, 588.780 MiB/s

Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
parent
1d4bab875a
commit
9527bc0649
|
@ -60,6 +60,9 @@ AC_CHECK_FUNCS([reallocarray])
|
||||||
|
|
||||||
AC_CHECK_FUNCS([clock_gettime])
|
AC_CHECK_FUNCS([clock_gettime])
|
||||||
|
|
||||||
|
AC_CHECK_HEADERS([linux/perf_event.h])
|
||||||
|
AC_CHECK_HEADERS([linux/hw_breakpoint.h])
|
||||||
|
|
||||||
m4_ifndef([PKG_PROG_PKG_CONFIG],
|
m4_ifndef([PKG_PROG_PKG_CONFIG],
|
||||||
[m4_fatal([Could not locate the pkg-config autoconf
|
[m4_fatal([Could not locate the pkg-config autoconf
|
||||||
macros. These are usually located in /usr/share/aclocal/pkg.m4.
|
macros. These are usually located in /usr/share/aclocal/pkg.m4.
|
||||||
|
|
|
@ -1,6 +1,13 @@
|
||||||
#include "../kerncompat.h"
|
#include "../kerncompat.h"
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include <getopt.h>
|
#include <getopt.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#if HAVE_LINUX_PERF_EVENT_H == 1 && HAVE_LINUX_HW_BREAKPOINT_H == 1
|
||||||
|
#include <linux/perf_event.h>
|
||||||
|
#include <linux/hw_breakpoint.h>
|
||||||
|
#include <sys/syscall.h>
|
||||||
|
#define HAVE_PERF
|
||||||
|
#endif
|
||||||
#include "crypto/hash.h"
|
#include "crypto/hash.h"
|
||||||
#include "crypto/crc32c.h"
|
#include "crypto/crc32c.h"
|
||||||
#include "crypto/sha.h"
|
#include "crypto/sha.h"
|
||||||
|
@ -12,6 +19,12 @@ static const int cycles_supported = 1;
|
||||||
static const int cycles_supported = 0;
|
static const int cycles_supported = 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
 * Measurement units selectable on the command line. The value picks the
 * counter read by get_cycles() and the label printed with the results.
 */
enum {
	UNITS_CYCLES = 0,	/* raw CPU cycle counter */
	UNITS_TIME = 1,		/* wall clock time in nanoseconds */
	UNITS_PERF = 2,		/* CPU cycles read through a perf event */
};
|
||||||
|
|
||||||
const int blocksize = 4096;
|
const int blocksize = 4096;
|
||||||
int iterations = 100000;
|
int iterations = 100000;
|
||||||
|
|
||||||
|
@ -31,14 +44,56 @@ static inline u64 read_tsc(void)
|
||||||
return rdtsc();
|
return rdtsc();
|
||||||
}
|
}
|
||||||
|
|
||||||
#define get_cycles() read_tsc()
|
#define cpu_cycles() read_tsc()
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#define get_cycles() (0)
|
#define cpu_cycles() (0)
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef HAVE_PERF
|
||||||
|
|
||||||
|
/* File descriptor of the perf event counter, -1 while no counter is open. */
static int perf_fd = -1;

/*
 * Open a perf event that counts hardware CPU cycles and remember its file
 * descriptor in perf_fd.
 *
 * The syscall arguments are pid 0, cpu -1, group_fd -1 and flags 0.
 *
 * Calling this again while a counter is already open returns the existing
 * descriptor instead of opening (and leaking) another one, e.g. when the
 * --perf option is given more than once.
 *
 * Returns the file descriptor on success, -1 on failure with errno set by
 * the syscall.
 */
static int perf_init(void)
{
	static struct perf_event_attr attr = {
		.type = PERF_TYPE_HARDWARE,
		/* Tell the kernel which layout of the structure we were built with */
		.size = sizeof(struct perf_event_attr),
		.config = PERF_COUNT_HW_CPU_CYCLES
	};

	if (perf_fd >= 0)
		return perf_fd;

	perf_fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	return perf_fd;
}
|
||||||
|
|
||||||
|
static void perf_finish(void)
|
||||||
|
{
|
||||||
|
close(perf_fd);
|
||||||
|
}
|
||||||
|
|
||||||
|
static long long perf_cycles(void)
|
||||||
|
{
|
||||||
|
long long cycles;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
ret = read(perf_fd, &cycles, sizeof(cycles));
|
||||||
|
if (ret != sizeof(cycles))
|
||||||
|
return 0;
|
||||||
|
return cycles;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
/*
 * Fallback used when the perf headers are not available at build time:
 * perf measurement cannot be initialized.
 *
 * Always returns -1 with errno set to EOPNOTSUPP.
 */
static int perf_init(void)
{
	errno = EOPNOTSUPP;
	return -1;
}
|
||||||
|
/* Fallback without perf support: there is nothing to release. */
static void perf_finish(void)
{
}
|
||||||
|
/* Fallback without perf support: no counter exists, always returns 0. */
static long long perf_cycles(void)
{
	return 0;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static inline u64 get_time(void)
|
static inline u64 get_time(void)
|
||||||
{
|
{
|
||||||
struct timespec ts;
|
struct timespec ts;
|
||||||
|
@ -47,6 +102,16 @@ static inline u64 get_time(void)
|
||||||
return ts.tv_sec * 1000 * 1000 * 1000 + ts.tv_nsec;
|
return ts.tv_sec * 1000 * 1000 * 1000 + ts.tv_nsec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline u64 get_cycles(int units)
|
||||||
|
{
|
||||||
|
switch (units) {
|
||||||
|
case UNITS_CYCLES: return cpu_cycles();
|
||||||
|
case UNITS_TIME: return get_time();
|
||||||
|
case UNITS_PERF: return perf_cycles();
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/* Read the input and copy last bytes as the hash */
|
/* Read the input and copy last bytes as the hash */
|
||||||
static int hash_null_memcpy(const u8 *buf, size_t length, u8 *out)
|
static int hash_null_memcpy(const u8 *buf, size_t length, u8 *out)
|
||||||
{
|
{
|
||||||
|
@ -68,11 +133,22 @@ static int hash_null_nop(const u8 *buf, size_t length, u8 *out)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *units_to_str(int units)
|
static const char *units_to_desc(int units)
|
||||||
{
|
{
|
||||||
switch (units) {
|
switch (units) {
|
||||||
case 0: return "cycles";
|
case UNITS_CYCLES: return "CPU cycles";
|
||||||
case 1: return "nsecs";
|
case UNITS_TIME: return "time: ns";
|
||||||
|
case UNITS_PERF: return "perf event: CPU cycles";
|
||||||
|
}
|
||||||
|
return "unknown";
|
||||||
|
}
|
||||||
|
|
||||||
|
static const char *units_to_str(int units)
|
||||||
|
{
|
||||||
|
switch (units) {
|
||||||
|
case UNITS_CYCLES: return "cycles";
|
||||||
|
case UNITS_TIME: return "nsecs";
|
||||||
|
case UNITS_PERF: return "perf_c";
|
||||||
}
|
}
|
||||||
return "unknown";
|
return "unknown";
|
||||||
}
|
}
|
||||||
|
@ -96,18 +172,19 @@ int main(int argc, char **argv) {
|
||||||
{ .name = "SHA256", .digest = hash_sha256, .digest_size = 32 },
|
{ .name = "SHA256", .digest = hash_sha256, .digest_size = 32 },
|
||||||
{ .name = "BLAKE2", .digest = hash_blake2b, .digest_size = 32 },
|
{ .name = "BLAKE2", .digest = hash_blake2b, .digest_size = 32 },
|
||||||
};
|
};
|
||||||
int units = 0;
|
int units = UNITS_CYCLES;
|
||||||
|
|
||||||
optind = 0;
|
optind = 0;
|
||||||
while (1) {
|
while (1) {
|
||||||
static const struct option long_options[] = {
|
static const struct option long_options[] = {
|
||||||
{ "cycles", no_argument, NULL, 'c' },
|
{ "cycles", no_argument, NULL, 'c' },
|
||||||
{ "time", no_argument, NULL, 't' },
|
{ "time", no_argument, NULL, 't' },
|
||||||
|
{ "perf", no_argument, NULL, 'p' },
|
||||||
{ NULL, 0, NULL, 0}
|
{ NULL, 0, NULL, 0}
|
||||||
};
|
};
|
||||||
int c;
|
int c;
|
||||||
|
|
||||||
c = getopt_long(argc, argv, "ct", long_options, NULL);
|
c = getopt_long(argc, argv, "ctp", long_options, NULL);
|
||||||
if (c < 0)
|
if (c < 0)
|
||||||
break;
|
break;
|
||||||
switch (c) {
|
switch (c) {
|
||||||
|
@ -117,10 +194,18 @@ int main(int argc, char **argv) {
|
||||||
"ERROR: cannot measure cycles on this arch, use --time\n");
|
"ERROR: cannot measure cycles on this arch, use --time\n");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
units = 0;
|
units = UNITS_CYCLES;
|
||||||
break;
|
break;
|
||||||
case 't':
|
case 't':
|
||||||
units = 1;
|
units = UNITS_TIME;
|
||||||
|
break;
|
||||||
|
case 'p':
|
||||||
|
if (perf_init() == -1) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"ERROR: cannot initialize perf, please check sysctl kernel.perf_event_paranoid: %m\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
units = UNITS_PERF;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
fprintf(stderr, "ERROR: unknown option\n");
|
fprintf(stderr, "ERROR: unknown option\n");
|
||||||
|
@ -140,33 +225,33 @@ int main(int argc, char **argv) {
|
||||||
printf("Block size: %d\n", blocksize);
|
printf("Block size: %d\n", blocksize);
|
||||||
printf("Iterations: %d\n", iterations);
|
printf("Iterations: %d\n", iterations);
|
||||||
printf("Implementation: %s\n", CRYPTOPROVIDER);
|
printf("Implementation: %s\n", CRYPTOPROVIDER);
|
||||||
printf("Units: %s\n", units_to_str(units));
|
printf("Units: %s\n", units_to_desc(units));
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
|
||||||
for (idx = 0; idx < ARRAY_SIZE(contestants); idx++) {
|
for (idx = 0; idx < ARRAY_SIZE(contestants); idx++) {
|
||||||
struct contestant *c = &contestants[idx];
|
struct contestant *c = &contestants[idx];
|
||||||
u64 start, end;
|
u64 start, end;
|
||||||
u64 tstart, tend;
|
u64 tstart, tend;
|
||||||
u64 total;
|
u64 total = 0;
|
||||||
|
|
||||||
printf("%12s: ", c->name);
|
printf("%12s: ", c->name);
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
|
|
||||||
tstart = get_time();
|
tstart = get_time();
|
||||||
start = get_cycles();
|
start = get_cycles(units);
|
||||||
for (iter = 0; iter < iterations; iter++) {
|
for (iter = 0; iter < iterations; iter++) {
|
||||||
memset(buf, iter & 0xFF, blocksize);
|
memset(buf, iter & 0xFF, blocksize);
|
||||||
memset(hash, 0, 32);
|
memset(hash, 0, 32);
|
||||||
c->digest(buf, blocksize, hash);
|
c->digest(buf, blocksize, hash);
|
||||||
}
|
}
|
||||||
end = get_cycles();
|
end = get_cycles(units);
|
||||||
tend = get_time();
|
tend = get_time();
|
||||||
c->cycles = end - start;
|
c->cycles = end - start;
|
||||||
c->time = tend - tstart;
|
c->time = tend - tstart;
|
||||||
|
|
||||||
if (units == 0)
|
if (units == UNITS_CYCLES || units == UNITS_PERF)
|
||||||
total = c->cycles;
|
total = c->cycles;
|
||||||
if (units == 1)
|
if (units == UNITS_TIME)
|
||||||
total = c->time;
|
total = c->time;
|
||||||
|
|
||||||
printf("%s: %12llu, %s/i %8llu",
|
printf("%s: %12llu, %s/i %8llu",
|
||||||
|
@ -182,6 +267,7 @@ int main(int argc, char **argv) {
|
||||||
}
|
}
|
||||||
putchar('\n');
|
putchar('\n');
|
||||||
}
|
}
|
||||||
|
perf_finish();
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue