modernize malloc_bench

Instead of relying on gperftools-specific tc_XYZ functions for sized
deallocation and memalign, we use standard C++ facilities. There are
also other minor improvements, like mallocing larger buffers rather
than statically allocating them.
This commit is contained in:
Aliaksey Kandratsenka 2024-09-05 22:56:23 -04:00
parent f46c141b4e
commit 7fa0c2da53
2 changed files with 44 additions and 58 deletions

View File

@ -30,6 +30,8 @@
#include <stdint.h>
#include <algorithm>
#include <memory>
#include <new>
#include <random>
#include "run_benchmark.h"
@ -77,63 +79,47 @@ static void bench_fastpath_simple(long iterations,
}
}
#ifdef __GNUC__
#define HAVE_SIZED_FREE_OPTION
extern "C" void tc_delete_sized(void *ptr, size_t size) __attribute__((weak));
extern "C" void *tc_memalign(size_t align, size_t size) __attribute__((weak));
// Reports whether the weakly declared tc_delete_sized symbol has a
// non-null address, i.e. whether it can actually be called.
static bool is_sized_free_available(void)
{
  const bool symbol_present = (tc_delete_sized != nullptr);
  return symbol_present;
}
// Reports whether the weakly declared tc_memalign symbol has a
// non-null address, i.e. whether it can actually be called.
static bool is_memalign_available(void)
{
  const bool symbol_present = (tc_memalign != nullptr);
  return symbol_present;
}
#if __cpp_sized_deallocation
// Benchmark body: every iteration allocates `param` bytes with
// operator new and releases the block right away, handing the size
// back to the deallocation function.
//
// iterations: number of allocate/release rounds to run.
// param: allocation size in bytes (converted to size_t).
//
// NOTE(review): two release calls on `p` appear back to back below
// (tc_delete_sized, then the sized operator delete). This looks like
// the removed and the added line of a diff shown together rather than
// an intentional double release -- confirm against the real file.
static void bench_fastpath_simple_sized(long iterations,
uintptr_t param)
{
size_t sz = static_cast<size_t>(param);
for (; iterations>0; iterations--) {
void *p = (operator new)(sz);
tc_delete_sized(p, sz);
(operator delete)(p, sz);
// The next iteration will use the same free list as this iteration,
// so it should prevent the next iteration's malloc from going too
// far before the free is done. But using the same size will make
// free "too fast", since we'll hit the size class cache.
}
}
#endif // __cpp_sized_deallocation
#if __cpp_aligned_new
// Benchmark body: every iteration obtains a `param`-byte block with
// 32-byte alignment and releases it right away.
//
// iterations: number of allocate/release rounds to run.
// param: allocation size in bytes (converted to size_t).
//
// NOTE(review): `p` is declared twice below (tc_memalign + free, then
// aligned operator new + operator delete). This looks like the removed
// and the added lines of a diff shown together -- confirm against the
// real file which pair is live.
// NOTE(review): the release call uses the sized *and* aligned
// operator delete overload, while the guard visible around this
// function only tests __cpp_aligned_new. Check whether it should also
// depend on __cpp_sized_deallocation, the way
// bench_fastpath_stack_simple selects its delete call.
static void bench_fastpath_memalign(long iterations,
uintptr_t param)
{
size_t sz = static_cast<size_t>(param);
for (; iterations>0; iterations--) {
void *p = tc_memalign(32, sz);
free(p);
// Alignment requested for every allocation in this benchmark.
static constexpr std::align_val_t kAlign{32};
void *p = (operator new)(sz, kAlign);
(operator delete)(p, sz, kAlign);
// The next iteration will use the same free list as this iteration,
// so it should prevent the next iteration's malloc from going too
// far before the free is done. But using the same size will make
// free "too fast", since we'll hit the size class cache.
}
}
#endif // __GNUC__
#define STACKSZ (1 << 16)
#endif // __cpp_aligned_new
static void bench_fastpath_stack(long iterations,
uintptr_t _param)
{
void *stack[STACKSZ];
size_t sz = 64;
long param = static_cast<long>(_param);
param &= STACKSZ - 1;
param = param ? param : 1;
param = std::max(1l, param);
std::unique_ptr<void*[]> stack = std::make_unique<void*[]>(param);
for (; iterations>0; iterations -= param) {
for (long k = param-1; k >= 0; k--) {
void *p = (operator new)(sz);
@ -151,18 +137,21 @@ static void bench_fastpath_stack_simple(long iterations,
uintptr_t _param)
{
void *stack[STACKSZ];
size_t sz = 128;
size_t sz = 32;
long param = static_cast<long>(_param);
param &= STACKSZ - 1;
param = param ? param : 1;
param = std::max(1l, param);
std::unique_ptr<void*[]> stack = std::make_unique<void*[]>(param);
for (; iterations>0; iterations -= param) {
for (long k = param-1; k >= 0; k--) {
void *p = (operator new)(sz);
stack[k] = p;
}
for (long k = 0; k < param; k++) {
#if __cpp_sized_deallocation
(operator delete)(stack[k], sz);
#else
(operator delete)(stack[k]);
#endif
}
}
}
@ -173,15 +162,14 @@ static void bench_fastpath_rnd_dependent(long iterations,
static const uintptr_t rnd_c = 1013904223;
static const uintptr_t rnd_a = 1664525;
void *ptrs[STACKSZ];
size_t sz = 128;
if ((_param & (_param - 1))) {
abort();
}
if (_param > STACKSZ) {
abort();
}
int param = static_cast<int>(_param);
long param = static_cast<long>(_param);
param = std::max(1l, param);
std::unique_ptr<void*[]> ptrs = std::make_unique<void*[]>(param);
for (; iterations>0; iterations -= param) {
for (int k = param-1; k >= 0; k--) {
@ -202,21 +190,18 @@ static void bench_fastpath_rnd_dependent(long iterations,
}
}
static void *randomize_buffer[13<<20];
// Allocates (100<<20) / size blocks of `size` bytes each, shuffles the
// resulting pointers with std::shuffle (std::minstd_rand engine seeded
// from rand()), and then releases the blocks in the shuffled order.
//
// size: size in bytes of each allocation; it is used as a divisor, so
//       it must be non-zero.
//
// NOTE(review): this span contains two variants of the same steps
// (an int count with a fixed-size randomize_buffer, malloc and free;
// and a size_t count with a std::make_unique buffer, operator new and
// operator delete). `count` is declared twice and the loop braces do
// not pair up, which looks like removed and added diff lines shown
// together -- confirm against the real file which variant is live.
void randomize_one_size_class(size_t size) {
int count = (100<<20) / size;
// Variant using the fixed-size buffer: abort when `count` pointers
// would not fit into it.
if (count * sizeof(randomize_buffer[0]) > sizeof(randomize_buffer)) {
abort();
}
for (int i = 0; i < count; i++) {
randomize_buffer[i] = malloc(size);
size_t count = (100<<20) / size;
// Variant using a heap buffer sized for exactly `count` pointers.
auto randomize_buffer = std::make_unique<void*[]>(count);
for (size_t i = 0; i < count; i++) {
randomize_buffer[i] = (operator new)(size);
}
std::shuffle(randomize_buffer, randomize_buffer + count, std::minstd_rand(rand()));
for (int i = 0; i < count; i++) {
free(randomize_buffer[i]);
std::shuffle(randomize_buffer.get(), randomize_buffer.get() + count, std::minstd_rand(rand()));
for (size_t i = 0; i < count; i++) {
(operator delete)(randomize_buffer[i]);
}
}
@ -252,25 +237,26 @@ int main(void)
report_benchmark("bench_fastpath_simple", bench_fastpath_simple, 2048);
report_benchmark("bench_fastpath_simple", bench_fastpath_simple, 16384);
#ifdef HAVE_SIZED_FREE_OPTION
if (is_sized_free_available()) {
report_benchmark("bench_fastpath_simple_sized", bench_fastpath_simple_sized, 64);
report_benchmark("bench_fastpath_simple_sized", bench_fastpath_simple_sized, 2048);
}
if (is_memalign_available()) {
report_benchmark("bench_fastpath_memalign", bench_fastpath_memalign, 64);
report_benchmark("bench_fastpath_memalign", bench_fastpath_memalign, 2048);
}
#if __cpp_sized_deallocation
report_benchmark("bench_fastpath_simple_sized", bench_fastpath_simple_sized, 64);
report_benchmark("bench_fastpath_simple_sized", bench_fastpath_simple_sized, 2048);
#endif
#if __cpp_aligned_new
report_benchmark("bench_fastpath_memalign", bench_fastpath_memalign, 64);
report_benchmark("bench_fastpath_memalign", bench_fastpath_memalign, 2048);
#endif
for (int i = 8; i <= 512; i <<= 1) {
report_benchmark("bench_fastpath_stack", bench_fastpath_stack, i);
}
report_benchmark("bench_fastpath_stack_simple", bench_fastpath_stack_simple, 32);
report_benchmark("bench_fastpath_stack_simple", bench_fastpath_stack_simple, 8192);
report_benchmark("bench_fastpath_stack_simple", bench_fastpath_stack_simple, 32768);
report_benchmark("bench_fastpath_rnd_dependent", bench_fastpath_rnd_dependent, 32);
report_benchmark("bench_fastpath_rnd_dependent", bench_fastpath_rnd_dependent, 8192);
report_benchmark("bench_fastpath_rnd_dependent", bench_fastpath_rnd_dependent, 32768);
return 0;
}

View File

@ -114,7 +114,7 @@ void report_benchmark(const char *name, bench_body body, uintptr_t param)
if (padding_size < 1) {
padding_size = 1;
}
printf("%*c%f nsec\n", padding_size, ' ', nsec);
printf("%*c%f nsec (rate: %f Mops/sec)\n", padding_size, ' ', nsec, 1e9/nsec/1e6);
fflush(stdout);
}
}