// Multi-threaded variant of the "rnd_dependent" fast-path benchmark.
//
// Runs the same allocate-batch / free-in-scrambled-order loop concurrently
// on kThreads threads. Presumably this is meant to stress the allocator's
// cross-thread (shared) structures rather than a purely thread-local fast
// path -- confirm against the allocator being measured.
//
// iterations: number of allocations each thread performs, rounded up to a
//             whole number of batches. Note this is per thread, not a total
//             split across threads.
// _param:     batch size, i.e. how many objects are live at once in each
//             thread. Must be zero or a power of two, otherwise the process
//             is aborted; zero is treated as one.
static void bench_fastpath_rnd_dependent_8cores(long iterations,
                                                uintptr_t _param)
{
  // Linear congruential generator x' = x * rnd_a + rnd_c. Because
  // rnd_a % 4 == 1 and rnd_c is odd, the generator has full period modulo
  // any power of two, so its low log2(param) bits visit every index in
  // [0, param) exactly once before returning to the starting value 0.
  static const uintptr_t rnd_c = 1013904223;
  static const uintptr_t rnd_a = 1664525;
  // Number of concurrent worker threads (the "8cores" in the name).
  static const int kThreads = 8;

  // The index mask (param - 1) below only works for powers of two.
  if ((_param & (_param - 1))) {
    abort();
  }

  long param = static_cast<long>(_param);
  param = std::max(1l, param);

  auto body = [iterations, param] () {
    size_t sz = 128;
    std::unique_ptr<void*[]> ptrs = std::make_unique<void*[]>(param);

    for (long i = iterations; i > 0; i -= param) {
      // Fill the batch. The next request size (16..527 bytes) is derived
      // from the address just returned, so each allocation depends on the
      // result of the previous one.
      for (long k = param - 1; k >= 0; k--) {
        void *p = (operator new)(sz);
        ptrs[k] = p;
        sz = ((sz | reinterpret_cast<uintptr_t>(p)) & 511) + 16;
      }

      // this will iterate through all objects in order that is
      // unpredictable to processor's prefetchers
      //
      // The walk starts at index 0 and stops when the generator comes
      // back to index 0, which (full period, see above) happens only
      // after every slot has been freed exactly once.
      const uintptr_t mask = static_cast<uintptr_t>(param - 1);
      uintptr_t rnd = 0;
      uintptr_t free_idx = 0;
      do {
        (operator delete)(ptrs[free_idx]);
        rnd = rnd * rnd_a + rnd_c;
        free_idx = rnd & mask;
      } while (free_idx != 0);
    }
  };

  // Start all workers first, then wait for all of them, so that they run
  // concurrently.
  std::thread ts[kThreads];
  for (auto &t : ts) {
    t = std::thread{body};
  }
  for (auto &t : ts) {
    t.join();
  }
}
randomize_buffer = std::make_unique(count); @@ -262,5 +307,8 @@ int main(int argc, char **argv) report_benchmark("bench_fastpath_rnd_dependent", bench_fastpath_rnd_dependent, 32); report_benchmark("bench_fastpath_rnd_dependent", bench_fastpath_rnd_dependent, 8192); report_benchmark("bench_fastpath_rnd_dependent", bench_fastpath_rnd_dependent, 32768); + + report_benchmark("bench_fastpath_rnd_dependent_8cores", bench_fastpath_rnd_dependent_8cores, 32768); + return 0; }