diff --git a/Makefile.am b/Makefile.am
index 9f82fde..b5f4725 100755
--- a/Makefile.am
+++ b/Makefile.am
@@ -845,18 +845,23 @@ endif WITH_STACK_TRACE
 
 endif WITH_DEBUGALLOC
 
+if !MINGW
+noinst_LTLIBRARIES += librun_benchmark.la
+librun_benchmark_la_SOURCES = \
+	benchmark/run_benchmark.c benchmark/run_benchmark.h
+
 noinst_PROGRAMS += malloc_bench malloc_bench_shared
 
 malloc_bench_SOURCES = benchmark/malloc_bench.cc
 malloc_bench_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS) $(NO_BUILTIN_CXXFLAGS)
 malloc_bench_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS) -static
-malloc_bench_LDADD = libtcmalloc_minimal.la $(PTHREAD_LIBS)
+malloc_bench_LDADD = librun_benchmark.la libtcmalloc_minimal.la $(PTHREAD_LIBS)
 
 malloc_bench_shared_SOURCES = benchmark/malloc_bench.cc
 malloc_bench_shared_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS) $(NO_BUILTIN_CXXFLAGS)
 malloc_bench_shared_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
-malloc_bench_shared_LDADD = libtcmalloc_minimal.la $(PTHREAD_LIBS)
-
+malloc_bench_shared_LDADD = librun_benchmark.la libtcmalloc_minimal.la $(PTHREAD_LIBS)
+endif !MINGW
 
 ### ------- tcmalloc (thread-caching malloc + heap profiler + heap checker)
 
diff --git a/benchmark/malloc_bench.cc b/benchmark/malloc_bench.cc
index e5e0d38..0d9dc4c 100644
--- a/benchmark/malloc_bench.cc
+++ b/benchmark/malloc_bench.cc
@@ -1,93 +1,200 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 #include <stdio.h>
 #include <stdlib.h>
+#include <stdint.h>
+
+#include "run_benchmark.h"
+
+// Benchmark bodies. Each one has the bench_body signature from
+// run_benchmark.h: it performs roughly `iterations' malloc/free pairs
+// and receives one benchmark-specific parameter.
+
+// malloc/free pairs whose sizes do not depend on the returned pointer.
+static void bench_fastpath_throughput(long iterations,
+                                      uintptr_t param)
+{
+  size_t sz = 32;
+  for (; iterations>0; iterations--) {
+    void *p = malloc(sz);
+    if (!p) {
+      abort();
+    }
+    free(p);
+    // this makes next iteration use different free list. So
+    // subsequent iterations may actually overlap in time.
+    sz = (sz & 511) + 16;
+  }
+}
+
+// Same as above, but the next size is derived from the returned pointer.
+static void bench_fastpath_dependent(long iterations,
+                                     uintptr_t param)
+{
+  size_t sz = 32;
+  for (; iterations>0; iterations--) {
+    void *p = malloc(sz);
+    if (!p) {
+      abort();
+    }
+    free(p);
+    // this makes next iteration depend on current iteration. But this
+    // iteration's free may still overlap with next iteration's malloc
+    sz = ((sz | reinterpret_cast<size_t>(p)) & 511) + 16;
+  }
+}
+
+// malloc/free pairs of one fixed size (64 bytes).
+static void bench_fastpath_simple(long iterations,
+                                  uintptr_t param)
+{
+  size_t sz = 64;
+  for (; iterations>0; iterations--) {
+    void *p = malloc(sz);
+    if (!p) {
+      abort();
+    }
+    free(p);
+    // next iteration will use same free list as this iteration. So it
+    // should prevent next iteration's malloc from getting too far
+    // ahead of this free. But using same size will make free "too
+    // fast" since we'll hit size class cache.
+  }
+}
+
+#define STACKSZ (1 << 16)
+
+// Allocates `_param' objects (value masked with STACKSZ-1; 0 becomes
+// 1) with pointer-dependent sizes, then frees them all; repeats until
+// `iterations' allocations are done.
+static void bench_fastpath_stack(long iterations,
+                                 uintptr_t _param)
+{
+
+  void *stack[STACKSZ];
+  size_t sz = 64;
+  long param = static_cast<long>(_param);
+  param &= STACKSZ - 1;
+  param = param ? param : 1;
+  for (; iterations>0; iterations -= param) {
+    for (long k = param-1; k >= 0; k--) {
+      void *p = malloc(sz);
+      if (!p) {
+        abort();
+      }
+      stack[k] = p;
+      // this makes next iteration depend on result of this iteration
+      sz = ((sz | reinterpret_cast<size_t>(p)) & 511) + 16;
+    }
+    for (long k = 0; k < param; k++) {
+      free(stack[k]);
+    }
+  }
+}
+
+// Like bench_fastpath_stack, but every object has the same fixed size
+// (128 bytes).
+static void bench_fastpath_stack_simple(long iterations,
+                                        uintptr_t _param)
+{
+
+  void *stack[STACKSZ];
+  size_t sz = 128;
+  long param = static_cast<long>(_param);
+  param &= STACKSZ - 1;
+  param = param ? param : 1;
+  for (; iterations>0; iterations -= param) {
+    for (long k = param-1; k >= 0; k--) {
+      void *p = malloc(sz);
+      if (!p) {
+        abort();
+      }
+      stack[k] = p;
+    }
+    for (long k = 0; k < param; k++) {
+      free(stack[k]);
+    }
+  }
+}
+
+// Allocates `_param' objects, then frees them in LCG-generated order.
+// `_param' must be a non-zero power of two no larger than STACKSZ.
+static void bench_fastpath_rnd_dependent(long iterations,
+                                         uintptr_t _param)
+{
+  static const uintptr_t rnd_c = 1013904223;
+  static const uintptr_t rnd_a = 1664525;
+
+  void *ptrs[STACKSZ];
+  size_t sz = 128;
+  // zero passes the power-of-two test below, but would make the outer
+  // loop spin forever (iterations -= 0), so reject it explicitly
+  if (_param == 0 || (_param & (_param - 1)) != 0) {
+    abort();
+  }
+  if (_param > STACKSZ) {
+    abort();
+  }
+  int param = static_cast<int>(_param);
+
+  for (; iterations>0; iterations -= param) {
+    for (int k = param-1; k >= 0; k--) {
+      void *p = malloc(sz);
+      if (!p) {
+        abort();
+      }
+      ptrs[k] = p;
+      sz = ((sz | reinterpret_cast<size_t>(p)) & 511) + 16;
+    }
+
+    // this will iterate through all objects in order that is
+    // unpredictable to processor's prefetchers
+    uint32_t rnd = 0;
+    uint32_t free_idx = 0;
+    do {
+      free(ptrs[free_idx]);
+      rnd = rnd * rnd_a + rnd_c;
+      free_idx = rnd & (param - 1);
+    } while (free_idx != 0);
+  }
+}
 
 int main(void)
 {
-    long long i = 1LL<<(28-4);
-    size_t sz = 32;
-    printf("i = %lld\n", i);
-    for (;i>0;i--) {
-        void *p = malloc(sz);
-        if (!p) {
-            abort();
-        }
-        free(p);
-        p = malloc(sz);
-        if (!p) {
-            abort();
-        }
-        free(p);
-        p = malloc(sz);
-        if (!p) {
-            abort();
-        }
-        free(p);
-        p = malloc(sz);
-        if (!p) {
-            abort();
-        }
-        free(p);
-        p = malloc(sz);
-        if (!p) {
-            abort();
-        }
-        free(p);
-        p = malloc(sz);
-        if (!p) {
-            abort();
-        }
-        free(p);
-        p = malloc(sz);
-        if (!p) {
-            abort();
-        }
-        free(p);
-        p = malloc(sz);
-        if (!p) {
-            abort();
-        }
-        free(p);
-        p = malloc(sz);
-        if (!p) {
-            abort();
-        }
-        free(p);
-        p = malloc(sz);
-        if (!p) {
-            abort();
-        }
-        free(p);
-        p = malloc(sz);
-        if (!p) {
-            abort();
-        }
-        free(p);
-        p = malloc(sz);
-        if (!p) {
-            abort();
-        }
-        free(p);
-        p = malloc(sz);
-        if (!p) {
-            abort();
-        }
-        free(p);
-        p = malloc(sz);
-        if (!p) {
-            abort();
-        }
-        free(p);
-        p = malloc(sz);
-        if (!p) {
-            abort();
-        }
-        free(p);
-        p = malloc(sz);
-        if (!p) {
-            abort();
-        }
-        free(p);
-        sz = ((sz | reinterpret_cast<size_t>(p)) & 511) + 16;
-    }
-    return 0;
+  report_benchmark("bench_fastpath_throughput", bench_fastpath_throughput, 0);
+  report_benchmark("bench_fastpath_dependent", bench_fastpath_dependent, 0);
+  report_benchmark("bench_fastpath_simple", bench_fastpath_simple, 0);
+  for (int i = 8; i <= 512; i <<= 1) {
+    report_benchmark("bench_fastpath_stack", bench_fastpath_stack, i);
+  }
+  report_benchmark("bench_fastpath_stack_simple", bench_fastpath_stack_simple, 32);
+  report_benchmark("bench_fastpath_stack_simple", bench_fastpath_stack_simple, 8192);
+  report_benchmark("bench_fastpath_rnd_dependent", bench_fastpath_rnd_dependent, 32);
+  report_benchmark("bench_fastpath_rnd_dependent", bench_fastpath_rnd_dependent, 8192);
+  return 0;
 }
diff --git a/benchmark/run_benchmark.c b/benchmark/run_benchmark.c
new file mode 100644
index 0000000..9bf04f4
--- /dev/null
+++ b/benchmark/run_benchmark.c
@@ -0,0 +1,129 @@
+// -*- Mode: C; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "run_benchmark.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+
+// Benchmark body bundled with the parameter it should be called with.
+struct internal_bench {
+  bench_body body;
+  uintptr_t param;
+};
+
+// Invokes the benchmark body once with the given iteration count.
+static void run_body(struct internal_bench *b, long iterations)
+{
+  b->body(iterations, b->param);
+}
+
+// Runs the body once and returns the elapsed wall-clock time in
+// nanoseconds (gettimeofday has microsecond resolution). Aborts if
+// gettimeofday fails.
+static double measure_once(struct internal_bench *b, long iterations)
+{
+  struct timeval tv_before, tv_after;
+  int rv;
+  double elapsed;
+
+  rv = gettimeofday(&tv_before, NULL);
+  if (rv) {
+    perror("gettimeofday");
+    abort();
+  }
+
+  run_body(b, iterations);
+
+  rv = gettimeofday(&tv_after, NULL);
+  if (rv) {
+    perror("gettimeofday");
+    abort();
+  }
+  tv_after.tv_sec -= tv_before.tv_sec;
+  elapsed = tv_after.tv_sec * 1E6 + tv_after.tv_usec;
+  elapsed -= tv_before.tv_usec;
+  elapsed *= 1000;  // usec -> nsec
+  return elapsed;
+}
+
+// minimum duration of the calibration run
+#define TRIAL_NSEC 0.3E9
+// minimum duration of the run whose result is reported
+#define TARGET_NSEC 3E9
+
+// Returns the measured cost of one iteration in nanoseconds. The
+// iteration count is doubled until a run exceeds TRIAL_NSEC, then
+// scaled up until a run lasts at least TARGET_NSEC.
+static double run_benchmark(struct internal_bench *b)
+{
+  long iterations = 128;
+  double nsec;
+  while (1) {
+    nsec = measure_once(b, iterations);
+    if (nsec > TRIAL_NSEC) {
+      break;
+    }
+    iterations <<= 1;
+  }
+  while (nsec < TARGET_NSEC) {
+    // 1.1 overshoots the target by 10%, presumably so one rescale suffices
+    iterations = (long)(iterations * TARGET_NSEC * 1.1 / nsec);
+    nsec = measure_once(b, iterations);
+  }
+  return nsec / iterations;
+}
+
+// Runs the benchmark three times, printing one
+// "Benchmark: name(param) ... nsec" line per run. The "(param)"
+// suffix is omitted when param is 0 or name already ends with ')'.
+void report_benchmark(const char *name, bench_body body, uintptr_t param)
+{
+  int i;
+  size_t name_len = strlen(name);
+  struct internal_bench b = {.body = body, .param = param};
+  for (i = 0; i < 3; i++) {
+    double nsec = run_benchmark(&b);
+    int slen;
+    int padding_size;
+
+    slen = printf("Benchmark: %s", name);
+    // name_len check keeps an empty name from indexing name[-1]
+    if (param && (name_len == 0 || name[name_len-1] != ')')) {
+      // param is unsigned, so print it as such
+      slen += printf("(%llu)", (unsigned long long)param);
+    }
+    padding_size = 60 - slen;
+    if (padding_size < 1) {
+      padding_size = 1;
+    }
+    printf("%*c%f nsec\n", padding_size, ' ', nsec);
+    fflush(stdout);
+  }
+}
diff --git a/benchmark/run_benchmark.h b/benchmark/run_benchmark.h
new file mode 100644
index 0000000..e030d1e
--- /dev/null
+++ b/benchmark/run_benchmark.h
@@ -0,0 +1,47 @@
+// -*- Mode: C; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#ifndef RUN_BENCHMARK_H_
+#define RUN_BENCHMARK_H_
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Benchmark body: performs about `iterations' units of work. `param'
+// is the value that was passed to report_benchmark.
+typedef void (*bench_body)(long iterations, uintptr_t param);
+
+// Times `body' and prints three "Benchmark: ..." result lines (time
+// per iteration in nsec) to stdout.
+void report_benchmark(const char *name, bench_body body, uintptr_t param);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // RUN_BENCHMARK_H_