mirror of
https://github.com/gperftools/gperftools
synced 2025-03-25 04:19:22 +00:00
added more fastpath microbenchmarks
This also makes them output nicer results, i.e. every benchmark is run 3 times and the iteration duration is printed for every run. While this is still very synthetic and unrepresentative of malloc performance as a whole, it exercises more situations in the tcmalloc fastpath. So it is a step forward.
This commit is contained in:
parent
347a830689
commit
962aa53c55
11
Makefile.am
11
Makefile.am
@ -845,18 +845,23 @@ endif WITH_STACK_TRACE
|
||||
|
||||
endif WITH_DEBUGALLOC
|
||||
|
||||
if !MINGW
# Benchmark harness (timing + reporting) linked into both malloc_bench
# programs below.
noinst_LTLIBRARIES += librun_benchmark.la
librun_benchmark_la_SOURCES = \
	benchmark/run_benchmark.c benchmark/run_benchmark.h

noinst_PROGRAMS += malloc_bench malloc_bench_shared

# Variant linked with -static.
malloc_bench_SOURCES = benchmark/malloc_bench.cc
malloc_bench_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS) $(NO_BUILTIN_CXXFLAGS)
malloc_bench_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS) -static
malloc_bench_LDADD = librun_benchmark.la libtcmalloc_minimal.la $(PTHREAD_LIBS)

# Same sources and flags, without -static.
malloc_bench_shared_SOURCES = benchmark/malloc_bench.cc
malloc_bench_shared_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS) $(NO_BUILTIN_CXXFLAGS)
malloc_bench_shared_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
malloc_bench_shared_LDADD = librun_benchmark.la libtcmalloc_minimal.la $(PTHREAD_LIBS)

endif !MINGW
|
||||
|
||||
### ------- tcmalloc (thread-caching malloc + heap profiler + heap checker)
|
||||
|
||||
|
@ -1,93 +1,184 @@
|
||||
// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "run_benchmark.h"
|
||||
|
||||
// Benchmark body: performs `iterations` malloc/free pairs (none if
// `iterations` is not positive). `param` is unused. Aborts if malloc
// fails.
static void bench_fastpath_throughput(long iterations,
                                      uintptr_t param)
{
  size_t request = 32;
  while (iterations > 0) {
    void *block = malloc(request);
    if (block == NULL) {
      abort();
    }
    free(block);
    // The next size is derived from the current size only, never from
    // the returned pointer. The intent is that the next iteration hits
    // a different free list, so consecutive iterations may overlap in
    // time.
    request = (request & 511) + 16;
    iterations--;
  }
}
|
||||
|
||||
// Benchmark body: performs `iterations` malloc/free pairs where each
// request size is computed from the address returned by the previous
// malloc. `param` is unused. Aborts if malloc fails.
static void bench_fastpath_dependent(long iterations,
                                     uintptr_t param)
{
  size_t request = 32;
  while (iterations > 0) {
    void *block = malloc(request);
    if (block == NULL) {
      abort();
    }
    free(block);
    // Mixing the pointer bits into the size makes the next malloc
    // depend on this one's result; this iteration's free may still
    // overlap with the next iteration's malloc.
    size_t mixed = request | reinterpret_cast<size_t>(block);
    request = (mixed & 511) + 16;
    iterations--;
  }
}
|
||||
|
||||
// Benchmark body: performs `iterations` malloc/free pairs of a fixed
// 64-byte size. `param` is unused. Aborts if malloc fails.
static void bench_fastpath_simple(long iterations,
                                  uintptr_t param)
{
  const size_t request = 64;
  while (iterations > 0) {
    void *block = malloc(request);
    if (block == NULL) {
      abort();
    }
    // Every iteration uses the same free list, which should keep the
    // next iteration's malloc from running too far ahead of this free.
    // The flip side is that a constant size makes free "too fast",
    // because the size class cache is always hit.
    free(block);
    iterations--;
  }
}
|
||||
|
||||
// Capacity of the on-stack pointer arrays used by the batch benchmarks.
#define STACKSZ (1 << 16)

// Benchmark body: allocates a batch of blocks and then frees the whole
// batch, repeating until `iterations` allocations have been accounted
// for. The batch size is `_param` masked to STACKSZ - 1, with 0 mapped
// to 1. Aborts if malloc fails.
static void bench_fastpath_stack(long iterations,
                                 uintptr_t _param)
{
  void *stack[STACKSZ];
  size_t request = 64;

  long depth = static_cast<long>(_param) & (STACKSZ - 1);
  if (depth == 0) {
    depth = 1;
  }

  while (iterations > 0) {
    // Fill slots from depth-1 down to 0 ...
    long slot = depth;
    while (slot-- > 0) {
      void *block = malloc(request);
      if (block == NULL) {
        abort();
      }
      stack[slot] = block;
      // the next request size depends on the pointer just returned
      request = ((request | reinterpret_cast<size_t>(block)) & 511) + 16;
    }
    // ... then release them from slot 0 upwards, i.e. most recently
    // allocated first.
    for (long i = 0; i != depth; i++) {
      free(stack[i]);
    }
    iterations -= depth;
  }
}
|
||||
|
||||
static void bench_fastpath_stack_simple(long iterations,
|
||||
uintptr_t _param)
|
||||
{
|
||||
|
||||
void *stack[STACKSZ];
|
||||
size_t sz = 128;
|
||||
long param = static_cast<long>(_param);
|
||||
param &= STACKSZ - 1;
|
||||
param = param ? param : 1;
|
||||
for (; iterations>0; iterations -= param) {
|
||||
for (long k = param-1; k >= 0; k--) {
|
||||
void *p = malloc(sz);
|
||||
if (!p) {
|
||||
abort();
|
||||
}
|
||||
stack[k] = p;
|
||||
}
|
||||
for (long k = 0; k < param; k++) {
|
||||
free(stack[k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void bench_fastpath_rnd_dependent(long iterations,
|
||||
uintptr_t _param)
|
||||
{
|
||||
static const uintptr_t rnd_c = 1013904223;
|
||||
static const uintptr_t rnd_a = 1664525;
|
||||
|
||||
void *ptrs[STACKSZ];
|
||||
size_t sz = 128;
|
||||
if ((_param & (_param - 1))) {
|
||||
abort();
|
||||
}
|
||||
if (_param > STACKSZ) {
|
||||
abort();
|
||||
}
|
||||
int param = static_cast<int>(_param);
|
||||
|
||||
for (; iterations>0; iterations -= param) {
|
||||
for (int k = param-1; k >= 0; k--) {
|
||||
void *p = malloc(sz);
|
||||
if (!p) {
|
||||
abort();
|
||||
}
|
||||
ptrs[k] = p;
|
||||
sz = ((sz | reinterpret_cast<size_t>(p)) & 511) + 16;
|
||||
}
|
||||
|
||||
// this will iterate through all objects in order that is
|
||||
// unpredictable to processor's prefetchers
|
||||
uint32_t rnd = 0;
|
||||
uint32_t free_idx = 0;
|
||||
do {
|
||||
free(ptrs[free_idx]);
|
||||
rnd = rnd * rnd_a + rnd_c;
|
||||
free_idx = rnd & (param - 1);
|
||||
} while (free_idx != 0);
|
||||
}
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
long long i = 1LL<<(28-4);
|
||||
size_t sz = 32;
|
||||
printf("i = %lld\n", i);
|
||||
for (;i>0;i--) {
|
||||
void *p = malloc(sz);
|
||||
if (!p) {
|
||||
abort();
|
||||
}
|
||||
free(p);
|
||||
p = malloc(sz);
|
||||
if (!p) {
|
||||
abort();
|
||||
}
|
||||
free(p);
|
||||
p = malloc(sz);
|
||||
if (!p) {
|
||||
abort();
|
||||
}
|
||||
free(p);
|
||||
p = malloc(sz);
|
||||
if (!p) {
|
||||
abort();
|
||||
}
|
||||
free(p);
|
||||
p = malloc(sz);
|
||||
if (!p) {
|
||||
abort();
|
||||
}
|
||||
free(p);
|
||||
p = malloc(sz);
|
||||
if (!p) {
|
||||
abort();
|
||||
}
|
||||
free(p);
|
||||
p = malloc(sz);
|
||||
if (!p) {
|
||||
abort();
|
||||
}
|
||||
free(p);
|
||||
p = malloc(sz);
|
||||
if (!p) {
|
||||
abort();
|
||||
}
|
||||
free(p);
|
||||
p = malloc(sz);
|
||||
if (!p) {
|
||||
abort();
|
||||
}
|
||||
free(p);
|
||||
p = malloc(sz);
|
||||
if (!p) {
|
||||
abort();
|
||||
}
|
||||
free(p);
|
||||
p = malloc(sz);
|
||||
if (!p) {
|
||||
abort();
|
||||
}
|
||||
free(p);
|
||||
p = malloc(sz);
|
||||
if (!p) {
|
||||
abort();
|
||||
}
|
||||
free(p);
|
||||
p = malloc(sz);
|
||||
if (!p) {
|
||||
abort();
|
||||
}
|
||||
free(p);
|
||||
p = malloc(sz);
|
||||
if (!p) {
|
||||
abort();
|
||||
}
|
||||
free(p);
|
||||
p = malloc(sz);
|
||||
if (!p) {
|
||||
abort();
|
||||
}
|
||||
free(p);
|
||||
p = malloc(sz);
|
||||
if (!p) {
|
||||
abort();
|
||||
}
|
||||
free(p);
|
||||
sz = ((sz | reinterpret_cast<size_t>(p)) & 511) + 16;
|
||||
}
|
||||
return 0;
|
||||
report_benchmark("bench_fastpath_throughput", bench_fastpath_throughput, 0);
|
||||
report_benchmark("bench_fastpath_dependent", bench_fastpath_dependent, 0);
|
||||
report_benchmark("bench_fastpath_simple", bench_fastpath_simple, 0);
|
||||
for (int i = 8; i <= 512; i <<= 1) {
|
||||
report_benchmark("bench_fastpath_stack", bench_fastpath_stack, i);
|
||||
}
|
||||
report_benchmark("bench_fastpath_stack_simple", bench_fastpath_stack_simple, 32);
|
||||
report_benchmark("bench_fastpath_stack_simple", bench_fastpath_stack_simple, 8192);
|
||||
report_benchmark("bench_fastpath_rnd_dependent", bench_fastpath_rnd_dependent, 32);
|
||||
report_benchmark("bench_fastpath_rnd_dependent", bench_fastpath_rnd_dependent, 8192);
|
||||
return 0;
|
||||
}
|
||||
|
112
benchmark/run_benchmark.c
Normal file
112
benchmark/run_benchmark.c
Normal file
@ -0,0 +1,112 @@
|
||||
// -*- Mode: C; c-basic-offset: 2; indent-tabs-mode: nil -*-
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include "run_benchmark.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
struct internal_bench {
|
||||
bench_body body;
|
||||
uintptr_t param;
|
||||
};
|
||||
|
||||
static void run_body(struct internal_bench *b, long iterations)
|
||||
{
|
||||
b->body(iterations, b->param);
|
||||
}
|
||||
|
||||
// Stores the current gettimeofday() reading in *tv. On failure prints
// the error via perror() and aborts.
static void read_clock_or_abort(struct timeval *tv)
{
  if (gettimeofday(tv, NULL) != 0) {
    perror("gettimeofday");
    abort();
  }
}

// Runs the benchmark body once for `iterations` iterations and returns
// the elapsed time of that run in nanoseconds, as measured with
// gettimeofday().
static double measure_once(struct internal_bench *b, long iterations)
{
  struct timeval start, end;
  double usec;

  read_clock_or_abort(&start);
  run_body(b, iterations);
  read_clock_or_abort(&end);

  // whole seconds -> microseconds, then add the microsecond remainders
  usec = (end.tv_sec - start.tv_sec) * 1E6 + end.tv_usec;
  usec -= start.tv_usec;

  // microseconds -> nanoseconds
  return usec * 1000;
}
|
||||
|
||||
// A calibration run must last longer than this many nanoseconds.
#define TRIAL_NSEC 0.3E9
// The reported run must last at least this many nanoseconds.
#define TARGET_NSEC 3E9

// Measures the benchmark and returns the average duration of one
// iteration in nanoseconds.
static double run_benchmark(struct internal_bench *b)
{
  long iterations = 128;
  double nsec = measure_once(b, iterations);

  // Calibration: keep doubling the iteration count until a single run
  // takes longer than TRIAL_NSEC.
  while (!(nsec > TRIAL_NSEC)) {
    iterations <<= 1;
    nsec = measure_once(b, iterations);
  }

  // Scale the count to aim 10% past TARGET_NSEC and re-measure until a
  // run actually reaches TARGET_NSEC.
  while (nsec < TARGET_NSEC) {
    iterations = (long)(iterations * TARGET_NSEC * 1.1 / nsec);
    nsec = measure_once(b, iterations);
  }

  return nsec / iterations;
}
|
||||
|
||||
// Runs the benchmark three times and prints one line per run to stdout:
// "Benchmark: <name>", optionally followed by "(<param>)", padded to
// column 60 (at least one space), then the per-iteration time in
// nanoseconds.
//
// The parameter is appended only when it is non-zero and `name` does
// not already end in ')'.
//
//   name  - label printed for the benchmark; must not be NULL
//   body  - benchmark function to measure
//   param - value passed to `body` on every invocation
void report_benchmark(const char *name, bench_body body, uintptr_t param)
{
  int i;
  struct internal_bench b = {.body = body, .param = param};
  // Computed once; the length check keeps an empty name from indexing
  // name[-1].
  size_t name_len = strlen(name);
  int ends_with_paren = name_len > 0 && name[name_len - 1] == ')';

  for (i = 0; i < 3; i++) {
    double nsec = run_benchmark(&b);
    int slen;
    int padding_size;

    slen = printf("Benchmark: %s", name);
    if (param && !ends_with_paren) {
      // uintptr_t is unsigned, so print it as unsigned
      slen += printf("(%llu)", (unsigned long long)param);
    }
    padding_size = 60 - slen;
    if (padding_size < 1) {
      padding_size = 1;
    }
    printf("%*c%f nsec\n", padding_size, ' ', nsec);
    // flush after every run so each result appears as soon as it is ready
    fflush(stdout);
  }
}
|
43
benchmark/run_benchmark.h
Normal file
43
benchmark/run_benchmark.h
Normal file
@ -0,0 +1,43 @@
|
||||
// -*- Mode: C; c-basic-offset: 2; indent-tabs-mode: nil -*-
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
// Guard name avoids the leading underscore + uppercase form, which is
// reserved for the implementation.
#ifndef RUN_BENCHMARK_H_
#define RUN_BENCHMARK_H_
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

// Signature of a benchmark body: called with the number of iterations
// to perform and a benchmark-specific parameter.
typedef void (*bench_body)(long iterations, uintptr_t param);

// Measures `body` (called with `param`) and prints the results to
// stdout, labelled with `name`.
void report_benchmark(const char *name, bench_body body, uintptr_t param);

#ifdef __cplusplus
} // extern "C"
#endif

#endif // RUN_BENCHMARK_H_
|
Loading…
Reference in New Issue
Block a user