From e7297f0c14b0c09ec52707ed7477c7aee43a1882 Mon Sep 17 00:00:00 2001 From: Aliaksey Kandratsenka Date: Sat, 15 Mar 2014 12:44:11 -0700 Subject: [PATCH] speed up MallocExtension::instance() It was reported that pthread_once is expensive, especially on ppc. In the new implementation, the hot path does just a plain read instead of a potentially expensive atomic read with a barrier. It is slightly less robust than the older implementation, but it should be faster. The new code assumes that programs do not spawn threads before main() is called, and therefore that all variables & modules are initialized before threads are created. That looks like a pretty safe assumption. With that assumption, doing a plain read is safe, because current_instance is initialized as part of module init and therefore before threads are spawned. This patch is based on feedback from Adhemerval Zanella. --- src/malloc_extension.cc | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/malloc_extension.cc b/src/malloc_extension.cc index 5ac337f..4ff719c 100644 --- a/src/malloc_extension.cc +++ b/src/malloc_extension.cc @@ -51,6 +51,7 @@ #include "gperftools/malloc_extension.h" #include "gperftools/malloc_extension_c.h" #include "maybe_threads.h" +#include "base/googleinit.h" using STL_NAMESPACE::string; using STL_NAMESPACE::vector; @@ -194,23 +195,27 @@ void MallocExtension::GetFreeListSizes( // The current malloc extension object. 
-static pthread_once_t module_init = PTHREAD_ONCE_INIT; -static MallocExtension* current_instance = NULL; +static MallocExtension* current_instance; static void InitModule() { + if (current_instance != NULL) { + return; + } current_instance = new MallocExtension; #ifndef NO_HEAP_CHECK HeapLeakChecker::IgnoreObject(current_instance); #endif } +REGISTER_MODULE_INITIALIZER(malloc_extension_init, InitModule()) + MallocExtension* MallocExtension::instance() { - perftools_pthread_once(&module_init, InitModule); + InitModule(); return current_instance; } void MallocExtension::Register(MallocExtension* implementation) { - perftools_pthread_once(&module_init, InitModule); + InitModule(); // When running under valgrind, our custom malloc is replaced with // valgrind's one and malloc extensions will not work. (Note: // callers should be responsible for checking that they are the