From 66bdf24061a26e5c0f0c18619b06338b6925a1bb Mon Sep 17 00:00:00 2001 From: Aliaksey Kandratsenka Date: Thu, 22 Feb 2024 18:16:54 -0500 Subject: [PATCH] initialize correct MallocExtension instance early Previous implementation only had tcmalloc.cc's TCMallocGuard to register correct MallocExtension instance. Which is occasionally too late. This original design (as well as its ancestor in abseil tcmalloc) allows malloc_extension.cc to be built and linked separately from tcmalloc. So that software that uses extended features can be linked with non-tcmalloc malloc (or, for example, asan). In our case, we don't offer such flexibility. But we choose to keep the ability to (re)enable it. New implementation makes sure to register malloc extension on first call to memory allocation. Which typically happens super-early. In case malloc/operator new aren't called early enough, we make sure that first call to MallocExtension::instance invokes malloc as part of creating its 'empty' malloc extension, and thus provokes tcmalloc (or any other malloc that chose to implement our malloc extension interface) to register its proper MallocExtension instance. 
--- src/debugallocation.cc | 26 ++++-------- src/malloc_extension.cc | 92 ++++++++++++++--------------------------- src/tcmalloc.cc | 28 ++++++------- src/tcmalloc_internal.h | 7 ++++ src/thread_cache.cc | 2 + 5 files changed, 58 insertions(+), 97 deletions(-) diff --git a/src/debugallocation.cc b/src/debugallocation.cc index 98be1fa..4d358c4 100644 --- a/src/debugallocation.cc +++ b/src/debugallocation.cc @@ -1156,26 +1156,14 @@ class DebugMallocImplementation : public TCMallocImplementation { v->push_back(i); } - }; +}; -static union { - char chars[sizeof(DebugMallocImplementation)]; - void *ptr; -} debug_malloc_implementation_space; - -REGISTER_MODULE_INITIALIZER(debugallocation, { -#if (__cplusplus >= 201103L) - static_assert(alignof(decltype(debug_malloc_implementation_space)) >= alignof(DebugMallocImplementation), - "DebugMallocImplementation is expected to need just word alignment"); -#endif - // Either we or valgrind will control memory management. We - // register our extension if we're the winner. Otherwise let - // Valgrind use its own malloc (so don't register our extension). 
- if (!RunningOnValgrind()) { - DebugMallocImplementation *impl = new (debug_malloc_implementation_space.chars) DebugMallocImplementation(); - MallocExtension::Register(impl); - } -}); +void SetupMallocExtension() { + static struct { + alignas(DebugMallocImplementation) char memory[sizeof(DebugMallocImplementation)]; + } storage; + MallocExtension::Register(new (storage.memory) DebugMallocImplementation); +} REGISTER_MODULE_DESTRUCTOR(debugallocation, { if (!RunningOnValgrind()) { diff --git a/src/malloc_extension.cc b/src/malloc_extension.cc index 2c4f4b2..baa3361 100644 --- a/src/malloc_extension.cc +++ b/src/malloc_extension.cc @@ -32,6 +32,10 @@ // Author: Sanjay Ghemawat #include + +#include "gperftools/malloc_extension.h" +#include "gperftools/malloc_extension_c.h" + #include #include #include @@ -39,53 +43,25 @@ #include #include +#include #include "base/dynamic_annotations.h" #include "base/googleinit.h" #include "base/proc_maps_iterator.h" -#include "gperftools/malloc_extension.h" -#include "gperftools/malloc_extension_c.h" +#include "tcmalloc_internal.h" #ifndef NO_HEAP_CHECK #include "gperftools/heap-checker.h" #endif -using std::string; -using std::vector; - -static void DumpAddressMap(string* result) { +static void DumpAddressMap(std::string* result) { tcmalloc::StringGenericWriter writer(result); writer.AppendStr("\nMAPPED_LIBRARIES:\n"); tcmalloc::SaveProcSelfMaps(&writer); } -// Note: this routine is meant to be called before threads are spawned. void MallocExtension::Initialize() { - static bool initialize_called = false; - - if (initialize_called) return; - initialize_called = true; - -#ifdef __GLIBC__ - // GNU libc++ versions 3.3 and 3.4 obey the environment variables - // GLIBCPP_FORCE_NEW and GLIBCXX_FORCE_NEW respectively. Setting - // one of these variables forces the STL default allocator to call - // new() or delete() for each allocation or deletion. 
Otherwise - // the STL allocator tries to avoid the high cost of doing - // allocations by pooling memory internally. However, tcmalloc - // does allocations really fast, especially for the types of small - // items one sees in STL, so it's better off just using us. - // TODO: control whether we do this via an environment variable? - setenv("GLIBCPP_FORCE_NEW", "1", false /* no overwrite*/); - setenv("GLIBCXX_FORCE_NEW", "1", false /* no overwrite*/); - - // Now we need to make the setenv 'stick', which it may not do since - // the env is flakey before main() is called. But luckily stl only - // looks at this env var the first time it tries to do an alloc, and - // caches what it finds. So we just cause an stl alloc here. - string dummy("I need to be allocated"); - dummy += "!"; // so the definition of dummy isn't optimized out -#endif /* __GLIBC__ */ + // no-op } // SysAllocator implementation @@ -173,7 +149,7 @@ MallocExtension::Ownership MallocExtension::GetOwnership(const void* p) { } void MallocExtension::GetFreeListSizes( - vector* v) { + std::vector* v) { v->clear(); } @@ -187,40 +163,32 @@ void MallocExtension::MarkThreadTemporarilyIdle() { // The current malloc extension object. -static MallocExtension* current_instance; - -static union { - char chars[sizeof(MallocExtension)]; - void *ptr; -} mallocextension_implementation_space; - -static void InitModule() { - if (current_instance != NULL) { - return; - } - current_instance = new (mallocextension_implementation_space.chars) MallocExtension(); -#ifndef NO_HEAP_CHECK - HeapLeakChecker::IgnoreObject(current_instance); -#endif -} - -REGISTER_MODULE_INITIALIZER(malloc_extension_init, InitModule()) +static std::atomic current_instance; MallocExtension* MallocExtension::instance() { - InitModule(); - return current_instance; + MallocExtension* inst = current_instance.load(std::memory_order_relaxed); + if (PREDICT_FALSE(!inst)) { + // Note, we expect the 'new' call to trigger malloc + // initialization. 
Which will call MallocExtension::Register and + // set right value of current_instance. So we check for that. + MallocExtension* candidate = new MallocExtension; + inst = current_instance.load(); + if (!inst) { + Register(candidate); + } else { + delete candidate; + } + } + + return inst; } void MallocExtension::Register(MallocExtension* implementation) { - InitModule(); - // When running under valgrind, our custom malloc is replaced with - // valgrind's one and malloc extensions will not work. (Note: - // callers should be responsible for checking that they are the - // malloc that is really being run, before calling Register. This - // is just here as an extra sanity check.) - if (!RunningOnValgrind()) { - current_instance = implementation; - } + current_instance.store(implementation); + +#ifndef NO_HEAP_CHECK + HeapLeakChecker::IgnoreObject(implementation); +#endif } // ----------------------------------------------------------------------- diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc index 3aed87a..b04c949 100644 --- a/src/tcmalloc.cc +++ b/src/tcmalloc.cc @@ -1087,23 +1087,8 @@ TCMallocGuard::TCMallocGuard() { #ifndef WIN32_OVERRIDE_ALLOCATORS ReplaceSystemAlloc(); // defined in libc_override_*.h + (void)MallocExtension::instance(); // make sure malloc extension is constructed tc_free(tc_malloc(1)); - // Either we, or debugallocation.cc, or valgrind will control memory - // management. We register our extension if we're the winner. -#ifdef TCMALLOC_USING_DEBUGALLOCATION - // Let debugallocation register its extension. -#else - if (RunningOnValgrind()) { - // Let Valgrind uses its own malloc (so don't register our extension). 
- } else { - static union { - char chars[sizeof(TCMallocImplementation)]; - void *ptr; - } tcmallocimplementation_space; - - MallocExtension::Register(new (tcmallocimplementation_space.chars) TCMallocImplementation()); - } -#endif // !TCMALLOC_USING_DEBUGALLOCATION #endif // !WIN32_OVERRIDE_ALLOCATORS ThreadCachePtr::InitThreadCachePtrLate(); @@ -1127,6 +1112,17 @@ TCMallocGuard::~TCMallocGuard() { static TCMallocGuard module_enter_exit_hook; +#ifndef TCMALLOC_USING_DEBUGALLOCATION + +void SetupMallocExtension() { + static struct { + alignas(TCMallocImplementation) char memory[sizeof(TCMallocImplementation)]; + } storage; + MallocExtension::Register(new (storage.memory) TCMallocImplementation); +} + +#endif // TCMALLOC_USING_DEBUGALLOCATION + //------------------------------------------------------------------- // Helpers for the exported routines below //------------------------------------------------------------------- diff --git a/src/tcmalloc_internal.h b/src/tcmalloc_internal.h index 016c805..eece1b5 100644 --- a/src/tcmalloc_internal.h +++ b/src/tcmalloc_internal.h @@ -46,6 +46,10 @@ #include // for memalign, valloc, pvalloc #endif +#include + +#include "base/basictypes.h" + // __THROW is defined in glibc systems. It means, counter-intuitively, // "This function will never throw an exception." It's an optional // optimization tool, but we may need to use it to match glibc prototypes. 
@@ -68,3 +72,6 @@ extern "C" void* valloc(size_t __size) __THROW; #if !HAVE_DECL_PVALLOC extern "C" void* pvalloc(size_t __size) __THROW; #endif + +// Implemented in tcmalloc.cc or debugallocation.cc +ATTRIBUTE_HIDDEN void SetupMallocExtension(); diff --git a/src/thread_cache.cc b/src/thread_cache.cc index 90114b4..ba40e61 100644 --- a/src/thread_cache.cc +++ b/src/thread_cache.cc @@ -44,6 +44,7 @@ #include "base/spinlock.h" // for SpinLockHolder #include "central_freelist.h" #include "getenv_safe.h" // for TCMallocGetenvSafe +#include "tcmalloc_internal.h" #include "thread_cache_ptr.h" using std::min; @@ -293,6 +294,7 @@ void ThreadCache::InitModule() { } Static::InitStaticVars(); threadcache_allocator.Init(); + SetupMallocExtension(); phinited = 1; }