From b8f9d0d44f94177d34b069180618b7d002e85b69 Mon Sep 17 00:00:00 2001 From: Aliaksey Kandratsenka Date: Sun, 18 Dec 2016 09:35:02 -0800 Subject: [PATCH] ported nallocx support from Google-internal tcmalloc nallocx is extension introduced by jemalloc. It returns effective size of allocaiton without allocating anything. We also support MALLOCX_LG_ALIGN flag. But all other jemalloc flags (which at the moment do nothing for nallocx anyways) are silently ignored, since there is no sensible way to return errors in this API. This was originally contributed by Dmitry Vyukov with input from Andrew Hunter. But due to significant divergence of Google-internal and free-software forks of tcmalloc, significant massaging was done by me. So all bugs are mine. --- Makefile.am | 5 +- src/gperftools/nallocx.h | 37 +++++++++ src/tcmalloc.cc | 133 ++++++++++++++++++++++++--------- src/tests/tcmalloc_unittest.cc | 47 ++++++++++++ 4 files changed, 184 insertions(+), 38 deletions(-) create mode 100644 src/gperftools/nallocx.h diff --git a/Makefile.am b/Makefile.am index ccbf009..4a281f4 100755 --- a/Makefile.am +++ b/Makefile.am @@ -101,7 +101,7 @@ endif MINGW if HAVE_OBJCOPY_WEAKEN WEAKEN = $(OBJCOPY) -W malloc -W free -W realloc -W calloc -W cfree \ -W memalign -W posix_memalign -W valloc -W pvalloc \ - -W malloc_stats -W mallopt -W mallinfo \ + -W malloc_stats -W mallopt -W mallinfo -W nallocx \ -W _Znwm -W _ZnwmRKSt9nothrow_t -W _Znam -W _ZnamRKSt9nothrow_t \ -W _ZdlPv -W _ZdaPv \ -W __Znwm -W __ZnwmRKSt9nothrow_t -W __Znam -W __ZnamRKSt9nothrow_t \ @@ -457,7 +457,8 @@ S_TCMALLOC_MINIMAL_INCLUDES = src/common.h \ SG_TCMALLOC_MINIMAL_INCLUDES = src/gperftools/malloc_hook.h \ src/gperftools/malloc_hook_c.h \ src/gperftools/malloc_extension.h \ - src/gperftools/malloc_extension_c.h + src/gperftools/malloc_extension_c.h \ + src/gperftools/nallocx.h TCMALLOC_MINIMAL_INCLUDES = $(S_TCMALLOC_MINIMAL_INCLUDES) $(SG_TCMALLOC_MINIMAL_INCLUDES) $(SG_STACKTRACE_INCLUDES) perftoolsinclude_HEADERS 
+= $(SG_TCMALLOC_MINIMAL_INCLUDES) diff --git a/src/gperftools/nallocx.h b/src/gperftools/nallocx.h new file mode 100644 index 0000000..9d77329 --- /dev/null +++ b/src/gperftools/nallocx.h @@ -0,0 +1,37 @@ +#ifndef _NALLOCX_H_ +#define _NALLOCX_H_ +#include <stddef.h> + +#ifdef _WIN32 +# define PERFTOOLS_NALLOCX_DLL_DECL __declspec(dllimport) +#else +# define PERFTOOLS_NALLOCX_DLL_DECL +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define MALLOCX_LG_ALIGN(la) ((int)(la)) + +/* + * The nallocx function allocates no memory, but it performs the same size + * computation as the malloc function, and returns the real size of the + * allocation that would result from the equivalent malloc function call. + * nallocx is a malloc extension originally implemented by jemalloc: + * http://www.unix.com/man-page/freebsd/3/nallocx/ + * + * Note, we only support MALLOCX_LG_ALIGN flag and nothing else. + */ +PERFTOOLS_NALLOCX_DLL_DECL size_t nallocx(size_t size, int flags); + +/* same as above but never weak */ +PERFTOOLS_NALLOCX_DLL_DECL size_t tc_nallocx(size_t size, int flags); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#undef PERFTOOLS_NALLOCX_DLL_DECL + +#endif /* _NALLOCX_H_ */ diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc index 467f090..5f64339 100644 --- a/src/tcmalloc.cc +++ b/src/tcmalloc.cc @@ -111,6 +111,7 @@ #include <gperftools/malloc_extension.h> #include <gperftools/malloc_hook.h> // for MallocHook +#include <gperftools/nallocx.h> #include "base/basictypes.h" // for int64 #include "base/commandlineflags.h" // for RegisterFlagValidator, etc #include "base/dynamic_annotations.h" // for RunningOnValgrind @@ -789,15 +790,7 @@ class TCMallocImplementation : public MallocExtension { virtual double GetMemoryReleaseRate() { return FLAGS_tcmalloc_release_rate; } - virtual size_t GetEstimatedAllocatedSize(size_t size) { - if (size <= kMaxSize) { - const size_t cl = Static::sizemap()->SizeClass(size); - const size_t alloc_size = Static::sizemap()->ByteSizeForClass(cl); - return alloc_size; - } else { - return tcmalloc::pages(size) << kPageShift; - }
- } + virtual size_t GetEstimatedAllocatedSize(size_t size); // This just calls GetSizeWithCallback, but because that's in an // unnamed namespace, we need to move the definition below it in the @@ -914,6 +907,79 @@ class TCMallocImplementation : public MallocExtension { } }; +// Returns size class that is suitable for allocation of size bytes with +// align alignment. Or 0, if there is no such size class. +static uint32_t size_class_with_alignment(size_t size, size_t align) { + if (align >= kPageSize) { + return 0; + } + size_t cl; + if (!Static::sizemap()->MaybeSizeClass(size, &cl)) { + return 0; + } + // Search through acceptable size classes looking for one with + // enough alignment. This depends on the fact that + // InitSizeClasses() currently produces several size classes that + // are aligned at powers of two. We will waste time and space if + // we miss in the size class array, but that is deemed acceptable + // since memalign() should be used rarely. + while (cl < kNumClasses && + ((Static::sizemap()->class_to_size(cl) & (align - 1)) != 0)) { + cl++; + } + if (cl == kNumClasses) { + return 0; + } + return cl; +} + +// nallocx slow path. Moved to a separate function because +// ThreadCache::InitModule is not inlined which would cause nallocx to +// become non-leaf function with stack frame and stack spills. +static ATTRIBUTE_NOINLINE size_t nallocx_slow(size_t size, int flags) { + if (UNLIKELY(!Static::IsInited())) ThreadCache::InitModule(); + + size_t align = static_cast<size_t>(1ull << (flags & 0x3f)); + size_t cl = size_class_with_alignment(size, align); + if (cl) { + return Static::sizemap()->ByteSizeForClass(cl); + } else { + return tcmalloc::pages(size) << kPageShift; + } +} + +// The nallocx function allocates no memory, but it performs the same size +// computation as the malloc function, and returns the real size of the +// allocation that would result from the equivalent malloc function call.
+// nallocx is a malloc extension originally implemented by jemalloc: +// http://www.unix.com/man-page/freebsd/3/nallocx/ +extern "C" size_t tc_nallocx(size_t size, int flags) { + if (UNLIKELY(flags != 0)) { + return nallocx_slow(size, flags); + } + size_t cl; + // size class 0 is only possible if malloc is not yet initialized + if (Static::sizemap()->MaybeSizeClass(size, &cl) && cl != 0) { + return Static::sizemap()->ByteSizeForClass(cl); + } else { + return nallocx_slow(size, 0); + } +} + +extern "C" size_t nallocx(size_t size, int flags) +#ifdef TC_ALIAS + TC_ALIAS(tc_nallocx); +#else +{ + return nallocx_slow(size, flags); +} +#endif + + +size_t TCMallocImplementation::GetEstimatedAllocatedSize(size_t size) { + return tc_nallocx(size, 0); +} + // The constructor allocates an object to ensure that initialization // runs before main(), and therefore we do not have a chance to become // multi-threaded before initialization. We also create the TSD key @@ -1355,17 +1421,24 @@ inline size_t GetSizeWithCallback(const void* ptr, size_t cl = Static::pageheap()->GetSizeClassIfCached(p); if (cl != 0) { return Static::sizemap()->ByteSizeForClass(cl); - } else { - const Span *span = Static::pageheap()->GetDescriptor(p); - if (UNLIKELY(span == NULL)) { // means we do not own this memory - return (*invalid_getsize_fn)(ptr); - } else if (span->sizeclass != 0) { - Static::pageheap()->CacheSizeClass(p, span->sizeclass); - return Static::sizemap()->ByteSizeForClass(span->sizeclass); - } else { - return span->length << kPageShift; - } } + + const Span *span = Static::pageheap()->GetDescriptor(p); + if (UNLIKELY(span == NULL)) { // means we do not own this memory + return (*invalid_getsize_fn)(ptr); + } + + if (span->sizeclass != 0) { + Static::pageheap()->CacheSizeClass(p, span->sizeclass); + return Static::sizemap()->ByteSizeForClass(span->sizeclass); + } + + if (span->sample) { + size_t orig_size = reinterpret_cast<StackTrace*>(span->objects)->size; + return tc_nallocx(orig_size, 0); + } + + 
return span->length << kPageShift; } // This lets you call back to a given function pointer if ptr is invalid. @@ -1444,23 +1517,11 @@ void* do_memalign(size_t align, size_t size) { // Allocate at least one byte to avoid boundary conditions below if (size == 0) size = 1; - if (size <= kMaxSize && align < kPageSize) { - // Search through acceptable size classes looking for one with - // enough alignment. This depends on the fact that - // InitSizeClasses() currently produces several size classes that - // are aligned at powers of two. We will waste time and space if - // we miss in the size class array, but that is deemed acceptable - // since memalign() should be used rarely. - int cl = Static::sizemap()->SizeClass(size); - while (cl < kNumClasses && - ((Static::sizemap()->class_to_size(cl) & (align - 1)) != 0)) { - cl++; - } - if (cl < kNumClasses) { - ThreadCache* heap = ThreadCache::GetCache(); - size = Static::sizemap()->class_to_size(cl); - return CheckedMallocResult(heap->Allocate(size, cl)); - } + uint32_t cl = size_class_with_alignment(size, align); + if (cl != 0) { + ThreadCache* heap = ThreadCache::GetCache(); + size = Static::sizemap()->class_to_size(cl); + return CheckedMallocResult(heap->Allocate(size, cl)); } // We will allocate directly from the page heap diff --git a/src/tests/tcmalloc_unittest.cc b/src/tests/tcmalloc_unittest.cc index b7ca04c..f1ea001 100644 --- a/src/tests/tcmalloc_unittest.cc +++ b/src/tests/tcmalloc_unittest.cc @@ -91,6 +91,7 @@ #include "base/simple_mutex.h" #include "gperftools/malloc_hook.h" #include "gperftools/malloc_extension.h" +#include "gperftools/nallocx.h" #include "gperftools/tcmalloc.h" #include "thread_cache.h" #include "system-alloc.h" @@ -1069,6 +1070,46 @@ static void TestErrno(void) { EXPECT_EQ(ENOMEM, errno); } + +#ifndef DEBUGALLOCATION +// Ensure that nallocx works before main. 
+struct GlobalNallocx { + GlobalNallocx() { CHECK_GT(nallocx(99, 0), 99); } +} global_nallocx; + +#if defined(__GNUC__) + +// 101 is the max user priority. +static void check_global_nallocx() __attribute__((constructor(101))); +static void check_global_nallocx() { CHECK_GT(nallocx(99, 0), 99); } + +#endif // __GNUC__ + +static void TestNAllocX() { + for (size_t size = 0; size <= (1 << 20); size += 7) { + size_t rounded = nallocx(size, 0); + ASSERT_GE(rounded, size); + void* ptr = malloc(size); + ASSERT_EQ(rounded, MallocExtension::instance()->GetAllocatedSize(ptr)); + free(ptr); + } +} + +static void TestNAllocXAlignment() { + for (size_t size = 0; size <= (1 << 20); size += 7) { + for (size_t align = 0; align < 10; align++) { + size_t rounded = nallocx(size, MALLOCX_LG_ALIGN(align)); + ASSERT_GE(rounded, size); + ASSERT_EQ(rounded % (1 << align), 0); + void* ptr = memalign(1 << align, size); + ASSERT_EQ(rounded, MallocExtension::instance()->GetAllocatedSize(ptr)); + free(ptr); + } + } +} + +#endif // !DEBUGALLOCATION + static int RunAllTests(int argc, char** argv) { // Optional argv[1] is the seed AllocatorState rnd(argc > 1 ? atoi(argv[1]) : 100); @@ -1403,6 +1444,12 @@ static int RunAllTests(int argc, char** argv) { TestSetNewMode(); TestErrno(); +// GetAllocatedSize under DEBUGALLOCATION returns the size that we asked for. +#ifndef DEBUGALLOCATION + TestNAllocX(); + TestNAllocXAlignment(); +#endif + return 0; }