ported nallocx support from Google-internal tcmalloc

nallocx is an extension introduced by jemalloc. It returns the effective
size of an allocation without allocating anything.

We also support MALLOCX_LG_ALIGN flag. But all other jemalloc
flags (which at the moment do nothing for nallocx anyways) are
silently ignored, since there is no sensible way to return errors in
this API.

This was originally contributed by Dmitry Vyukov with input from
Andrew Hunter. But due to significant divergence of Google-internal
and free-software forks of tcmalloc, significant massaging was done by
me. So all bugs are mine.
This commit is contained in:
Aliaksey Kandratsenka 2016-12-18 09:35:02 -08:00
parent b0abefd938
commit b8f9d0d44f
4 changed files with 184 additions and 38 deletions

View File

@ -101,7 +101,7 @@ endif MINGW
if HAVE_OBJCOPY_WEAKEN
WEAKEN = $(OBJCOPY) -W malloc -W free -W realloc -W calloc -W cfree \
-W memalign -W posix_memalign -W valloc -W pvalloc \
-W malloc_stats -W mallopt -W mallinfo \
-W malloc_stats -W mallopt -W mallinfo -W nallocx \
-W _Znwm -W _ZnwmRKSt9nothrow_t -W _Znam -W _ZnamRKSt9nothrow_t \
-W _ZdlPv -W _ZdaPv \
-W __Znwm -W __ZnwmRKSt9nothrow_t -W __Znam -W __ZnamRKSt9nothrow_t \
@ -457,7 +457,8 @@ S_TCMALLOC_MINIMAL_INCLUDES = src/common.h \
SG_TCMALLOC_MINIMAL_INCLUDES = src/gperftools/malloc_hook.h \
src/gperftools/malloc_hook_c.h \
src/gperftools/malloc_extension.h \
src/gperftools/malloc_extension_c.h
src/gperftools/malloc_extension_c.h \
src/gperftools/nallocx.h
TCMALLOC_MINIMAL_INCLUDES = $(S_TCMALLOC_MINIMAL_INCLUDES) $(SG_TCMALLOC_MINIMAL_INCLUDES) $(SG_STACKTRACE_INCLUDES)
perftoolsinclude_HEADERS += $(SG_TCMALLOC_MINIMAL_INCLUDES)

37
src/gperftools/nallocx.h Normal file
View File

@ -0,0 +1,37 @@
/* Public nallocx interface, modeled on the jemalloc extension.
 * NOTE: the previous guard name `_NALLOCX_H_` is a reserved identifier
 * (leading underscore followed by an uppercase letter, C11 7.1.3);
 * renamed to a non-reserved guard. */
#ifndef GPERFTOOLS_NALLOCX_H_
#define GPERFTOOLS_NALLOCX_H_

#include <stddef.h>

#ifdef _WIN32
# define PERFTOOLS_NALLOCX_DLL_DECL __declspec(dllimport)
#else
# define PERFTOOLS_NALLOCX_DLL_DECL
#endif

#ifdef __cplusplus
extern "C" {
#endif

/* Encodes a log2 alignment request into the nallocx flags argument. */
#define MALLOCX_LG_ALIGN(la) ((int)(la))

/*
 * The nallocx function allocates no memory, but it performs the same size
 * computation as the malloc function, and returns the real size of the
 * allocation that would result from the equivalent malloc function call.
 * nallocx is a malloc extension originally implemented by jemalloc:
 * http://www.unix.com/man-page/freebsd/3/nallocx/
 *
 * Note, we only support MALLOCX_LG_ALIGN flag and nothing else.
 */
PERFTOOLS_NALLOCX_DLL_DECL size_t nallocx(size_t size, int flags);

/* same as above but never weak */
PERFTOOLS_NALLOCX_DLL_DECL size_t tc_nallocx(size_t size, int flags);

#ifdef __cplusplus
} /* extern "C" */
#endif

#undef PERFTOOLS_NALLOCX_DLL_DECL

#endif /* GPERFTOOLS_NALLOCX_H_ */

View File

@ -111,6 +111,7 @@
#include <gperftools/malloc_extension.h>
#include <gperftools/malloc_hook.h> // for MallocHook
#include <gperftools/nallocx.h>
#include "base/basictypes.h" // for int64
#include "base/commandlineflags.h" // for RegisterFlagValidator, etc
#include "base/dynamic_annotations.h" // for RunningOnValgrind
@ -789,15 +790,7 @@ class TCMallocImplementation : public MallocExtension {
virtual double GetMemoryReleaseRate() {
return FLAGS_tcmalloc_release_rate;
}
virtual size_t GetEstimatedAllocatedSize(size_t size) {
if (size <= kMaxSize) {
const size_t cl = Static::sizemap()->SizeClass(size);
const size_t alloc_size = Static::sizemap()->ByteSizeForClass(cl);
return alloc_size;
} else {
return tcmalloc::pages(size) << kPageShift;
}
}
virtual size_t GetEstimatedAllocatedSize(size_t size);
// This just calls GetSizeWithCallback, but because that's in an
// unnamed namespace, we need to move the definition below it in the
@ -914,6 +907,79 @@ class TCMallocImplementation : public MallocExtension {
}
};
// Returns the smallest size class whose allocations satisfy a request of
// size bytes with align alignment, or 0 when no size class qualifies
// (the caller then falls back to page-granular sizing).
static uint32_t size_class_with_alignment(size_t size, size_t align) {
  // Page-or-larger alignments are handled by the page heap, never by a
  // size class.
  if (align >= kPageSize) {
    return 0;
  }

  size_t cl;
  if (!Static::sizemap()->MaybeSizeClass(size, &cl)) {
    return 0;
  }

  // Scan upward through the size classes until one is a multiple of the
  // requested (power-of-two) alignment. InitSizeClasses() produces
  // several classes aligned at powers of two, so this terminates quickly;
  // any wasted time/space is deemed acceptable since memalign() should be
  // used rarely.
  for (; cl < kNumClasses; cl++) {
    if ((Static::sizemap()->class_to_size(cl) & (align - 1)) == 0) {
      return cl;
    }
  }
  return 0;
}
// nallocx slow path. Kept in a separate noinline function because
// ThreadCache::InitModule is not inlined; pulling it into nallocx would
// turn that otherwise leaf function into one with a stack frame and
// stack spills.
static ATTRIBUTE_NOINLINE size_t nallocx_slow(size_t size, int flags) {
  if (UNLIKELY(!Static::IsInited())) ThreadCache::InitModule();

  // Only the MALLOCX_LG_ALIGN bits (low 6 bits) of flags are honored;
  // all other flag bits are silently ignored.
  const size_t align = static_cast<size_t>(1ull << (flags & 0x3f));
  const uint32_t cl = size_class_with_alignment(size, align);
  return cl != 0 ? Static::sizemap()->ByteSizeForClass(cl)
                 : tcmalloc::pages(size) << kPageShift;
}
// The nallocx function allocates no memory, but it performs the same size
// computation as the malloc function, and returns the real size of the
// allocation that would result from the equivalent malloc function call.
// nallocx is a malloc extension originally implemented by jemalloc:
// http://www.unix.com/man-page/freebsd/3/nallocx/
extern "C" size_t tc_nallocx(size_t size, int flags) {
  // Any flags at all (e.g. MALLOCX_LG_ALIGN) force the slow path.
  if (UNLIKELY(flags != 0)) {
    return nallocx_slow(size, flags);
  }
  // Fast path: direct size-class lookup. Size class 0 is only possible
  // when malloc is not yet initialized, in which case the slow path
  // performs the initialization.
  size_t cl;
  if (!Static::sizemap()->MaybeSizeClass(size, &cl) || cl == 0) {
    return nallocx_slow(size, 0);
  }
  return Static::sizemap()->ByteSizeForClass(cl);
}
// Public nallocx entry point. Where the toolchain supports it, TC_ALIAS
// presumably makes this a direct alias of tc_nallocx (TODO confirm the
// macro's expansion); otherwise fall back to a wrapper that always takes
// the slow path, which handles any flags value.
extern "C" size_t nallocx(size_t size, int flags)
#ifdef TC_ALIAS
TC_ALIAS(tc_nallocx);
#else
{
return nallocx_slow(size, flags);
}
#endif
// MallocExtension hook: the estimated allocated size for a request is
// exactly what nallocx reports for the same size with no flags, keeping
// the two size computations in a single place.
size_t TCMallocImplementation::GetEstimatedAllocatedSize(size_t size) {
return tc_nallocx(size, 0);
}
// The constructor allocates an object to ensure that initialization
// runs before main(), and therefore we do not have a chance to become
// multi-threaded before initialization. We also create the TSD key
@ -1355,17 +1421,24 @@ inline size_t GetSizeWithCallback(const void* ptr,
size_t cl = Static::pageheap()->GetSizeClassIfCached(p);
if (cl != 0) {
return Static::sizemap()->ByteSizeForClass(cl);
} else {
const Span *span = Static::pageheap()->GetDescriptor(p);
if (UNLIKELY(span == NULL)) { // means we do not own this memory
return (*invalid_getsize_fn)(ptr);
} else if (span->sizeclass != 0) {
Static::pageheap()->CacheSizeClass(p, span->sizeclass);
return Static::sizemap()->ByteSizeForClass(span->sizeclass);
} else {
return span->length << kPageShift;
}
}
const Span *span = Static::pageheap()->GetDescriptor(p);
if (UNLIKELY(span == NULL)) { // means we do not own this memory
return (*invalid_getsize_fn)(ptr);
}
if (span->sizeclass != 0) {
Static::pageheap()->CacheSizeClass(p, span->sizeclass);
return Static::sizemap()->ByteSizeForClass(span->sizeclass);
}
if (span->sample) {
size_t orig_size = reinterpret_cast<StackTrace*>(span->objects)->size;
return tc_nallocx(orig_size, 0);
}
return span->length << kPageShift;
}
// This lets you call back to a given function pointer if ptr is invalid.
@ -1444,23 +1517,11 @@ void* do_memalign(size_t align, size_t size) {
// Allocate at least one byte to avoid boundary conditions below
if (size == 0) size = 1;
if (size <= kMaxSize && align < kPageSize) {
// Search through acceptable size classes looking for one with
// enough alignment. This depends on the fact that
// InitSizeClasses() currently produces several size classes that
// are aligned at powers of two. We will waste time and space if
// we miss in the size class array, but that is deemed acceptable
// since memalign() should be used rarely.
int cl = Static::sizemap()->SizeClass(size);
while (cl < kNumClasses &&
((Static::sizemap()->class_to_size(cl) & (align - 1)) != 0)) {
cl++;
}
if (cl < kNumClasses) {
ThreadCache* heap = ThreadCache::GetCache();
size = Static::sizemap()->class_to_size(cl);
return CheckedMallocResult(heap->Allocate(size, cl));
}
uint32_t cl = size_class_with_alignment(size, align);
if (cl != 0) {
ThreadCache* heap = ThreadCache::GetCache();
size = Static::sizemap()->class_to_size(cl);
return CheckedMallocResult(heap->Allocate(size, cl));
}
// We will allocate directly from the page heap

View File

@ -91,6 +91,7 @@
#include "base/simple_mutex.h"
#include "gperftools/malloc_hook.h"
#include "gperftools/malloc_extension.h"
#include "gperftools/nallocx.h"
#include "gperftools/tcmalloc.h"
#include "thread_cache.h"
#include "system-alloc.h"
@ -1069,6 +1070,46 @@ static void TestErrno(void) {
EXPECT_EQ(ENOMEM, errno);
}
#ifndef DEBUGALLOCATION
// Ensure that nallocx works before main.
// The constructor of this global object runs during static
// initialization, so it exercises nallocx before main() and checks the
// result is strictly larger than the 99-byte request.
struct GlobalNallocx {
GlobalNallocx() { CHECK_GT(nallocx(99, 0), 99); }
} global_nallocx;
#if defined(__GNUC__)
// 101 is the max user priority.
// Repeat the same check from a GCC constructor-attribute function at the
// highest user priority, so nallocx is also exercised at this very early
// stage of static initialization.
static void check_global_nallocx() __attribute__((constructor(101)));
static void check_global_nallocx() { CHECK_GT(nallocx(99, 0), 99); }
#endif // __GNUC__
// Verifies that for a sweep of request sizes, nallocx(size, 0) matches
// the size tcmalloc actually hands out for malloc(size).
static void TestNAllocX() {
  for (size_t req = 0; req <= (1 << 20); req += 7) {
    const size_t predicted = nallocx(req, 0);
    // nallocx must never report less than the request.
    ASSERT_GE(predicted, req);
    void* p = malloc(req);
    ASSERT_EQ(predicted, MallocExtension::instance()->GetAllocatedSize(p));
    free(p);
  }
}
static void TestNAllocXAlignment() {
for (size_t size = 0; size <= (1 << 20); size += 7) {
for (size_t align = 0; align < 10; align++) {
size_t rounded = nallocx(size, MALLOCX_LG_ALIGN(align));
ASSERT_GE(rounded, size);
ASSERT_EQ(rounded % (1 << align), 0);
void* ptr = memalign(1 << align, size);
ASSERT_EQ(rounded, MallocExtension::instance()->GetAllocatedSize(ptr));
free(ptr);
}
}
}
#endif // !DEBUGALLOCATION
static int RunAllTests(int argc, char** argv) {
// Optional argv[1] is the seed
AllocatorState rnd(argc > 1 ? atoi(argv[1]) : 100);
@ -1403,6 +1444,12 @@ static int RunAllTests(int argc, char** argv) {
TestSetNewMode();
TestErrno();
// GetAllocatedSize under DEBUGALLOCATION returns the size that we asked for.
#ifndef DEBUGALLOCATION
TestNAllocX();
TestNAllocXAlignment();
#endif
return 0;
}