* Default to not sampling in tcmalloc (csilvers)

* Add -DTCMALLOC_LARGE_PAGES: better perf for some workloads (rus)
* Extend pprof --tools to allow per-tool configs (csilvers)
* Have STL_Allocator pass on # bytes to free (richardfang)
* Add a header guard to config.h (csilvers)
* DOC: Clean up documentation around tcmalloc.slack_bytes (fikes)
* DOC: Document ProfilerFlush, ProfilerStartWithOptions (csilvers)
* PORTING: Work around a gcc 4.5.0 optimization bug (csilvers)
* PORTING: Use -fno-builtin-malloc and friends when compiling tcmalloc
* PORTING: Define _WIN32_WINNT high enough for mingw (csilvers)
* PORTING: Work around libtool bug getting deps wrong in some cases
* Update README.windows to emphasize $IncludeDir more (csilvers)
* Rename README.windows to README_windows.txt (csilvers)


git-svn-id: http://gperftools.googlecode.com/svn/trunk@95 6b5cf1ce-ec42-a296-1ba9-69fdba395a50
Author: csilvers
Date:   2010-06-21 15:59:56 +00:00
Parent: d8c0276168
Commit: cb7393cbe2
31 changed files with 530 additions and 306 deletions

Makefile.am

@ -17,9 +17,17 @@ endif !WITH_STACK_TRACE
# This is mostly based on configure options
AM_CXXFLAGS =
# These are good warnings to turn on by default,
# These are good warnings to turn on by default. We also tell gcc
# that malloc, free, realloc, mmap, etc. are not builtins (these flags
# are supported since gcc 3.1.1). gcc doesn't think most of them are
# builtins now in any case, but it's best to be explicit in case that
# changes one day. gcc ignores functions it doesn't understand.
if GCC
AM_CXXFLAGS += -Wall -Wwrite-strings -Woverloaded-virtual -Wno-sign-compare
AM_CXXFLAGS += -Wall -Wwrite-strings -Woverloaded-virtual -Wno-sign-compare \
-fno-builtin-malloc -fno-builtin-free -fno-builtin-realloc \
-fno-builtin-calloc -fno-builtin-cfree \
-fno-builtin-memalign -fno-builtin-posix_memalign \
-fno-builtin-valloc -fno-builtin-pvalloc
endif GCC
# The -no-undefined flag allows libtool to generate shared libraries for
@ -96,7 +104,7 @@ docdir = $(prefix)/share/doc/$(PACKAGE)-$(VERSION)
# Add your documentation files (in doc/) in addition to these
# top-level boilerplate files. Also add a TODO file if you have one.
# We'll add to this later, on a library-by-library basis
dist_doc_DATA = AUTHORS COPYING ChangeLog INSTALL NEWS README README.windows \
dist_doc_DATA = AUTHORS COPYING ChangeLog INSTALL NEWS README README_windows.txt \
TODO
# The libraries (.so's) you want to install
@ -400,7 +408,7 @@ libtcmalloc_minimal_la_SOURCES = $(TCMALLOC_CC) $(TCMALLOC_MINIMAL_INCLUDES)
libtcmalloc_minimal_la_CXXFLAGS = -DNO_TCMALLOC_SAMPLES \
$(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS)
libtcmalloc_minimal_la_LDFLAGS = $(PTHREAD_CFLAGS)
libtcmalloc_minimal_la_LIBADD = $(PTHREAD_LIBS) libtcmalloc_minimal_internal.la
libtcmalloc_minimal_la_LIBADD = libtcmalloc_minimal_internal.la $(PTHREAD_LIBS)
# For windows, we're playing around with trying to do some stacktrace
# support even with libtcmalloc_minimal. For everyone else, though,
@ -442,6 +450,13 @@ tcmalloc_minimal_unittest_SOURCES = src/tests/tcmalloc_unittest.cc \
$(TCMALLOC_UNITTEST_INCLUDES)
tcmalloc_minimal_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
tcmalloc_minimal_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
# We want libtcmalloc last on the link line, but due to a bug in
# libtool involving convenience libs, they need to come last on the
# link line in order to get dependency ordering right. This is ok:
# convenience libraries are .a's, so tcmalloc is still the last .so.
# We also put pthreads after tcmalloc, because some pthread
# implementations define their own malloc, and we need to go on the
# first linkline to make sure our malloc 'wins'.
tcmalloc_minimal_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) \
liblogging.la $(PTHREAD_LIBS)
@ -750,13 +765,13 @@ libtcmalloc_internal_la_SOURCES = $(libtcmalloc_minimal_internal_la_SOURCES) \
libtcmalloc_internal_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG \
$(AM_CXXFLAGS) $(NO_EXCEPTIONS)
libtcmalloc_internal_la_LDFLAGS = $(PTHREAD_CFLAGS)
libtcmalloc_internal_la_LIBADD = $(PTHREAD_LIBS) libstacktrace.la
libtcmalloc_internal_la_LIBADD = libstacktrace.la $(PTHREAD_LIBS)
lib_LTLIBRARIES += libtcmalloc.la
libtcmalloc_la_SOURCES = $(TCMALLOC_CC) $(TCMALLOC_INCLUDES)
libtcmalloc_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS)
libtcmalloc_la_LDFLAGS = $(PTHREAD_CFLAGS)
libtcmalloc_la_LIBADD = $(PTHREAD_LIBS) libtcmalloc_internal.la
libtcmalloc_la_LIBADD = libtcmalloc_internal.la $(PTHREAD_LIBS)
if WITH_HEAP_CHECKER
# heap-checker-bcad is last, in hopes its global ctor will run first.
@ -789,6 +804,13 @@ tcmalloc_unittest_SOURCES = src/tests/tcmalloc_unittest.cc \
$(TCMALLOC_UNITTEST_INCLUDES)
tcmalloc_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
tcmalloc_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
# We want libtcmalloc last on the link line, but due to a bug in
# libtool involving convenience libs, they need to come last on the
# link line in order to get dependency ordering right. This is ok:
# convenience libraries are .a's, so tcmalloc is still the last .so.
# We also put pthreads after tcmalloc, because some pthread
# implementations define their own malloc, and we need to go on the
# first linkline to make sure our malloc 'wins'.
tcmalloc_unittest_LDADD = $(LIBTCMALLOC) liblogging.la $(PTHREAD_LIBS)
# This makes sure it's safe to link in both tcmalloc and
@ -803,6 +825,13 @@ tcmalloc_both_unittest_SOURCES = src/tests/tcmalloc_unittest.cc \
tcmalloc_both_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
tcmalloc_both_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
if WITH_CPU_PROFILER
# We want libtcmalloc last on the link line, but due to a bug in
# libtool involving convenience libs, they need to come last on the
# link line in order to get dependency ordering right. This is ok:
# convenience libraries are .a's, so tcmalloc is still the last .so.
# We also put pthreads after tcmalloc, because some pthread
# implementations define their own malloc, and we need to go on the
# first linkline to make sure our malloc 'wins'.
tcmalloc_both_unittest_LDADD = $(LIBTCMALLOC) $(LIBTCMALLOC_MINIMAL) \
libprofiler.la liblogging.la $(PTHREAD_LIBS)
else
@ -822,6 +851,10 @@ raw_printer_test_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
raw_printer_test_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
raw_printer_test_LDADD = $(LIBTCMALLOC) $(PTHREAD_LIBS)
# sampler_test and sampling_test both require sampling to be turned
# on, which it's not by default. Use the "standard" value of 2^19.
TESTS_ENVIRONMENT += TCMALLOC_SAMPLE_PARAMETER=524288
TESTS += sampler_test
WINDOWS_PROJECTS += vsprojects/sampler_test/sampler_test.vcproj
sampler_test_SOURCES = src/tests/sampler_test.cc \
@ -909,8 +942,14 @@ heap_checker_unittest_SOURCES = src/tests/heap-checker_unittest.cc \
$(HEAP_CHECKER_UNITTEST_INCLUDES)
heap_checker_unittest_CXXFLAGS = -g $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
heap_checker_unittest_LDFLAGS = -g $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
# tcmalloc has to be specified last!
heap_checker_unittest_LDADD = $(PTHREAD_LIBS) liblogging.la $(LIBTCMALLOC)
# We want libtcmalloc last on the link line, but due to a bug in
# libtool involving convenience libs, they need to come last on the
# link line in order to get dependency ordering right. This is ok:
# convenience libraries are .a's, so tcmalloc is still the last .so.
# We also put pthreads after tcmalloc, because some pthread
# implementations define their own malloc, and we need to go on the
# first linkline to make sure our malloc 'wins'.
heap_checker_unittest_LDADD = $(LIBTCMALLOC) liblogging.la $(PTHREAD_LIBS)
endif WITH_HEAP_CHECKER
@ -1003,9 +1042,12 @@ noinst_PROGRAMS += heap-checker_debug_unittest
heap_checker_debug_unittest_SOURCES = $(heap_checker_unittest_SOURCES)
heap_checker_debug_unittest_CXXFLAGS = $(heap_checker_unittest_CXXFLAGS)
heap_checker_debug_unittest_LDFLAGS = $(heap_checker_unittest_LDFLAGS)
# tcmalloc has to be specified last!
heap_checker_debug_unittest_LDADD = $(PTHREAD_LIBS) liblogging.la \
libtcmalloc_debug.la
# We want libtcmalloc last on the link line, but due to a bug in
# libtool involving convenience libs, they need to come last on the
# link line in order to get dependency ordering right. This is ok:
# convenience libraries are .a's, so tcmalloc is still the last .so.
heap_checker_debug_unittest_LDADD = libtcmalloc_debug.la liblogging.la \
$(PTHREAD_LIBS)
endif WITH_HEAP_CHECKER
endif WITH_DEBUGALLOC

Makefile.in

@ -46,8 +46,17 @@ build_triplet = @build@
host_triplet = @host@
@WITH_STACK_TRACE_FALSE@am__append_1 = -DNO_TCMALLOC_SAMPLES
# These are good warnings to turn on by default,
@GCC_TRUE@am__append_2 = -Wall -Wwrite-strings -Woverloaded-virtual -Wno-sign-compare
# These are good warnings to turn on by default. We also tell gcc
# that malloc, free, realloc, mmap, etc. are not builtins (these flags
# are supported since gcc 3.1.1). gcc doesn't think most of them are
# builtins now in any case, but it's best to be explicit in case that
# changes one day. gcc ignores functions it doesn't understand.
@GCC_TRUE@am__append_2 = -Wall -Wwrite-strings -Woverloaded-virtual -Wno-sign-compare \
@GCC_TRUE@ -fno-builtin-malloc -fno-builtin-free -fno-builtin-realloc \
@GCC_TRUE@ -fno-builtin-calloc -fno-builtin-cfree \
@GCC_TRUE@ -fno-builtin-memalign -fno-builtin-posix_memalign \
@GCC_TRUE@ -fno-builtin-valloc -fno-builtin-pvalloc
# These are x86-specific, having to do with frame-pointers. In
# particular, some x86_64 systems do not insert frame pointers by
@ -152,11 +161,15 @@ bin_PROGRAMS =
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ raw_printer_test \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ sampler_test \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ sampling_test.sh$(EXEEXT)
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_35 = vsprojects/sampler_test/sampler_test.vcproj
# sampler_test and sampling_test both require sampling to be turned
# on, which it's not by default. Use the "standard" value of 2^19.
# These unittests often need to run binaries. They're in the current dir
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_36 = BINDIR=. \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_35 = TCMALLOC_SAMPLE_PARAMETER=524288 \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ BINDIR=. \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ TMPDIR=/tmp/perftools
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_36 = vsprojects/sampler_test/sampler_test.vcproj
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_37 = $(sampling_test_sh_SOURCES)
# This is the sub-program used by sampling_test.sh
@ -345,8 +358,8 @@ libsysinfo_la_DEPENDENCIES = $(am__DEPENDENCIES_1) \
am_libsysinfo_la_OBJECTS = sysinfo.lo $(am__objects_1)
libsysinfo_la_OBJECTS = $(am_libsysinfo_la_OBJECTS)
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_la_DEPENDENCIES = \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__DEPENDENCIES_1) \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ libtcmalloc_internal.la
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ libtcmalloc_internal.la \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__DEPENDENCIES_1)
am__libtcmalloc_la_SOURCES_DIST = src/tcmalloc.cc src/common.h \
src/internal_logging.h src/system-alloc.h \
src/packed-cache-inl.h src/base/spinlock.h \
@ -394,8 +407,8 @@ libtcmalloc_la_OBJECTS = $(am_libtcmalloc_la_OBJECTS)
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am_libtcmalloc_la_rpath = -rpath \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(libdir)
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__DEPENDENCIES_3 = \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__DEPENDENCIES_1) \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ libtcmalloc_internal.la
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ libtcmalloc_internal.la \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__DEPENDENCIES_1)
@WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_and_profiler_la_DEPENDENCIES = $(am__DEPENDENCIES_3)
am__libtcmalloc_and_profiler_la_SOURCES_DIST = src/tcmalloc.cc \
src/common.h src/internal_logging.h src/system-alloc.h \
@ -486,8 +499,8 @@ libtcmalloc_debug_la_OBJECTS = $(am_libtcmalloc_debug_la_OBJECTS)
@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am_libtcmalloc_debug_la_rpath = -rpath \
@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(libdir)
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_internal_la_DEPENDENCIES = \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__DEPENDENCIES_1) \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ libstacktrace.la
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ libstacktrace.la \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__DEPENDENCIES_1)
am__libtcmalloc_internal_la_SOURCES_DIST = src/common.cc \
src/internal_logging.cc src/system-alloc.cc \
src/memfs_malloc.cc src/central_freelist.cc src/page_heap.cc \
@ -550,8 +563,8 @@ am__objects_21 = libtcmalloc_internal_la-common.lo \
libtcmalloc_internal_la_OBJECTS = \
$(am_libtcmalloc_internal_la_OBJECTS)
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am_libtcmalloc_internal_la_rpath =
libtcmalloc_minimal_la_DEPENDENCIES = $(am__DEPENDENCIES_1) \
libtcmalloc_minimal_internal.la
libtcmalloc_minimal_la_DEPENDENCIES = libtcmalloc_minimal_internal.la \
$(am__DEPENDENCIES_1)
am__libtcmalloc_minimal_la_SOURCES_DIST = src/tcmalloc.cc src/common.h \
src/internal_logging.h src/system-alloc.h \
src/packed-cache-inl.h src/base/spinlock.h \
@ -574,8 +587,8 @@ am__libtcmalloc_minimal_la_SOURCES_DIST = src/tcmalloc.cc src/common.h \
am_libtcmalloc_minimal_la_OBJECTS = $(am__objects_22) \
$(am__objects_20)
libtcmalloc_minimal_la_OBJECTS = $(am_libtcmalloc_minimal_la_OBJECTS)
am__DEPENDENCIES_4 = $(am__DEPENDENCIES_1) \
libtcmalloc_minimal_internal.la
am__DEPENDENCIES_4 = libtcmalloc_minimal_internal.la \
$(am__DEPENDENCIES_1)
@WITH_DEBUGALLOC_TRUE@libtcmalloc_minimal_debug_la_DEPENDENCIES = \
@WITH_DEBUGALLOC_TRUE@ $(am__DEPENDENCIES_4)
am__libtcmalloc_minimal_debug_la_SOURCES_DIST = \
@ -782,9 +795,9 @@ am__heap_checker_debug_unittest_SOURCES_DIST = \
@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@am_heap_checker_debug_unittest_OBJECTS = $(am__objects_27)
heap_checker_debug_unittest_OBJECTS = \
$(am_heap_checker_debug_unittest_OBJECTS)
@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@heap_checker_debug_unittest_DEPENDENCIES = $(am__DEPENDENCIES_1) \
@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@heap_checker_debug_unittest_DEPENDENCIES = libtcmalloc_debug.la \
@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@ liblogging.la \
@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@ libtcmalloc_debug.la
@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@ $(am__DEPENDENCIES_1)
am__heap_checker_debug_unittest_sh_SOURCES_DIST = \
src/tests/heap-checker_unittest.sh
am_heap_checker_debug_unittest_sh_OBJECTS =
@ -803,8 +816,8 @@ heap_checker_unittest_OBJECTS = $(am_heap_checker_unittest_OBJECTS)
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__DEPENDENCIES_6 = \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ libtcmalloc.la
@WITH_HEAP_CHECKER_TRUE@heap_checker_unittest_DEPENDENCIES = \
@WITH_HEAP_CHECKER_TRUE@ $(am__DEPENDENCIES_1) liblogging.la \
@WITH_HEAP_CHECKER_TRUE@ $(am__DEPENDENCIES_6)
@WITH_HEAP_CHECKER_TRUE@ $(am__DEPENDENCIES_6) liblogging.la \
@WITH_HEAP_CHECKER_TRUE@ $(am__DEPENDENCIES_1)
am__heap_checker_unittest_sh_SOURCES_DIST = \
src/tests/heap-checker_unittest.sh
am_heap_checker_unittest_sh_OBJECTS =
@ -1329,7 +1342,7 @@ man1dir = $(mandir)/man1
NROFF = nroff
MANS = $(dist_man_MANS)
am__dist_doc_DATA_DIST = AUTHORS COPYING ChangeLog INSTALL NEWS README \
README.windows TODO doc/index.html doc/designstyle.css \
README_windows.txt TODO doc/index.html doc/designstyle.css \
doc/pprof_remote_servers.html doc/tcmalloc.html \
doc/overview.gif doc/pageheap.gif doc/spanmap.gif \
doc/threadheap.gif doc/t-test1.times.txt \
@ -1597,7 +1610,7 @@ noinst_HEADERS = src/google/tcmalloc.h.in
# one day we figure it out. Regardless, installing the dot files isn't the
# end of the world.
dist_doc_DATA = AUTHORS COPYING ChangeLog INSTALL NEWS README \
README.windows TODO doc/index.html doc/designstyle.css \
README_windows.txt TODO doc/index.html doc/designstyle.css \
$(am__append_12) doc/tcmalloc.html doc/overview.gif \
doc/pageheap.gif doc/spanmap.gif doc/threadheap.gif \
doc/t-test1.times.txt \
@ -1658,7 +1671,7 @@ WINDOWS_PROJECTS = google-perftools.sln \
vsprojects/realloc_unittest/realloc_unittest.vcproj \
vsprojects/stack_trace_table_test/stack_trace_table_test.vcproj \
vsprojects/thread_dealloc_unittest/thread_dealloc_unittest.vcproj \
$(am__append_35)
$(am__append_36)
# unittests you want to run when people type 'make check'.
# Note: tests cannot take any arguments!
@ -1690,7 +1703,7 @@ TESTS = low_level_alloc_unittest atomicops_unittest $(am__append_11) \
# TESTS_ENVIRONMENT sets environment variables for when you run unittest.
# We always get "srcdir" set for free.
# We'll add to this later, on a library-by-library basis.
TESTS_ENVIRONMENT = $(am__append_13) $(am__append_36)
TESTS_ENVIRONMENT = $(am__append_13) $(am__append_35)
# All script tests should be added here
noinst_SCRIPTS = $(am__append_16) $(am__append_25) $(am__append_37) \
$(am__append_40) $(am__append_43) $(am__append_58)
@ -1916,7 +1929,7 @@ libtcmalloc_minimal_la_CXXFLAGS = -DNO_TCMALLOC_SAMPLES \
$(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS)
libtcmalloc_minimal_la_LDFLAGS = $(PTHREAD_CFLAGS)
libtcmalloc_minimal_la_LIBADD = $(PTHREAD_LIBS) libtcmalloc_minimal_internal.la
libtcmalloc_minimal_la_LIBADD = libtcmalloc_minimal_internal.la $(PTHREAD_LIBS)
@MINGW_FALSE@LIBTCMALLOC_MINIMAL = libtcmalloc_minimal.la
# For windows, we're playing around with trying to do some stacktrace
@ -1930,6 +1943,13 @@ tcmalloc_minimal_unittest_SOURCES = src/tests/tcmalloc_unittest.cc \
tcmalloc_minimal_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
tcmalloc_minimal_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
# We want libtcmalloc last on the link line, but due to a bug in
# libtool involving convenience libs, they need to come last on the
# link line in order to get dependency ordering right. This is ok:
# convenience libraries are .a's, so tcmalloc is still the last .so.
# We also put pthreads after tcmalloc, because some pthread
# implementations define their own malloc, and we need to go on the
# first linkline to make sure our malloc 'wins'.
tcmalloc_minimal_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) \
liblogging.la $(PTHREAD_LIBS)
@ -2098,7 +2118,7 @@ thread_dealloc_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS)
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(NO_EXCEPTIONS) \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__append_31)
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_internal_la_LDFLAGS = $(PTHREAD_CFLAGS)
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_internal_la_LIBADD = $(PTHREAD_LIBS) libstacktrace.la
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_internal_la_LIBADD = libstacktrace.la $(PTHREAD_LIBS)
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_la_SOURCES = \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(TCMALLOC_CC) \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(TCMALLOC_INCLUDES) \
@ -2108,7 +2128,7 @@ thread_dealloc_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS)
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(AM_CXXFLAGS) \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__append_32)
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_la_LDFLAGS = $(PTHREAD_CFLAGS)
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_la_LIBADD = $(PTHREAD_LIBS) libtcmalloc_internal.la
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_la_LIBADD = libtcmalloc_internal.la $(PTHREAD_LIBS)
@WITH_HEAP_CHECKER_FALSE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@HEAP_CHECKER_SOURCES =
# heap-checker-bcad is last, in hopes its global ctor will run first.
@ -2131,6 +2151,13 @@ thread_dealloc_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS)
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@tcmalloc_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@tcmalloc_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
# We want libtcmalloc last on the link line, but due to a bug in
# libtool involving convenience libs, they need to come last on the
# link line in order to get dependency ordering right. This is ok:
# convenience libraries are .a's, so tcmalloc is still the last .so.
# We also put pthreads after tcmalloc, because some pthread
# implementations define their own malloc, and we need to go on the
# first linkline to make sure our malloc 'wins'.
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@tcmalloc_unittest_LDADD = $(LIBTCMALLOC) liblogging.la $(PTHREAD_LIBS)
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@tcmalloc_both_unittest_SOURCES = src/tests/tcmalloc_unittest.cc \
@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ src/tests/testutil.h src/tests/testutil.cc \
@ -2141,6 +2168,13 @@ thread_dealloc_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS)
@WITH_CPU_PROFILER_FALSE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@tcmalloc_both_unittest_LDADD = $(LIBTCMALLOC) $(LIBTCMALLOC_MINIMAL) \
@WITH_CPU_PROFILER_FALSE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ liblogging.la $(PTHREAD_LIBS)
# We want libtcmalloc last on the link line, but due to a bug in
# libtool involving convenience libs, they need to come last on the
# link line in order to get dependency ordering right. This is ok:
# convenience libraries are .a's, so tcmalloc is still the last .so.
# We also put pthreads after tcmalloc, because some pthread
# implementations define their own malloc, and we need to go on the
# first linkline to make sure our malloc 'wins'.
@WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@tcmalloc_both_unittest_LDADD = $(LIBTCMALLOC) $(LIBTCMALLOC_MINIMAL) \
@WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ libprofiler.la liblogging.la $(PTHREAD_LIBS)
@ -2193,8 +2227,14 @@ thread_dealloc_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS)
@WITH_HEAP_CHECKER_TRUE@heap_checker_unittest_CXXFLAGS = -g $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
@WITH_HEAP_CHECKER_TRUE@heap_checker_unittest_LDFLAGS = -g $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
# tcmalloc has to be specified last!
@WITH_HEAP_CHECKER_TRUE@heap_checker_unittest_LDADD = $(PTHREAD_LIBS) liblogging.la $(LIBTCMALLOC)
# We want libtcmalloc last on the link line, but due to a bug in
# libtool involving convenience libs, they need to come last on the
# link line in order to get dependency ordering right. This is ok:
# convenience libraries are .a's, so tcmalloc is still the last .so.
# We also put pthreads after tcmalloc, because some pthread
# implementations define their own malloc, and we need to go on the
# first linkline to make sure our malloc 'wins'.
@WITH_HEAP_CHECKER_TRUE@heap_checker_unittest_LDADD = $(LIBTCMALLOC) liblogging.la $(PTHREAD_LIBS)
@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_debug_la_SOURCES = src/debugallocation.cc $(HEAP_CHECKER_SOURCES) \
@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(TCMALLOC_INCLUDES)
@ -2227,9 +2267,12 @@ thread_dealloc_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS)
@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@heap_checker_debug_unittest_SOURCES = $(heap_checker_unittest_SOURCES)
@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@heap_checker_debug_unittest_CXXFLAGS = $(heap_checker_unittest_CXXFLAGS)
@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@heap_checker_debug_unittest_LDFLAGS = $(heap_checker_unittest_LDFLAGS)
# tcmalloc has to be specified last!
@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@heap_checker_debug_unittest_LDADD = $(PTHREAD_LIBS) liblogging.la \
@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@ libtcmalloc_debug.la
# We want libtcmalloc last on the link line, but due to a bug in
# libtool involving convenience libs, they need to come last on the
# link line in order to get dependency ordering right. This is ok:
# convenience libraries are .a's, so tcmalloc is still the last .so.
@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@heap_checker_debug_unittest_LDADD = libtcmalloc_debug.la liblogging.la \
@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@ $(PTHREAD_LIBS)
### ------- CPU profiler

README

@ -4,34 +4,6 @@ There are known issues with some perftools functionality on x86_64
systems. See 64-BIT ISSUES, below.
CPU PROFILER
------------
See doc/cpu-profiler.html for information about how to use the CPU
profiler and analyze its output.
As a quick-start, do the following after installing this package:
1) Link your executable with -lprofiler
2) Run your executable with the CPUPROFILE environment var set:
$ CPUPROFILE=/tmp/prof.out <path/to/binary> [binary args]
3) Run pprof to analyze the CPU usage
$ pprof <path/to/binary> /tmp/prof.out # -pg-like text output
$ pprof --gv <path/to/binary> /tmp/prof.out # really cool graphical output
There are other environment variables, besides CPUPROFILE, you can set
to adjust the cpu-profiler behavior; cf "ENVIRONMENT VARIABLES" below.
The CPU profiler is available on all unix-based systems we've tested;
see INSTALL for more details. It is not currently available on Windows.
NOTE: CPU profiling doesn't work after fork (unless you immediately
do an exec()-like call afterwards). Furthermore, if you do
fork, and the child calls exit(), it may corrupt the profile
data. You can use _exit() to work around this. We hope to have
a fix for both problems in the next release of perftools
(hopefully perftools 1.2).
TCMALLOC
--------
Just link in -ltcmalloc or -ltcmalloc_minimal to get the advantages of
@ -42,6 +14,19 @@ tcmalloc functionality is available on all systems we've tested; see
INSTALL for more details. See README.windows for instructions on
using tcmalloc on Windows.
NOTE: When compiling with gcc a program that you plan to link
with libtcmalloc, it's safest to pass in the flags
-fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
when compiling. gcc makes some optimizations assuming it is using its
own, built-in malloc; that assumption obviously isn't true with
tcmalloc. In practice, we haven't seen any problems with this, but
the expected risk is highest for users who register their own malloc
hooks with tcmalloc (using google/malloc_hook.h). The risk is lowest
for folks who use tcmalloc_minimal (or, of course, who pass in the
above flags :-) ).
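As an illustration (an example invocation, not text from the README),
such a compile step might look like:
   $ gcc -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc \
         -fno-builtin-free -c myprogram.c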
HEAP PROFILER
-------------
@ -96,6 +81,34 @@ The heap checker is only available on Linux at this time; see INSTALL
for more details.
CPU PROFILER
------------
See doc/cpu-profiler.html for information about how to use the CPU
profiler and analyze its output.
As a quick-start, do the following after installing this package:
1) Link your executable with -lprofiler
2) Run your executable with the CPUPROFILE environment var set:
$ CPUPROFILE=/tmp/prof.out <path/to/binary> [binary args]
3) Run pprof to analyze the CPU usage
$ pprof <path/to/binary> /tmp/prof.out # -pg-like text output
$ pprof --gv <path/to/binary> /tmp/prof.out # really cool graphical output
There are other environment variables, besides CPUPROFILE, you can set
to adjust the cpu-profiler behavior; cf "ENVIRONMENT VARIABLES" below.
The CPU profiler is available on all unix-based systems we've tested;
see INSTALL for more details. It is not currently available on Windows.
NOTE: CPU profiling doesn't work after fork (unless you immediately
do an exec()-like call afterwards). Furthermore, if you do
fork, and the child calls exit(), it may corrupt the profile
data. You can use _exit() to work around this. We hope to have
a fix for both problems in the next release of perftools
(hopefully perftools 1.2).
EVERYTHING IN ONE
-----------------
If you want the CPU profiler, heap profiler, and heap leak-checker to

configure (vendored)

@ -21533,6 +21533,15 @@ _ACEOF
$as_echo "#define PERFTOOLS_DLL_DECL /**/" >>confdefs.h
# In theory, config.h files shouldn't need a header guard, but we do,
# because we (maybe) #include windows/mingw.h from within config.h,
# and it #includes other .h files. These all have header guards, so
# the end result is if config.h is #included twice, its #undefs get
# evaluated twice, but all the ones in mingw.h/etc only get evaluated
# once, potentially causing trouble. c.f.
# http://code.google.com/p/google-perftools/issues/detail?id=246
# MinGW uses autoconf, but also needs the windows shim routines
# (since it doesn't have its own support for, say, pthreads).
# This requires us to #include a special header file, and also to

configure.ac

@ -301,6 +301,18 @@ AC_DEFINE(PERFTOOLS_DLL_DECL,,
internally, to compile the DLL, and every DLL source file
#includes "config.h" before anything else.])
# In theory, config.h files shouldn't need a header guard, but we do,
# because we (maybe) #include windows/mingw.h from within config.h,
# and it #includes other .h files. These all have header guards, so
# the end result is if config.h is #included twice, its #undefs get
# evaluated twice, but all the ones in mingw.h/etc only get evaluated
# once, potentially causing trouble. c.f.
# http://code.google.com/p/google-perftools/issues/detail?id=246
AH_TOP([
#ifndef GOOGLE_PERFTOOLS_CONFIG_H_
#define GOOGLE_PERFTOOLS_CONFIG_H_
])
# MinGW uses autoconf, but also needs the windows shim routines
# (since it doesn't have its own support for, say, pthreads).
# This requires us to #include a special header file, and also to
@ -309,6 +321,8 @@ AH_BOTTOM([
#ifdef __MINGW32__
#include "windows/mingw.h"
#endif
#endif /* #ifndef GOOGLE_PERFTOOLS_CONFIG_H_ */
])
AM_CONDITIONAL(MINGW, expr $host : '.*-mingw' >/dev/null 2>&1)

doc/cpu-profiler.html

@ -71,6 +71,11 @@ CPUPROFILE with the child's process id).</p>
<p>For security reasons, CPU profiling will not write to a file -- and
is thus not usable -- for setuid programs.</p>
<p>See the include-file <code>google/profiler.h</code> for
advanced-use functions, including <code>ProfilerFlush()</code> and
<code>ProfilerStartWithOptions()</code>.</p>
<H2>Modifying Runtime Behavior</H2>
<p>You can more finely control the behavior of the CPU profiler via

doc/tcmalloc.html

@ -462,11 +462,15 @@ environment variables.</p>
<tr valign=top>
<td><code>TCMALLOC_SAMPLE_PARAMETER</code></td>
<td>default: 524288</td>
<td>default: 0</td>
<td>
The approximate gap between sampling actions. That is, we
take one sample approximately once every
<code>tcmalloc_sample_parameter</code> bytes of allocation.
This sampled heap information is available via
<code>MallocExtension::GetHeapSample()</code> or
<code>MallocExtension::ReadStackTraces()</code>. A reasonable
value is 524288.
</td>
</tr>
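A sketch of reading the sampled heap from inside a program (this
assumes google/malloc_extension.h and a nonzero sample parameter; the
writer type GetHeapSample() fills is a std::string):

    #include <cstdio>
    #include <string>
    #include <google/malloc_extension.h>

    // Writes the current heap sample (pprof-readable) to stderr.
    void DumpHeapSample() {
      std::string profile;
      MallocExtension::instance()->GetHeapSample(&profile);
      std::fwrite(profile.data(), 1, profile.size(), stderr);
    }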
@ -674,12 +678,34 @@ you can access them with a call like
</td>
</tr>
<tr valign=top>
<td><code>tcmalloc.pageheap_free_bytes</code></td>
<td>
Number of bytes in free, mapped pages in page heap. These bytes
can be used to fulfill allocation requests. They always count
towards virtual memory usage, and unless the underlying memory is
swapped out by the OS, they also count towards physical memory
usage.
</td>
</tr>
<tr valign=top>
<td><code>tcmalloc.pageheap_unmapped_bytes</code></td>
<td>
Number of bytes in free, unmapped pages in page heap. These are
bytes that have been released back to the OS, possibly by one of
the MallocExtension "Release" calls. They can be used to fulfill
allocation requests, but typically incur a page fault. They
always count towards virtual memory usage, and depending on the
OS, typically do not count towards physical memory usage.
</td>
</tr>
<tr valign=top>
<td><code>tcmalloc.slack_bytes</code></td>
<td>
A measure of memory fragmentation (how much memory is reserved by
TCMalloc but unlikely to ever be able to serve an allocation
request).
Sum of pageheap_free_bytes and pageheap_unmapped_bytes. Provided
for backwards compatibility only. Do not use.
</td>
</tr>
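For instance, a program can read these properties itself; a sketch
using the MallocExtension::GetNumericProperty interface documented in
this same commit:

    #include <cstdio>
    #include <google/malloc_extension.h>

    void PrintPageHeapStats() {
      size_t free_bytes = 0, unmapped_bytes = 0;
      MallocExtension* ext = MallocExtension::instance();
      if (ext->GetNumericProperty("tcmalloc.pageheap_free_bytes",
                                  &free_bytes) &&
          ext->GetNumericProperty("tcmalloc.pageheap_unmapped_bytes",
                                  &unmapped_bytes)) {
        // slack_bytes is defined as the sum of these two values.
        std::printf("free=%zu unmapped=%zu slack=%zu\n",
                    free_bytes, unmapped_bytes,
                    free_bytes + unmapped_bytes);
      }
    }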

src/base/dynamic_annotations.c

@ -141,8 +141,25 @@ int RunningOnValgrind(void) {
static volatile int running_on_valgrind = -1;
/* C doesn't have thread-safe initialization of statics, and we
don't want to depend on pthread_once here, so hack it. */
ANNOTATE_BENIGN_RACE(&running_on_valgrind, "safe hack");
int local_running_on_valgrind = running_on_valgrind;
if (local_running_on_valgrind == -1)
running_on_valgrind = local_running_on_valgrind = GetRunningOnValgrind();
return local_running_on_valgrind;
}
/* See the comments in dynamic_annotations.h */
double ValgrindSlowdown() {
if (RunningOnValgrind() == 0) {
return 1.0;
}
/* Same initialization hack as in RunningOnValgrind(). */
static volatile double slowdown = 0.0;
ANNOTATE_BENIGN_RACE(&slowdown, "safe hack");
double local_slowdown = slowdown;
if (local_slowdown == 0.0) {
char *env = getenv("VALGRIND_SLOWDOWN");
slowdown = local_slowdown = env ? atof(env) : 50.0;
}
return local_slowdown;
}

src/base/dynamic_annotations.h

@ -457,6 +457,19 @@ void AnnotateFlushState(const char *file, int line);
*/
int RunningOnValgrind(void);
/* ValgrindSlowdown returns:
* 1.0, if (RunningOnValgrind() == 0)
* 50.0, if (RunningOnValgrind() != 0 && getenv("VALGRIND_SLOWDOWN") == NULL)
* atof(getenv("VALGRIND_SLOWDOWN")) otherwise
This function can be used to scale timeout values:
EXAMPLE:
for (;;) {
DoExpensiveBackgroundTask();
SleepForSeconds(5 * ValgrindSlowdown());
}
*/
double ValgrindSlowdown();
#ifdef __cplusplus
}
#endif

src/base/stl_allocator.h

@ -45,7 +45,7 @@
// Generic allocator class for STL objects
// that uses a given type-less allocator Alloc, which must provide:
// static void* Alloc::Allocate(size_t size);
// static void Alloc::Free(void* ptr);
// static void Alloc::Free(void* ptr, size_t size);
//
// STL_Allocator<T, MyAlloc> provides the same thread-safety
// guarantees as MyAlloc.
@ -82,7 +82,7 @@ class STL_Allocator {
RAW_DCHECK((n * sizeof(T)) / sizeof(T) == n, "n is too big to allocate");
return static_cast<T*>(Alloc::Allocate(n * sizeof(T)));
}
void deallocate(pointer p, size_type /*n*/) { Alloc::Free(p); }
void deallocate(pointer p, size_type n) { Alloc::Free(p, n * sizeof(T)); }
size_type max_size() const { return size_t(-1) / sizeof(T); }
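A sketch of a policy class matching the updated interface (MyAlloc is
hypothetical; only the two-argument Free signature comes from this
change, and the include path assumes the perftools source tree):

    #include <cstdlib>
    #include <vector>
    #include "base/stl_allocator.h"

    struct MyAlloc {
      static void* Allocate(size_t size) { return std::malloc(size); }
      // The byte count is now passed through; this trivial policy
      // ignores it, but size-aware arenas can make use of it.
      static void Free(void* ptr, size_t /*size*/) { std::free(ptr); }
    };

    typedef std::vector<int, STL_Allocator<int, MyAlloc> > IntVector;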

src/central_freelist.cc

@ -57,9 +57,22 @@ void CentralFreeList::ReleaseListToSpans(void* start) {
}
}
void CentralFreeList::ReleaseToSpans(void* object) {
// MapObjectToSpan should logically be part of ReleaseToSpans. But
// this triggers an optimization bug in gcc 4.5.0. Moving to a
// separate function, and making sure that function isn't inlined,
// seems to fix the problem. It also should be fixed for gcc 4.5.1.
static
#if __GNUC__ == 4 && __GNUC_MINOR__ == 5 && __GNUC_PATCHLEVEL__ == 0
__attribute__ ((noinline))
#endif
Span* MapObjectToSpan(void* object) {
const PageID p = reinterpret_cast<uintptr_t>(object) >> kPageShift;
Span* span = Static::pageheap()->GetDescriptor(p);
return span;
}
void CentralFreeList::ReleaseToSpans(void* object) {
Span* span = MapObjectToSpan(object);
ASSERT(span != NULL);
ASSERT(span->refcount > 0);
@ -266,7 +279,8 @@ void CentralFreeList::Populate() {
Span* span;
{
SpinLockHolder h(Static::pageheap_lock());
span = Static::pageheap()->New(npages, size_class_, kPageSize);
span = Static::pageheap()->New(npages);
if (span) Static::pageheap()->RegisterSizeClass(span, size_class_);
}
if (span == NULL) {
MESSAGE("tcmalloc: allocation failed", npages << kPageShift);
@ -274,6 +288,12 @@ void CentralFreeList::Populate() {
return;
}
ASSERT(span->length == npages);
// Cache sizeclass info eagerly. Locking is not necessary.
// (Instead of being eager, we could just replace any stale info
// about this span, but that seems to be no better in practice.)
for (int i = 0; i < npages; i++) {
Static::pageheap()->CacheSizeClass(span->start + i, size_class_);
}
// Split the block into pieces and add to the free-list
// TODO: coloring of objects to avoid cache conflicts?

src/common.h

@ -54,16 +54,45 @@ typedef uintptr_t Length;
// Configuration
//-------------------------------------------------------------------
// Not all possible combinations of the following parameters make
// sense. In particular, if kMaxSize increases, you may have to
// increase kNumClasses as well.
// Using large pages speeds up the execution at a cost of larger memory use.
// Deallocation may speed up by a factor as the page map gets 8x smaller, so
// lookups in the page map result in fewer L2 cache misses, which translates to
// speedup for application/platform combinations with high L2 cache pressure.
// As the number of size classes increases with large pages, we increase
// the thread cache allowance to avoid passing more free ranges to and from
// central lists. Also, larger pages are less likely to get freed.
// These two factors cause a bounded increase in memory use.
#if defined(TCMALLOC_LARGE_PAGES)
static const size_t kPageShift = 15;
static const size_t kNumClasses = 95;
static const size_t kMaxThreadCacheSize = 4 << 20;
#else
static const size_t kPageShift = 12;
static const size_t kNumClasses = 61;
static const size_t kMaxThreadCacheSize = 2 << 20;
#endif
static const size_t kPageSize = 1 << kPageShift;
static const size_t kMaxSize = 8u * kPageSize;
static const size_t kAlignment = 8;
static const size_t kNumClasses = 61;
static const size_t kLargeSizeClass = 0;
// Default bound on the total amount of thread caches.
static const size_t kDefaultOverallThreadCacheSize = 8u * kMaxThreadCacheSize;
// Lower bound on the per-thread cache sizes
static const size_t kMinThreadCacheSize = kMaxSize * 2;
// The number of bytes one ThreadCache will steal from another when
// the first ThreadCache is forced to Scavenge(), delaying the
// next call to Scavenge for this thread.
static const size_t kStealAmount = 1 << 16;
// The number of times that a deallocation can cause a freelist to
// go over its max_length() before shrinking max_length().
static const int kMaxOverages = 3;
// Maximum length we allow a per-thread free-list to have before we
// move objects from it into the corresponding central free-list. We
// want this big to avoid locking the central free-list too often. It
@ -115,8 +144,10 @@ class SizeMap {
// ...
// 32768 (32768 + 127 + (120<<7)) / 128 376
static const int kMaxSmallSize = 1024;
unsigned char class_array_[377];
static const size_t kClassArraySize =
(((1 << kPageShift) * 8u + 127 + (120 << 7)) >> 7) + 1;
unsigned char class_array_[kClassArraySize];
// Compute index of the class_array[] entry for a given size
static inline int ClassIndex(int s) {
ASSERT(0 <= s);
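As a sanity check of the new formula (worked arithmetic, not part of
the diff): with the default kPageShift = 12 it gives
((32768 + 127 + 15360) >> 7) + 1 = 376 + 1 = 377, matching the old
hard-coded class_array_[377]; with -DTCMALLOC_LARGE_PAGES
(kPageShift = 15) it gives ((262144 + 127 + 15360) >> 7) + 1 = 2169.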

src/config.h.in

@ -1,5 +1,10 @@
/* src/config.h.in. Generated from configure.ac by autoheader. */
#ifndef GOOGLE_PERFTOOLS_CONFIG_H_
#define GOOGLE_PERFTOOLS_CONFIG_H_
/* Define to 1 if compiler supports __builtin_stack_pointer */
#undef HAVE_BUILTIN_STACK_POINTER
@ -240,3 +245,5 @@
#include "windows/mingw.h"
#endif
#endif /* #ifndef GOOGLE_PERFTOOLS_CONFIG_H_ */

src/google/malloc_extension.h

@ -145,21 +145,22 @@ class PERFTOOLS_DLL_DECL MallocExtension {
// Number of bytes used across all thread caches.
// This property is not writable.
//
// "tcmalloc.slack_bytes"
// Number of bytes allocated from system, but not currently in
// use by malloced objects. I.e., bytes available for
// allocation without needing more bytes from system. It is
// the sum of pageheap_free_bytes and pageheap_unmapped_bytes.
// This property is not writable.
//
// "tcmalloc.pageheap_free_bytes"
// Number of bytes in free, mapped pages in pageheap
// This property is not writable.
// Number of bytes in free, mapped pages in page heap. These
// bytes can be used to fulfill allocation requests. They
// always count towards virtual memory usage, and unless the
// underlying memory is swapped out by the OS, they also count
// towards physical memory usage. This property is not writable.
//
// "tcmalloc.pageheap_unmapped_bytes"
// Number of bytes in free, unmapped pages in pageheap
// This property is not writable.
//
// Number of bytes in free, unmapped pages in page heap.
// These are bytes that have been released back to the OS,
// possibly by one of the MallocExtension "Release" calls.
// They can be used to fulfill allocation requests, but
// typically incur a page fault. They always count towards
// virtual memory usage, and depending on the OS, typically
// do not count towards physical memory usage. This property
// is not writable.
// -------------------------------------------------------------------
// Get the named "property"'s value. Returns true if the property

src/heap-checker.cc

@ -304,6 +304,9 @@ class HeapLeakChecker::Allocator {
if (p) alloc_count_ -= 1;
LowLevelAlloc::Free(p);
}
static void Free(void* p, size_t /* n */) {
Free(p);
}
// destruct, free, and make *p to be NULL
template<typename T> static void DeleteAndNull(T** p) {
(*p)->~T();

src/internal_logging.h

@ -119,9 +119,7 @@ do { \
#ifndef NDEBUG
#define ASSERT(cond) CHECK_CONDITION(cond)
#else
#define ASSERT(cond) \
do { \
} while (0 && (cond))
#define ASSERT(cond) ((void) 0)
#endif
// Print into buffer

src/linked_list.h

@ -36,6 +36,8 @@
#ifndef TCMALLOC_LINKED_LIST_H_
#define TCMALLOC_LINKED_LIST_H_
#include <stddef.h>
namespace tcmalloc {
inline void *SLL_Next(void *t) {

src/memory_region_map.h

@ -231,7 +231,7 @@ class MemoryRegionMap {
static void *Allocate(size_t n) {
return LowLevelAlloc::AllocWithArena(n, arena_);
}
static void Free(const void *p) {
static void Free(const void *p, size_t /* n */) {
LowLevelAlloc::Free(const_cast<void*>(p));
}
};

src/page_heap.cc

@ -61,64 +61,49 @@ PageHeap::PageHeap()
}
}
// Returns the minimum number of pages necessary to ensure that an
// allocation of size n can be aligned to the given alignment.
static Length AlignedAllocationSize(Length n, size_t alignment) {
ASSERT(alignment >= kPageSize);
return n + tcmalloc::pages(alignment - kPageSize);
}
Span* PageHeap::New(Length n, size_t sc, size_t align) {
Span* PageHeap::New(Length n) {
ASSERT(Check());
ASSERT(n > 0);
if (align < kPageSize) {
align = kPageSize;
}
Length aligned_size = AlignedAllocationSize(n, align);
// Find first size >= n that has a non-empty list
for (Length s = aligned_size; s < kMaxPages; s++) {
for (Length s = n; s < kMaxPages; s++) {
Span* ll = &free_[s].normal;
// If we're lucky, ll is non-empty, meaning it has a suitable span.
if (!DLL_IsEmpty(ll)) {
ASSERT(ll->next->location == Span::ON_NORMAL_FREELIST);
return Carve(ll->next, n, sc, align);
return Carve(ll->next, n);
}
// Alternatively, maybe there's a usable returned span.
ll = &free_[s].returned;
if (!DLL_IsEmpty(ll)) {
ASSERT(ll->next->location == Span::ON_RETURNED_FREELIST);
return Carve(ll->next, n, sc, align);
return Carve(ll->next, n);
}
// Still no luck, so keep looking in larger classes.
}
Span* result = AllocLarge(n, sc, align);
Span* result = AllocLarge(n);
if (result != NULL) return result;
// Grow the heap and try again
if (!GrowHeap(aligned_size)) {
if (!GrowHeap(n)) {
ASSERT(Check());
return NULL;
}
return AllocLarge(n, sc, align);
return AllocLarge(n);
}
Span* PageHeap::AllocLarge(Length n, size_t sc, size_t align) {
// Find the best span (closest to n in size).
Span* PageHeap::AllocLarge(Length n) {
// find the best span (closest to n in size).
// The following loops implements address-ordered best-fit.
Span *best = NULL;
Length aligned_size = AlignedAllocationSize(n, align);
// Search through normal list
for (Span* span = large_.normal.next;
span != &large_.normal;
span = span->next) {
if (span->length >= aligned_size) {
if (span->length >= n) {
if ((best == NULL)
|| (span->length < best->length)
|| ((span->length == best->length) && (span->start < best->start))) {
@ -132,7 +117,7 @@ Span* PageHeap::AllocLarge(Length n, size_t sc, size_t align) {
for (Span* span = large_.returned.next;
span != &large_.returned;
span = span->next) {
if (span->length >= aligned_size) {
if (span->length >= n) {
if ((best == NULL)
|| (span->length < best->length)
|| ((span->length == best->length) && (span->start < best->start))) {
@ -142,18 +127,19 @@ Span* PageHeap::AllocLarge(Length n, size_t sc, size_t align) {
}
}
return best == NULL ? NULL : Carve(best, n, sc, align);
return best == NULL ? NULL : Carve(best, n);
}
Span* PageHeap::Split(Span* span, Length n) {
ASSERT(0 < n);
ASSERT(n < span->length);
ASSERT((span->location != Span::IN_USE) || span->sizeclass == 0);
ASSERT(span->location == Span::IN_USE);
ASSERT(span->sizeclass == 0);
Event(span, 'T', n);
const int extra = span->length - n;
Span* leftover = NewSpan(span->start + n, extra);
leftover->location = span->location;
ASSERT(leftover->location == Span::IN_USE);
Event(leftover, 'U', extra);
RecordSpan(leftover);
pagemap_.set(span->start + n - 1, span); // Update map from pageid to span
@ -162,44 +148,25 @@ Span* PageHeap::Split(Span* span, Length n) {
return leftover;
}
Span* PageHeap::Carve(Span* span, Length n, size_t sc, size_t align) {
Span* PageHeap::Carve(Span* span, Length n) {
ASSERT(n > 0);
ASSERT(span->location != Span::IN_USE);
ASSERT(align >= kPageSize);
Length align_pages = align >> kPageShift;
const int old_location = span->location;
RemoveFromFreeList(span);
if (span->start & (align_pages - 1)) {
Length skip_for_alignment = align_pages - (span->start & (align_pages - 1));
Span* aligned = Split(span, skip_for_alignment);
PrependToFreeList(span); // Skip coalescing - no candidates possible
span = aligned;
}
span->location = Span::IN_USE;
Event(span, 'A', n);
const int extra = span->length - n;
ASSERT(extra >= 0);
if (extra > 0) {
Span* leftover = Split(span, n);
PrependToFreeList(leftover);
Span* leftover = NewSpan(span->start + n, extra);
leftover->location = old_location;
Event(leftover, 'S', extra);
RecordSpan(leftover);
PrependToFreeList(leftover); // Skip coalescing - no candidates possible
span->length = n;
pagemap_.set(span->start + n - 1, span);
}
span->location = Span::IN_USE;
span->sizeclass = sc;
Event(span, 'A', n);
// Cache sizeclass info eagerly. Locking is not necessary.
// (Instead of being eager, we could just replace any stale info
// about this span, but that seems to be no better in practice.)
CacheSizeClass(span->start, sc);
if (sc != kLargeSizeClass) {
for (Length i = 1; i < n; i++) {
pagemap_.set(span->start + i, span);
CacheSizeClass(span->start + i, sc);
}
}
ASSERT(Check());
return span;
}
@ -351,6 +318,18 @@ Length PageHeap::ReleaseAtLeastNPages(Length num_pages) {
return released_pages;
}
void PageHeap::RegisterSizeClass(Span* span, size_t sc) {
// Associate span object with all interior pages as well
ASSERT(span->location == Span::IN_USE);
ASSERT(GetDescriptor(span->start) == span);
ASSERT(GetDescriptor(span->start+span->length-1) == span);
Event(span, 'C', sc);
span->sizeclass = sc;
for (Length i = 1; i < span->length-1; i++) {
pagemap_.set(span->start+i, span);
}
}
static double MB(uint64_t bytes) {
return bytes / 1048576.0;
}

src/page_heap.h

@ -93,49 +93,21 @@ class PERFTOOLS_DLL_DECL PageHeap {
public:
PageHeap();
// Allocate a run of "n" pages. Returns NULL if out of memory.
// Caller should not pass "n == 0" -- instead, n should have been
// rounded up already. The span will be used for allocating objects
// with the specified sizeclass sc (sc must be zero for large
// objects). The first page of the span will be aligned to the value
// specified by align, which must be a power of two.
Span* New(Length n, size_t sc, size_t align);
// Allocate a run of "n" pages. Returns zero if out of memory.
// Caller should not pass "n == 0" -- instead, n should have
// been rounded up already.
Span* New(Length n);
// Delete the span "[p, p+n-1]".
// REQUIRES: span was returned by earlier call to New() and
// has not yet been deleted.
void Delete(Span* span);
// Gets either the size class of addr, if it is a small object, or its span.
// Return:
// if addr is invalid:
// leave *out_sc and *out_span unchanged and return false;
// if addr is valid and has a small size class:
// *out_sc = the size class
// *out_span = <undefined>
// return true
// if addr is valid and has a large size class:
// *out_sc = kLargeSizeClass
// *out_span = the span pointer
// return true
bool GetSizeClassOrSpan(void* addr, size_t* out_sc, Span** out_span) {
const PageID p = reinterpret_cast<uintptr_t>(addr) >> kPageShift;
size_t cl = GetSizeClassIfCached(p);
Span* span = NULL;
if (cl != kLargeSizeClass) {
ASSERT(cl == GetDescriptor(p)->sizeclass);
} else {
span = GetDescriptor(p);
if (!span) {
return false;
}
cl = span->sizeclass;
}
*out_span = span;
*out_sc = cl;
return true;
}
// Mark an allocated span as being used for small objects of the
// specified size-class.
// REQUIRES: span was returned by an earlier call to New()
// and has not yet been deleted.
void RegisterSizeClass(Span* span, size_t sc);
// Split an allocated span into two spans: one of length "n" pages
// followed by another span of length "span->length - n" pages.
@ -143,29 +115,14 @@ class PERFTOOLS_DLL_DECL PageHeap {
// Returns a pointer to the second span.
//
// REQUIRES: "0 < n < span->length"
// REQUIRES: a) the span is free or b) sizeclass == 0
// REQUIRES: span->location == IN_USE
// REQUIRES: span->sizeclass == 0
Span* Split(Span* span, Length n);
// Return the descriptor for the specified page. Returns NULL if
// this PageID was not allocated previously.
inline Span* GetDescriptor(PageID p) const {
Span* ret = reinterpret_cast<Span*>(pagemap_.get(p));
#ifndef NDEBUG
if (ret != NULL && ret->location == Span::IN_USE) {
size_t cl = GetSizeClassIfCached(p);
// Three cases:
// - The object is not cached
// - The object is cached correctly
// - It is a large object and we're not looking at the first
// page. This happens in coalescing.
ASSERT(cl == kLargeSizeClass || cl == ret->sizeclass ||
(ret->start != p && ret->sizeclass == kLargeSizeClass));
// If the object is sampled, it must have be kLargeSizeClass
ASSERT(ret->sizeclass == kLargeSizeClass || !ret->sample);
}
#endif
return ret;
return reinterpret_cast<Span*>(pagemap_.get(p));
}
// Dump state to stderr
@ -266,7 +223,7 @@ class PERFTOOLS_DLL_DECL PageHeap {
// length exactly "n" and mark it as non-free so it can be returned
// to the client. After all that, decrease free_pages_ by n and
// return span.
Span* Carve(Span* span, Length n, size_t sc, size_t align);
Span* Carve(Span* span, Length n);
void RecordSpan(Span* span) {
pagemap_.set(span->start, span);
@ -277,7 +234,7 @@ class PERFTOOLS_DLL_DECL PageHeap {
// Allocate a large span of length == n. If successful, returns a
// span of exactly the specified length. Else, returns NULL.
Span* AllocLarge(Length n, size_t sc, size_t align);
Span* AllocLarge(Length n);
// Coalesce span with neighboring spans if possible, prepend to
// appropriate free list, and adjust stats.

src/pprof

@ -215,7 +215,7 @@ Call-graph Options:
(i.e. direct leak generators) more visible
Miscellaneous:
--tools=<prefix> Prefix for object tool pathnames
--tools=<prefix or binary:fullpath>[,...] \$PATH for object tool pathnames
--test Run unit tests
--help This message
--version Version information
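For example (illustrative invocations with hypothetical paths):
   $ pprof --tools=/usr/crosstool/bin/arm-eabi- <binary> <profile>
   $ pprof --tools=nm:/path/to/custom/nm,/usr/local/bin/ <binary> <profile>
The first form supplies one prefix for every tool; the second maps nm
to an explicit path and falls back to the /usr/local/bin/ prefix for
the remaining tools.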
@ -4331,18 +4331,27 @@ sub ConfigureTool {
my $tool = shift;
my $path;
if ($main::opt_tools ne "") {
# Use a prefix specified by the --tools option...
$path = $main::opt_tools . $tool;
if (!-x $path) {
error("No '$tool' found with prefix specified by --tools $main::opt_tools\n");
# --tools (or $PPROF_TOOLS) is a comma separated list, where each
# item is either a) a pathname prefix, or b) a map of the form
# <tool>:<path>. First we look for an entry of type (b) for our
# tool. If one is found, we use it. Otherwise, we consider all the
# pathname prefixes in turn, until one yields an existing file. If
# none does, we use a default path.
my $tools = $main::opt_tools || $ENV{"PPROF_TOOLS"} || "";
if ($tools =~ m/(,|^)\Q$tool\E:([^,]*)/) {
$path = $2;
# TODO(csilvers): sanity-check that $path exists? Hard if it's relative.
} elsif ($tools) {
foreach my $prefix (split(',', $tools)) {
next if ($prefix =~ /:/); # ignore "tool:fullpath" entries in the list
if (-x $prefix . $tool) {
$path = $prefix . $tool;
last;
}
}
} elsif (exists $ENV{"PPROF_TOOLS"} &&
$ENV{"PPROF_TOOLS"} ne "") {
#... or specified with the PPROF_TOOLS environment variable...
$path = $ENV{"PPROF_TOOLS"} . $tool;
if (!-x $path) {
error("No '$tool' found with prefix specified by PPROF_TOOLS=$ENV{PPROF_TOOLS}\n");
if (!$path) {
error("No '$tool' found with prefix specified by " .
"--tools (or \$PPROF_TOOLS) '$tools'\n");
}
} else {
# ... otherwise use the version that exists in the same directory as

src/sampler.cc

@ -42,16 +42,15 @@ using std::min;
// The approximate gap in bytes between sampling actions.
// I.e., we take one sample approximately once every
// tcmalloc_sample_parameter bytes of allocation
// i.e. about once every 512KB.
// i.e. about once every 512KB if value is 1<<19.
#ifdef NO_TCMALLOC_SAMPLES
DEFINE_int64(tcmalloc_sample_parameter, 0,
"Unused: code is compiled with NO_TCMALLOC_SAMPLES");
#else
DEFINE_int64(tcmalloc_sample_parameter,
EnvToInt64("TCMALLOC_SAMPLE_PARAMETER", 1<<19),
EnvToInt64("TCMALLOC_SAMPLE_PARAMETER", 0),
"The approximate gap in bytes between sampling actions. "
"This must be between 1 and 1<<58.");
// Note: there are other places in this file where the number 19 occurs.
"This must be between 1 and 2^58.");
#endif
namespace tcmalloc {

src/span.h

@ -60,10 +60,6 @@ struct Span {
int value[64];
#endif
void* start_ptr() {
return reinterpret_cast<void*>(start << kPageShift);
}
// What freelist the span is on: IN_USE if on none, or normal or returned
enum { IN_USE, ON_NORMAL_FREELIST, ON_RETURNED_FREELIST };
};

src/tcmalloc.cc

@ -469,6 +469,7 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
"MALLOC: %12" PRIu64 " Spans in use\n"
"MALLOC: %12" PRIu64 " Thread heaps in use\n"
"MALLOC: %12" PRIu64 " (%7.1f MB) Metadata allocated\n"
"MALLOC: %12" PRIu64 " Tcmalloc page size\n"
"------------------------------------------------\n",
stats.pageheap.system_bytes, stats.pageheap.system_bytes / MB,
bytes_in_use, bytes_in_use / MB,
@ -479,7 +480,8 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
stats.thread_bytes, stats.thread_bytes / MB,
uint64_t(Static::span_allocator()->inuse()),
uint64_t(ThreadCache::HeapsInUse()),
stats.metadata_bytes, stats.metadata_bytes / MB);
stats.metadata_bytes, stats.metadata_bytes / MB,
uint64_t(kPageSize));
}
static void PrintStats(int level) {
@ -637,9 +639,8 @@ class TCMallocImplementation : public MallocExtension {
}
if (strcmp(name, "tcmalloc.slack_bytes") == 0) {
// We assume that bytes in the page heap are not fragmented too
// badly, and are therefore available for allocation without
// growing the pageheap system byte count.
// Kept for backwards compatibility. Now defined externally as:
// pageheap_free_bytes + pageheap_unmapped_bytes.
SpinLockHolder l(Static::pageheap_lock());
PageHeap::Stats stats = Static::pageheap()->stats();
*value = stats.free_bytes + stats.unmapped_bytes;
@ -798,25 +799,22 @@ static TCMallocGuard module_enter_exit_hook;
// Helpers for the exported routines below
//-------------------------------------------------------------------
static inline bool CheckCachedSizeClass(void *ptr) {
PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
size_t cached_value = Static::pageheap()->GetSizeClassIfCached(p);
return cached_value == 0 ||
cached_value == Static::pageheap()->GetDescriptor(p)->sizeclass;
}
static inline void* CheckedMallocResult(void *result) {
Span* fetched_span;
size_t cl;
if (result != NULL) {
ASSERT(Static::pageheap()->GetSizeClassOrSpan(result, &cl, &fetched_span));
}
ASSERT(result == NULL || CheckCachedSizeClass(result));
return result;
}
static inline void* SpanToMallocResult(Span *span) {
Span* fetched_span = NULL;
size_t cl = 0;
ASSERT(Static::pageheap()->GetSizeClassOrSpan(span->start_ptr(),
&cl, &fetched_span));
ASSERT(cl == kLargeSizeClass);
ASSERT(span == fetched_span);
return span->start_ptr();
Static::pageheap()->CacheSizeClass(span->start, 0);
return
CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift));
}
static void* DoSampledAllocation(size_t size) {
@ -827,8 +825,7 @@ static void* DoSampledAllocation(size_t size) {
SpinLockHolder h(Static::pageheap_lock());
// Allocate span
Span *span = Static::pageheap()->New(tcmalloc::pages(size == 0 ? 1 : size),
kLargeSizeClass, kPageSize);
Span *span = Static::pageheap()->New(tcmalloc::pages(size == 0 ? 1 : size));
if (span == NULL) {
return NULL;
}
@ -919,7 +916,7 @@ inline void* do_malloc_pages(ThreadCache* heap, size_t size) {
report_large = should_report_large(num_pages);
} else {
SpinLockHolder h(Static::pageheap_lock());
Span* span = Static::pageheap()->New(num_pages, kLargeSizeClass, kPageSize);
Span* span = Static::pageheap()->New(num_pages);
result = (span == NULL ? NULL : SpanToMallocResult(span));
report_large = should_report_large(num_pages);
}
@ -975,22 +972,28 @@ static inline ThreadCache* GetCacheIfPresent() {
inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) {
if (ptr == NULL) return;
ASSERT(Static::pageheap() != NULL); // Should not call free() before malloc()
Span* span;
size_t cl;
const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
Span* span = NULL;
size_t cl = Static::pageheap()->GetSizeClassIfCached(p);
if (!Static::pageheap()->GetSizeClassOrSpan(ptr, &cl, &span)) {
// result can be false because the pointer passed in is invalid
// (not something returned by malloc or friends), or because the
// pointer was allocated with some other allocator besides
// tcmalloc. The latter can happen if tcmalloc is linked in via
// a dynamic library, but is not listed last on the link line.
// In that case, libraries after it on the link line will
// allocate with libc malloc, but free with tcmalloc's free.
(*invalid_free_fn)(ptr); // Decide how to handle the bad free request
return;
if (cl == 0) {
span = Static::pageheap()->GetDescriptor(p);
if (!span) {
// span can be NULL because the pointer passed in is invalid
// (not something returned by malloc or friends), or because the
// pointer was allocated with some other allocator besides
// tcmalloc. The latter can happen if tcmalloc is linked in via
// a dynamic library, but is not listed last on the link line.
// In that case, libraries after it on the link line will
// allocate with libc malloc, but free with tcmalloc's free.
(*invalid_free_fn)(ptr); // Decide how to handle the bad free request
return;
}
cl = span->sizeclass;
Static::pageheap()->CacheSizeClass(p, cl);
}
if (cl != kLargeSizeClass) {
if (cl != 0) {
ASSERT(!Static::pageheap()->GetDescriptor(p)->sample);
ThreadCache* heap = GetCacheIfPresent();
if (heap != NULL) {
heap->Deallocate(ptr, cl);
@@ -1001,7 +1004,8 @@ inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) {
}
} else {
SpinLockHolder h(Static::pageheap_lock());
ASSERT(span != NULL && ptr == span->start_ptr());
ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0);
ASSERT(span != NULL && span->start == p);
if (span->sample) {
tcmalloc::DLL_Remove(span);
Static::stacktrace_allocator()->Delete(
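
The rewritten free path probes the pagemap's size-class cache first and walks the pagemap only on a miss, refilling the cache on the way out. A self-contained toy of that control flow (the types, the direct-mapped cache, and the callback here are inventions for the sketch; tcmalloc's real cache also remembers the key it was filled with):

#include <cstdio>
#include <map>

typedef unsigned long PageID;

static std::map<PageID, int> pagemap;  // toy stand-in: PageID -> sizeclass (0 = large)
static int cache[64];                  // toy cache; 0 means "not cached"

static void OnInvalidFree(PageID p) { printf("invalid free of page %lu\n", p); }

static void ToyFree(PageID p) {
  int cl = cache[p % 64];                      // 1. lock-free cache probe
  if (cl == 0) {
    std::map<PageID, int>::iterator it = pagemap.find(p);
    if (it == pagemap.end()) {                 // 2. not our memory: bad free
      OnInvalidFree(p);
      return;
    }
    cl = it->second;                           // 3. authoritative answer
    cache[p % 64] = cl;                        //    refill cache for next time
  }
  if (cl != 0) printf("small object, sizeclass %d\n", cl);
  else         printf("large object, pages go back to the page heap\n");
}

int main() {
  pagemap[42] = 7;
  ToyFree(42);  // cache miss -> pagemap hit -> small object
  ToyFree(42);  // cache hit
  ToyFree(99);  // invalid free
  return 0;
}
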
@@ -1021,17 +1025,20 @@ inline size_t GetSizeWithCallback(void* ptr,
size_t (*invalid_getsize_fn)(void*)) {
if (ptr == NULL)
return 0;
Span* span;
size_t cl;
if (!Static::pageheap()->GetSizeClassOrSpan(ptr, &cl, &span)) {
return (*invalid_getsize_fn)(ptr);
}
if (cl != kLargeSizeClass) {
const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
size_t cl = Static::pageheap()->GetSizeClassIfCached(p);
if (cl != 0) {
return Static::sizemap()->ByteSizeForClass(cl);
} else {
return span->length << kPageShift;
Span *span = Static::pageheap()->GetDescriptor(p);
if (span == NULL) { // means we do not own this memory
return (*invalid_getsize_fn)(ptr);
} else if (span->sizeclass != 0) {
Static::pageheap()->CacheSizeClass(p, span->sizeclass);
return Static::sizemap()->ByteSizeForClass(span->sizeclass);
} else {
return span->length << kPageShift;
}
}
}
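
GetSizeWithCallback sits behind the allocated-size queries in the public API. A minimal sketch, assuming MallocExtension::GetAllocatedSize is wired to this lookup, showing the caller-visible effect (the exact rounding depends on the size map):

#include <cstdio>
#include <cstdlib>
#include <google/malloc_extension.h>

int main() {
  void* p = malloc(100);
  // For a small object this should report ByteSizeForClass(cl): the size
  // tcmalloc actually reserved, which is >= the 100 bytes requested.
  size_t n = MallocExtension::instance()->GetAllocatedSize(p);
  printf("requested 100, usable %zu\n", n);
  free(p);
  return 0;
}
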
@@ -1126,10 +1133,39 @@ void* do_memalign(size_t align, size_t size) {
// We will allocate directly from the page heap
SpinLockHolder h(Static::pageheap_lock());
// Any page-level allocation will be fine
Span* span = Static::pageheap()->New(tcmalloc::pages(size),
kLargeSizeClass, align);
return span == NULL ? NULL : SpanToMallocResult(span);
if (align <= kPageSize) {
// Any page-level allocation will be fine
// TODO: We could put the rest of this page in the appropriate
// TODO: cache but it does not seem worth it.
Span* span = Static::pageheap()->New(tcmalloc::pages(size));
return span == NULL ? NULL : SpanToMallocResult(span);
}
// Allocate extra pages and carve off an aligned portion
const Length alloc = tcmalloc::pages(size + align);
Span* span = Static::pageheap()->New(alloc);
if (span == NULL) return NULL;
// Skip starting portion so that we end up aligned
Length skip = 0;
while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) {
skip++;
}
ASSERT(skip < alloc);
if (skip > 0) {
Span* rest = Static::pageheap()->Split(span, skip);
Static::pageheap()->Delete(span);
span = rest;
}
// Skip trailing portion that we do not need to return
const Length needed = tcmalloc::pages(size);
ASSERT(span->length >= needed);
if (span->length > needed) {
Span* trailer = Static::pageheap()->Split(span, needed);
Static::pageheap()->Delete(trailer);
}
return SpanToMallocResult(span);
}
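
For align > kPageSize, the code above over-allocates by roughly one alignment's worth of pages, splits off a leading "skip" span to reach alignment, then splits off and frees the unneeded trailer. A worked example of that arithmetic (the page size, request sizes, and starting PageID are assumptions for the sketch):

#include <cassert>
#include <cstdio>

typedef unsigned long Length;
static const int kPageShift = 12;                   // assumed 4 KiB pages
static const unsigned long kPageSize = 1ul << kPageShift;

static Length pages(unsigned long bytes) {          // round up to whole pages
  return (bytes + kPageSize - 1) >> kPageShift;
}

int main() {
  const unsigned long size = 20000, align = 16384;  // align is 4 pages
  const Length alloc = pages(size + align);         // 9 pages over-allocated
  const unsigned long start = 1003;                 // arbitrary first PageID
  Length skip = 0;                                  // same loop as do_memalign
  while ((((start + skip) << kPageShift) & (align - 1)) != 0) skip++;
  const Length needed = pages(size);                // 5 pages actually kept
  assert(skip < alloc && alloc - skip >= needed);
  printf("alloc=%lu skip=%lu keep=%lu trailer=%lu (pages)\n",
         alloc, skip, needed, alloc - skip - needed);  // 9, 1, 5, 3
  return 0;
}
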
// Helpers for use by exported routines below:

View File

@@ -44,13 +44,16 @@
#endif
#include <vector>
#include "base/logging.h"
#include "common.h"
#include <google/malloc_extension.h>
using std::vector;
int main(int argc, char** argv) {
static const int kAllocSize = 36<<10; // Bigger than tcmalloc page size
static const int kTotalAlloc = 400 << 20; // Allocate 400MB in total
// Make kAllocSize larger than tcmalloc page size.
static const int kAllocSize = 9 << kPageShift;
// Allocate 400MB in total.
static const int kTotalAlloc = 400 << 20;
static const int kAllocIterations = kTotalAlloc / kAllocSize;
// Allocate lots of objects

View File

@@ -26,7 +26,7 @@ static void TestPageHeap_Stats() {
CheckStats(ph, 0, 0, 0);
// Allocate a span 's1'
tcmalloc::Span* s1 = ph->New(256, kLargeSizeClass, kPageSize);
tcmalloc::Span* s1 = ph->New(256);
CheckStats(ph, 256, 0, 0);
// Split span 's1' into 's1', 's2'. Delete 's2'

View File

@@ -80,7 +80,7 @@ struct FunctionAndId {
int id;
};
#if defined(NO_THREADS) || !(defined(HAVE_PTHREADS) || defined(_WIN32))
#if defined(NO_THREADS) || !(defined(HAVE_PTHREAD) || defined(_WIN32))
extern "C" void RunThread(void (*fn)()) {
(*fn)();

View File

@@ -42,7 +42,8 @@ using std::min;
using std::max;
DEFINE_int64(tcmalloc_max_total_thread_cache_bytes,
EnvToInt64("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES", 16<<20),
EnvToInt64("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES",
kDefaultOverallThreadCacheSize),
"Bound on the total amount of bytes allocated to "
"thread caches. This bound is not strict, so it is possible "
"for the cache to go over this bound in certain circumstances. ");

View File

@@ -63,9 +63,6 @@ inline bool KernelSupportsTLS() {
class ThreadCache {
public:
// Default bound on the total amount of thread caches.
static const size_t kDefaultOverallThreadCacheSize = 16 << 20;
// All ThreadCache objects are kept in a linked list (for stats collection)
ThreadCache* next_;
ThreadCache* prev_;
@@ -213,19 +210,6 @@ class ThreadCache {
}
};
// The number of bytes one ThreadCache will steal from another when
// the first ThreadCache is forced to Scavenge(), delaying the
// next call to Scavenge for this thread.
static const size_t kStealAmount = 1 << 16;
// Lower and upper bounds on the per-thread cache sizes
static const size_t kMinThreadCacheSize = kMaxSize * 2; //kStealAmount;
static const size_t kMaxThreadCacheSize = 2 << 20;
// The number of times that a deallocation can cause a freelist to
// go over its max_length() before shrinking max_length().
static const int kMaxOverages = 3;
// Gets and returns an object from the central cache, and, if possible,
// also adds some objects of that size class to this thread cache.
void* FetchFromCentralCache(size_t cl, size_t byte_size);

View File

@@ -154,7 +154,7 @@
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
/* Define to 1 if you have the <sys/ucontext.h> header file. */
/* <sys/ucontext.h> is broken on redhat 7 */
#undef HAVE_SYS_UCONTEXT_H
/* Define to 1 if you have the <sys/wait.h> header file. */
@@ -172,6 +172,9 @@
/* Define to 1 if you have the <unwind.h> header file. */
#undef HAVE_UNWIND_H
/* Define to 1 if you have the <valgrind.h> header file. */
#undef HAVE_VALGRIND_H
/* define if your compiler has __attribute__ */
#undef HAVE___ATTRIBUTE__

View File

@@ -45,10 +45,23 @@
# define PERFTOOLS_NO_ALIGNED_MALLOC 1
#endif
// This must be defined before the windows.h is included. We need at
// least 0x0400 for mutex.h to have access to TryLock, and at least
// 0x0501 for patch_functions.cc to have access to GetModuleHandleEx.
// (This latter is an optimization we could take out if need be.)
#ifndef _WIN32_WINNT
# define _WIN32_WINNT 0x0501
#endif
#include "windows/port.h"
#define HAVE_SNPRINTF 1
// Some mingw distributions have a pthreads wrapper, but it doesn't
// work as well as native windows spinlocks (at least for us). So
// pretend the pthreads wrapper doesn't exist, even when it does.
#undef HAVE_PTHREAD
#endif /* __MINGW32__ */
#endif /* GOOGLE_PERFTOOLS_WINDOWS_MINGW_H_ */