* Default to not sampling in tcmalloc (csilvers)
* Add -DTCMALLOC_LARGE_PAGES: better perf for some workloads (rus)
* Extend pprof --tools to allow per-tool configs (csilvers)
* Have STL_Allocator pass on # bytes to free (richardfang)
* Add a header guard to config.h (csilvers)
* DOC: Clean up documentation around tcmalloc.slack_bytes (fikes)
* DOC: Document ProfilerFlush, ProfilerStartWithOptions (csilvers)
* PORTING: Work around a gcc 4.5.0 optimization bug (csilvers)
* PORTING: Use -fno-builtin-malloc and friends when compiling tcmalloc
* PORTING: Define _WIN32_WINNT high enough for mingw (csilvers)
* PORTING: Work around libtool bug getting deps wrong in some cases
* Update README.windows to emphasize $IncludeDir more (csilvers)
* Rename README.windows to README_windows.txt (csilvers)

git-svn-id: http://gperftools.googlecode.com/svn/trunk@95 6b5cf1ce-ec42-a296-1ba9-69fdba395a50
parent d8c0276168
commit cb7393cbe2

Makefile.am | 64
@@ -17,9 +17,17 @@ endif !WITH_STACK_TRACE
 # This is mostly based on configure options
 AM_CXXFLAGS =
 
-# These are good warnings to turn on by default,
+# These are good warnings to turn on by default.  We also tell gcc
+# that malloc, free, realloc, mmap, etc. are not builtins (these flags
+# are supported since gcc 3.1.1).  gcc doesn't think most of them are
+# builtins now in any case, but it's best to be explicit in case that
+# changes one day.  gcc ignores functions it doesn't understand.
 if GCC
-AM_CXXFLAGS += -Wall -Wwrite-strings -Woverloaded-virtual -Wno-sign-compare
+AM_CXXFLAGS += -Wall -Wwrite-strings -Woverloaded-virtual -Wno-sign-compare \
+               -fno-builtin-malloc -fno-builtin-free -fno-builtin-realloc \
+               -fno-builtin-calloc -fno-builtin-cfree \
+               -fno-builtin-memalign -fno-builtin-posix_memalign \
+               -fno-builtin-valloc -fno-builtin-pvalloc
 endif GCC
 
 # The -no-undefined flag allows libtool to generate shared libraries for

@@ -96,7 +104,7 @@ docdir = $(prefix)/share/doc/$(PACKAGE)-$(VERSION)
 # Add your documentation files (in doc/) in addition to these
 # top-level boilerplate files.  Also add a TODO file if you have one.
 # We'll add to this later, on a library-by-library basis
-dist_doc_DATA = AUTHORS COPYING ChangeLog INSTALL NEWS README README.windows \
+dist_doc_DATA = AUTHORS COPYING ChangeLog INSTALL NEWS README README_windows.txt \
                 TODO
 
 # The libraries (.so's) you want to install

@@ -400,7 +408,7 @@ libtcmalloc_minimal_la_SOURCES = $(TCMALLOC_CC) $(TCMALLOC_MINIMAL_INCLUDES)
 libtcmalloc_minimal_la_CXXFLAGS = -DNO_TCMALLOC_SAMPLES \
                                   $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS)
 libtcmalloc_minimal_la_LDFLAGS = $(PTHREAD_CFLAGS)
-libtcmalloc_minimal_la_LIBADD = $(PTHREAD_LIBS) libtcmalloc_minimal_internal.la
+libtcmalloc_minimal_la_LIBADD = libtcmalloc_minimal_internal.la $(PTHREAD_LIBS)
 
 # For windows, we're playing around with trying to do some stacktrace
 # support even with libtcmalloc_minimal.  For everyone else, though,

@@ -442,6 +450,13 @@ tcmalloc_minimal_unittest_SOURCES = src/tests/tcmalloc_unittest.cc \
                                     $(TCMALLOC_UNITTEST_INCLUDES)
 tcmalloc_minimal_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
 tcmalloc_minimal_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
+# We want libtcmalloc last on the link line, but due to a bug in
+# libtool involving convenience libs, they need to come last on the
+# link line in order to get dependency ordering right.  This is ok:
+# convenience libraries are .a's, so tcmalloc is still the last .so.
+# We also put pthreads after tcmalloc, because some pthread
+# implementations define their own malloc, and we need to go on the
+# first linkline to make sure our malloc 'wins'.
 tcmalloc_minimal_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) \
                                   liblogging.la $(PTHREAD_LIBS)
 

@@ -750,13 +765,13 @@ libtcmalloc_internal_la_SOURCES = $(libtcmalloc_minimal_internal_la_SOURCES) \
 libtcmalloc_internal_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG \
                                    $(AM_CXXFLAGS) $(NO_EXCEPTIONS)
 libtcmalloc_internal_la_LDFLAGS = $(PTHREAD_CFLAGS)
-libtcmalloc_internal_la_LIBADD = $(PTHREAD_LIBS) libstacktrace.la
+libtcmalloc_internal_la_LIBADD = libstacktrace.la $(PTHREAD_LIBS)
 
 lib_LTLIBRARIES += libtcmalloc.la
 libtcmalloc_la_SOURCES = $(TCMALLOC_CC) $(TCMALLOC_INCLUDES)
 libtcmalloc_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS)
 libtcmalloc_la_LDFLAGS = $(PTHREAD_CFLAGS)
-libtcmalloc_la_LIBADD = $(PTHREAD_LIBS) libtcmalloc_internal.la
+libtcmalloc_la_LIBADD = libtcmalloc_internal.la $(PTHREAD_LIBS)
 
 if WITH_HEAP_CHECKER
 # heap-checker-bcad is last, in hopes its global ctor will run first.

@@ -789,6 +804,13 @@ tcmalloc_unittest_SOURCES = src/tests/tcmalloc_unittest.cc \
                             $(TCMALLOC_UNITTEST_INCLUDES)
 tcmalloc_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
 tcmalloc_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
+# We want libtcmalloc last on the link line, but due to a bug in
+# libtool involving convenience libs, they need to come last on the
+# link line in order to get dependency ordering right.  This is ok:
+# convenience libraries are .a's, so tcmalloc is still the last .so.
+# We also put pthreads after tcmalloc, because some pthread
+# implementations define their own malloc, and we need to go on the
+# first linkline to make sure our malloc 'wins'.
 tcmalloc_unittest_LDADD = $(LIBTCMALLOC) liblogging.la $(PTHREAD_LIBS)
 
 # This makes sure it's safe to link in both tcmalloc and

@@ -803,6 +825,13 @@ tcmalloc_both_unittest_SOURCES = src/tests/tcmalloc_unittest.cc \
 tcmalloc_both_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
 tcmalloc_both_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
 if WITH_CPU_PROFILER
+# We want libtcmalloc last on the link line, but due to a bug in
+# libtool involving convenience libs, they need to come last on the
+# link line in order to get dependency ordering right.  This is ok:
+# convenience libraries are .a's, so tcmalloc is still the last .so.
+# We also put pthreads after tcmalloc, because some pthread
+# implementations define their own malloc, and we need to go on the
+# first linkline to make sure our malloc 'wins'.
 tcmalloc_both_unittest_LDADD = $(LIBTCMALLOC) $(LIBTCMALLOC_MINIMAL) \
                                libprofiler.la liblogging.la $(PTHREAD_LIBS)
 else

@@ -822,6 +851,10 @@ raw_printer_test_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
 raw_printer_test_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
 raw_printer_test_LDADD = $(LIBTCMALLOC) $(PTHREAD_LIBS)
 
+# sampler_test and sampling_test both require sampling to be turned
+# on, which it's not by default.  Use the "standard" value of 2^19.
+TESTS_ENVIRONMENT += TCMALLOC_SAMPLE_PARAMETER=524288
+
 TESTS += sampler_test
 WINDOWS_PROJECTS += vsprojects/sampler_test/sampler_test.vcproj
 sampler_test_SOURCES = src/tests/sampler_test.cc \

@@ -909,8 +942,14 @@ heap_checker_unittest_SOURCES = src/tests/heap-checker_unittest.cc \
                                 $(HEAP_CHECKER_UNITTEST_INCLUDES)
 heap_checker_unittest_CXXFLAGS = -g $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
 heap_checker_unittest_LDFLAGS = -g $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
-# tcmalloc has to be specified last!
-heap_checker_unittest_LDADD = $(PTHREAD_LIBS) liblogging.la $(LIBTCMALLOC)
+# We want libtcmalloc last on the link line, but due to a bug in
+# libtool involving convenience libs, they need to come last on the
+# link line in order to get dependency ordering right.  This is ok:
+# convenience libraries are .a's, so tcmalloc is still the last .so.
+# We also put pthreads after tcmalloc, because some pthread
+# implementations define their own malloc, and we need to go on the
+# first linkline to make sure our malloc 'wins'.
+heap_checker_unittest_LDADD = $(LIBTCMALLOC) liblogging.la $(PTHREAD_LIBS)
 
 endif WITH_HEAP_CHECKER
 

@@ -1003,9 +1042,12 @@ noinst_PROGRAMS += heap-checker_debug_unittest
 heap_checker_debug_unittest_SOURCES = $(heap_checker_unittest_SOURCES)
 heap_checker_debug_unittest_CXXFLAGS = $(heap_checker_unittest_CXXFLAGS)
 heap_checker_debug_unittest_LDFLAGS = $(heap_checker_unittest_LDFLAGS)
-# tcmalloc has to be specified last!
-heap_checker_debug_unittest_LDADD = $(PTHREAD_LIBS) liblogging.la \
-                                    libtcmalloc_debug.la
+# We want libtcmalloc last on the link line, but due to a bug in
+# libtool involving convenience libs, they need to come last on the
+# link line in order to get dependency ordering right.  This is ok:
+# convenience libraries are .a's, so tcmalloc is still the last .so.
+heap_checker_debug_unittest_LDADD = libtcmalloc_debug.la liblogging.la \
+                                    $(PTHREAD_LIBS)
 
 endif WITH_HEAP_CHECKER
 endif WITH_DEBUGALLOC
 
Makefile.in | 103

@@ -46,8 +46,17 @@ build_triplet = @build@
 host_triplet = @host@
 @WITH_STACK_TRACE_FALSE@am__append_1 = -DNO_TCMALLOC_SAMPLES
 
-# These are good warnings to turn on by default,
-@GCC_TRUE@am__append_2 = -Wall -Wwrite-strings -Woverloaded-virtual -Wno-sign-compare
+# These are good warnings to turn on by default.  We also tell gcc
+# that malloc, free, realloc, mmap, etc. are not builtins (these flags
+# are supported since gcc 3.1.1).  gcc doesn't think most of them are
+# builtins now in any case, but it's best to be explicit in case that
+# changes one day.  gcc ignores functions it doesn't understand.
+@GCC_TRUE@am__append_2 = -Wall -Wwrite-strings -Woverloaded-virtual -Wno-sign-compare \
+@GCC_TRUE@	-fno-builtin-malloc -fno-builtin-free -fno-builtin-realloc \
+@GCC_TRUE@	-fno-builtin-calloc -fno-builtin-cfree \
+@GCC_TRUE@	-fno-builtin-memalign -fno-builtin-posix_memalign \
+@GCC_TRUE@	-fno-builtin-valloc -fno-builtin-pvalloc
+
 
 # These are x86-specific, having to do with frame-pointers.  In
 # particular, some x86_64 systems do not insert frame pointers by

@@ -152,11 +161,15 @@ bin_PROGRAMS =
 @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	raw_printer_test \
 @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	sampler_test \
 @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	sampling_test.sh$(EXEEXT)
-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_35 = vsprojects/sampler_test/sampler_test.vcproj
 
+# sampler_test and sampling_test both require sampling to be turned
+# on, which it's not by default.  Use the "standard" value of 2^19.
+
 # These unittests often need to run binaries.  They're in the current dir
-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_36 = BINDIR=. \
+@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_35 = TCMALLOC_SAMPLE_PARAMETER=524288 \
+@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	BINDIR=. \
 @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	TMPDIR=/tmp/perftools
+@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_36 = vsprojects/sampler_test/sampler_test.vcproj
 @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__append_37 = $(sampling_test_sh_SOURCES)
 
 # This is the sub-program used by sampling_test.sh

@@ -345,8 +358,8 @@ libsysinfo_la_DEPENDENCIES = $(am__DEPENDENCIES_1) \
 am_libsysinfo_la_OBJECTS = sysinfo.lo $(am__objects_1)
 libsysinfo_la_OBJECTS = $(am_libsysinfo_la_OBJECTS)
 @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_la_DEPENDENCIES = \
-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	$(am__DEPENDENCIES_1) \
-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	libtcmalloc_internal.la
+@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	libtcmalloc_internal.la \
+@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	$(am__DEPENDENCIES_1)
 am__libtcmalloc_la_SOURCES_DIST = src/tcmalloc.cc src/common.h \
 	src/internal_logging.h src/system-alloc.h \
 	src/packed-cache-inl.h src/base/spinlock.h \

@@ -394,8 +407,8 @@ libtcmalloc_la_OBJECTS = $(am_libtcmalloc_la_OBJECTS)
 @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am_libtcmalloc_la_rpath = -rpath \
 @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	$(libdir)
 @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__DEPENDENCIES_3 = \
-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	$(am__DEPENDENCIES_1) \
-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	libtcmalloc_internal.la
+@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	libtcmalloc_internal.la \
+@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	$(am__DEPENDENCIES_1)
 @WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_and_profiler_la_DEPENDENCIES = $(am__DEPENDENCIES_3)
 am__libtcmalloc_and_profiler_la_SOURCES_DIST = src/tcmalloc.cc \
 	src/common.h src/internal_logging.h src/system-alloc.h \

@@ -486,8 +499,8 @@ libtcmalloc_debug_la_OBJECTS = $(am_libtcmalloc_debug_la_OBJECTS)
 @WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am_libtcmalloc_debug_la_rpath = -rpath \
 @WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	$(libdir)
 @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_internal_la_DEPENDENCIES = \
-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	$(am__DEPENDENCIES_1) \
-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	libstacktrace.la
+@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	libstacktrace.la \
+@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	$(am__DEPENDENCIES_1)
 am__libtcmalloc_internal_la_SOURCES_DIST = src/common.cc \
 	src/internal_logging.cc src/system-alloc.cc \
 	src/memfs_malloc.cc src/central_freelist.cc src/page_heap.cc \

@@ -550,8 +563,8 @@ am__objects_21 = libtcmalloc_internal_la-common.lo \
 libtcmalloc_internal_la_OBJECTS = \
 	$(am_libtcmalloc_internal_la_OBJECTS)
 @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am_libtcmalloc_internal_la_rpath =
-libtcmalloc_minimal_la_DEPENDENCIES = $(am__DEPENDENCIES_1) \
-	libtcmalloc_minimal_internal.la
+libtcmalloc_minimal_la_DEPENDENCIES = libtcmalloc_minimal_internal.la \
+	$(am__DEPENDENCIES_1)
 am__libtcmalloc_minimal_la_SOURCES_DIST = src/tcmalloc.cc src/common.h \
 	src/internal_logging.h src/system-alloc.h \
 	src/packed-cache-inl.h src/base/spinlock.h \

@@ -574,8 +587,8 @@ am__libtcmalloc_minimal_la_SOURCES_DIST = src/tcmalloc.cc src/common.h \
 am_libtcmalloc_minimal_la_OBJECTS = $(am__objects_22) \
 	$(am__objects_20)
 libtcmalloc_minimal_la_OBJECTS = $(am_libtcmalloc_minimal_la_OBJECTS)
-am__DEPENDENCIES_4 = $(am__DEPENDENCIES_1) \
-	libtcmalloc_minimal_internal.la
+am__DEPENDENCIES_4 = libtcmalloc_minimal_internal.la \
+	$(am__DEPENDENCIES_1)
 @WITH_DEBUGALLOC_TRUE@libtcmalloc_minimal_debug_la_DEPENDENCIES = \
 @WITH_DEBUGALLOC_TRUE@	$(am__DEPENDENCIES_4)
 am__libtcmalloc_minimal_debug_la_SOURCES_DIST = \

@@ -782,9 +795,9 @@ am__heap_checker_debug_unittest_SOURCES_DIST = \
 @WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@am_heap_checker_debug_unittest_OBJECTS = $(am__objects_27)
 heap_checker_debug_unittest_OBJECTS = \
 	$(am_heap_checker_debug_unittest_OBJECTS)
-@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@heap_checker_debug_unittest_DEPENDENCIES = $(am__DEPENDENCIES_1) \
+@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@heap_checker_debug_unittest_DEPENDENCIES = libtcmalloc_debug.la \
 @WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@	liblogging.la \
-@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@	libtcmalloc_debug.la
+@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@	$(am__DEPENDENCIES_1)
 am__heap_checker_debug_unittest_sh_SOURCES_DIST = \
 	src/tests/heap-checker_unittest.sh
 am_heap_checker_debug_unittest_sh_OBJECTS =

@@ -803,8 +816,8 @@ heap_checker_unittest_OBJECTS = $(am_heap_checker_unittest_OBJECTS)
 @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__DEPENDENCIES_6 = \
 @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	libtcmalloc.la
 @WITH_HEAP_CHECKER_TRUE@heap_checker_unittest_DEPENDENCIES = \
-@WITH_HEAP_CHECKER_TRUE@	$(am__DEPENDENCIES_1) liblogging.la \
-@WITH_HEAP_CHECKER_TRUE@	$(am__DEPENDENCIES_6)
+@WITH_HEAP_CHECKER_TRUE@	$(am__DEPENDENCIES_6) liblogging.la \
+@WITH_HEAP_CHECKER_TRUE@	$(am__DEPENDENCIES_1)
 am__heap_checker_unittest_sh_SOURCES_DIST = \
 	src/tests/heap-checker_unittest.sh
 am_heap_checker_unittest_sh_OBJECTS =

@@ -1329,7 +1342,7 @@ man1dir = $(mandir)/man1
 NROFF = nroff
 MANS = $(dist_man_MANS)
 am__dist_doc_DATA_DIST = AUTHORS COPYING ChangeLog INSTALL NEWS README \
-	README.windows TODO doc/index.html doc/designstyle.css \
+	README_windows.txt TODO doc/index.html doc/designstyle.css \
 	doc/pprof_remote_servers.html doc/tcmalloc.html \
 	doc/overview.gif doc/pageheap.gif doc/spanmap.gif \
 	doc/threadheap.gif doc/t-test1.times.txt \

@@ -1597,7 +1610,7 @@ noinst_HEADERS = src/google/tcmalloc.h.in
 # one day we figure it out.  Regardless, installing the dot files isn't the
 # end of the world.
 dist_doc_DATA = AUTHORS COPYING ChangeLog INSTALL NEWS README \
-	README.windows TODO doc/index.html doc/designstyle.css \
+	README_windows.txt TODO doc/index.html doc/designstyle.css \
 	$(am__append_12) doc/tcmalloc.html doc/overview.gif \
 	doc/pageheap.gif doc/spanmap.gif doc/threadheap.gif \
 	doc/t-test1.times.txt \

@@ -1658,7 +1671,7 @@ WINDOWS_PROJECTS = google-perftools.sln \
 	vsprojects/realloc_unittest/realloc_unittest.vcproj \
 	vsprojects/stack_trace_table_test/stack_trace_table_test.vcproj \
 	vsprojects/thread_dealloc_unittest/thread_dealloc_unittest.vcproj \
-	$(am__append_35)
+	$(am__append_36)
 
 # unittests you want to run when people type 'make check'.
 # Note: tests cannot take any arguments!

@@ -1690,7 +1703,7 @@ TESTS = low_level_alloc_unittest atomicops_unittest $(am__append_11) \
 # TESTS_ENVIRONMENT sets environment variables for when you run unittest.
 # We always get "srcdir" set for free.
 # We'll add to this later, on a library-by-library basis.
-TESTS_ENVIRONMENT = $(am__append_13) $(am__append_36)
+TESTS_ENVIRONMENT = $(am__append_13) $(am__append_35)
 # All script tests should be added here
 noinst_SCRIPTS = $(am__append_16) $(am__append_25) $(am__append_37) \
 	$(am__append_40) $(am__append_43) $(am__append_58)

@@ -1916,7 +1929,7 @@ libtcmalloc_minimal_la_CXXFLAGS = -DNO_TCMALLOC_SAMPLES \
 	$(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS)
 
 libtcmalloc_minimal_la_LDFLAGS = $(PTHREAD_CFLAGS)
-libtcmalloc_minimal_la_LIBADD = $(PTHREAD_LIBS) libtcmalloc_minimal_internal.la
+libtcmalloc_minimal_la_LIBADD = libtcmalloc_minimal_internal.la $(PTHREAD_LIBS)
 @MINGW_FALSE@LIBTCMALLOC_MINIMAL = libtcmalloc_minimal.la
 
 # For windows, we're playing around with trying to do some stacktrace

@@ -1930,6 +1943,13 @@ tcmalloc_minimal_unittest_SOURCES = src/tests/tcmalloc_unittest.cc \
 
 tcmalloc_minimal_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
 tcmalloc_minimal_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
+# We want libtcmalloc last on the link line, but due to a bug in
+# libtool involving convenience libs, they need to come last on the
+# link line in order to get dependency ordering right.  This is ok:
+# convenience libraries are .a's, so tcmalloc is still the last .so.
+# We also put pthreads after tcmalloc, because some pthread
+# implementations define their own malloc, and we need to go on the
+# first linkline to make sure our malloc 'wins'.
 tcmalloc_minimal_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) \
 	liblogging.la $(PTHREAD_LIBS)
 

@@ -2098,7 +2118,7 @@ thread_dealloc_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS)
 @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	$(NO_EXCEPTIONS) \
 @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	$(am__append_31)
 @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_internal_la_LDFLAGS = $(PTHREAD_CFLAGS)
-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_internal_la_LIBADD = $(PTHREAD_LIBS) libstacktrace.la
+@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_internal_la_LIBADD = libstacktrace.la $(PTHREAD_LIBS)
 @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_la_SOURCES = \
 @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	$(TCMALLOC_CC) \
 @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	$(TCMALLOC_INCLUDES) \

@@ -2108,7 +2128,7 @@ thread_dealloc_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS)
 @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	$(AM_CXXFLAGS) \
 @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	$(am__append_32)
 @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_la_LDFLAGS = $(PTHREAD_CFLAGS)
-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_la_LIBADD = $(PTHREAD_LIBS) libtcmalloc_internal.la
+@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_la_LIBADD = libtcmalloc_internal.la $(PTHREAD_LIBS)
 @WITH_HEAP_CHECKER_FALSE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@HEAP_CHECKER_SOURCES =
 
 # heap-checker-bcad is last, in hopes its global ctor will run first.

@@ -2131,6 +2151,13 @@ thread_dealloc_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS)
 
 @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@tcmalloc_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
 @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@tcmalloc_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
+# We want libtcmalloc last on the link line, but due to a bug in
+# libtool involving convenience libs, they need to come last on the
+# link line in order to get dependency ordering right.  This is ok:
+# convenience libraries are .a's, so tcmalloc is still the last .so.
+# We also put pthreads after tcmalloc, because some pthread
+# implementations define their own malloc, and we need to go on the
+# first linkline to make sure our malloc 'wins'.
 @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@tcmalloc_unittest_LDADD = $(LIBTCMALLOC) liblogging.la $(PTHREAD_LIBS)
 @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@tcmalloc_both_unittest_SOURCES = src/tests/tcmalloc_unittest.cc \
 @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	src/tests/testutil.h src/tests/testutil.cc \

@@ -2141,6 +2168,13 @@ thread_dealloc_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS)
 @WITH_CPU_PROFILER_FALSE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@tcmalloc_both_unittest_LDADD = $(LIBTCMALLOC) $(LIBTCMALLOC_MINIMAL) \
 @WITH_CPU_PROFILER_FALSE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	liblogging.la $(PTHREAD_LIBS)
 
+# We want libtcmalloc last on the link line, but due to a bug in
+# libtool involving convenience libs, they need to come last on the
+# link line in order to get dependency ordering right.  This is ok:
+# convenience libraries are .a's, so tcmalloc is still the last .so.
+# We also put pthreads after tcmalloc, because some pthread
+# implementations define their own malloc, and we need to go on the
+# first linkline to make sure our malloc 'wins'.
 @WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@tcmalloc_both_unittest_LDADD = $(LIBTCMALLOC) $(LIBTCMALLOC_MINIMAL) \
 @WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	libprofiler.la liblogging.la $(PTHREAD_LIBS)
 

@@ -2193,8 +2227,14 @@ thread_dealloc_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS)
 
 @WITH_HEAP_CHECKER_TRUE@heap_checker_unittest_CXXFLAGS = -g $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
 @WITH_HEAP_CHECKER_TRUE@heap_checker_unittest_LDFLAGS = -g $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
-# tcmalloc has to be specified last!
-@WITH_HEAP_CHECKER_TRUE@heap_checker_unittest_LDADD = $(PTHREAD_LIBS) liblogging.la $(LIBTCMALLOC)
+# We want libtcmalloc last on the link line, but due to a bug in
+# libtool involving convenience libs, they need to come last on the
+# link line in order to get dependency ordering right.  This is ok:
+# convenience libraries are .a's, so tcmalloc is still the last .so.
+# We also put pthreads after tcmalloc, because some pthread
+# implementations define their own malloc, and we need to go on the
+# first linkline to make sure our malloc 'wins'.
+@WITH_HEAP_CHECKER_TRUE@heap_checker_unittest_LDADD = $(LIBTCMALLOC) liblogging.la $(PTHREAD_LIBS)
 @WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_debug_la_SOURCES = src/debugallocation.cc $(HEAP_CHECKER_SOURCES) \
 @WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@	$(TCMALLOC_INCLUDES)
 

@@ -2227,9 +2267,12 @@ thread_dealloc_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS)
 @WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@heap_checker_debug_unittest_SOURCES = $(heap_checker_unittest_SOURCES)
 @WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@heap_checker_debug_unittest_CXXFLAGS = $(heap_checker_unittest_CXXFLAGS)
 @WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@heap_checker_debug_unittest_LDFLAGS = $(heap_checker_unittest_LDFLAGS)
-# tcmalloc has to be specified last!
-@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@heap_checker_debug_unittest_LDADD = $(PTHREAD_LIBS) liblogging.la \
-@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@	libtcmalloc_debug.la
+# We want libtcmalloc last on the link line, but due to a bug in
+# libtool involving convenience libs, they need to come last on the
+# link line in order to get dependency ordering right.  This is ok:
+# convenience libraries are .a's, so tcmalloc is still the last .so.
+@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@heap_checker_debug_unittest_LDADD = libtcmalloc_debug.la liblogging.la \
+@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@	$(PTHREAD_LIBS)
 
 
 ### ------- CPU profiler
README | 69

@@ -4,34 +4,6 @@ There are known issues with some perftools functionality on x86_64
 systems.  See 64-BIT ISSUES, below.
 
 
-CPU PROFILER
-------------
-See doc/cpu-profiler.html for information about how to use the CPU
-profiler and analyze its output.
-
-As a quick-start, do the following after installing this package:
-
-1) Link your executable with -lprofiler
-2) Run your executable with the CPUPROFILE environment var set:
-     $ CPUPROFILE=/tmp/prof.out <path/to/binary> [binary args]
-3) Run pprof to analyze the CPU usage
-     $ pprof <path/to/binary> /tmp/prof.out      # -pg-like text output
-     $ pprof --gv <path/to/binary> /tmp/prof.out # really cool graphical output
-
-There are other environment variables, besides CPUPROFILE, you can set
-to adjust the cpu-profiler behavior; cf "ENVIRONMENT VARIABLES" below.
-
-The CPU profiler is available on all unix-based systems we've tested;
-see INSTALL for more details.  It is not currently available on Windows.
-
-NOTE: CPU profiling doesn't work after fork (unless you immediately
-      do an exec()-like call afterwards).  Furthermore, if you do
-      fork, and the child calls exit(), it may corrupt the profile
-      data.  You can use _exit() to work around this.  We hope to have
-      a fix for both problems in the next release of perftools
-      (hopefully perftools 1.2).
-
-
 TCMALLOC
 --------
 Just link in -ltcmalloc or -ltcmalloc_minimal to get the advantages of

@@ -42,6 +14,19 @@ tcmalloc functionality is available on all systems we've tested; see
 INSTALL for more details.  See README.windows for instructions on
 using tcmalloc on Windows.
 
+NOTE: When compiling programs with gcc that you plan to link
+      with libtcmalloc, it's safest to pass in the flags
+
+       -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
+
+      when compiling.  gcc makes some optimizations assuming it is using its
+      own, built-in malloc; that assumption obviously isn't true with
+      tcmalloc.  In practice, we haven't seen any problems with this, but
+      the expected risk is highest for users who register their own malloc
+      hooks with tcmalloc (using google/malloc_hook.h).  The risk is lowest
+      for folks who use tcmalloc_minimal (or, of course, who pass in the
+      above flags :-) ).
+
 
 HEAP PROFILER
 -------------
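[Editor's illustration -- not part of the commit] The malloc-hook risk described in the NOTE above can be made concrete with a small C++ sketch. It assumes only the MallocHook::SetNewHook interface from google/malloc_hook.h; the counter and program are hypothetical. If gcc treats malloc as a builtin and folds a call away, the hook below never fires for that allocation, which is exactly what the -fno-builtin-* flags prevent:

    // build (assumed): g++ -fno-builtin-malloc -fno-builtin-free hookdemo.cc -ltcmalloc
    #include <stdlib.h>
    #include <google/malloc_hook.h>

    static volatile long g_news = 0;   // hypothetical counter

    // Invoked for every allocation tcmalloc actually performs.
    static void CountNew(const void* /*ptr*/, size_t /*size*/) { ++g_news; }

    int main() {
      MallocHook::SetNewHook(&CountNew);
      void* p = malloc(100);   // without -fno-builtin-malloc, gcc may elide such calls
      free(p);
      return g_news == 0;      // expect at least one hook hit
    }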
@@ -96,6 +81,34 @@ The heap checker is only available on Linux at this time; see INSTALL
 for more details.
 
 
+CPU PROFILER
+------------
+See doc/cpu-profiler.html for information about how to use the CPU
+profiler and analyze its output.
+
+As a quick-start, do the following after installing this package:
+
+1) Link your executable with -lprofiler
+2) Run your executable with the CPUPROFILE environment var set:
+     $ CPUPROFILE=/tmp/prof.out <path/to/binary> [binary args]
+3) Run pprof to analyze the CPU usage
+     $ pprof <path/to/binary> /tmp/prof.out      # -pg-like text output
+     $ pprof --gv <path/to/binary> /tmp/prof.out # really cool graphical output
+
+There are other environment variables, besides CPUPROFILE, you can set
+to adjust the cpu-profiler behavior; cf "ENVIRONMENT VARIABLES" below.
+
+The CPU profiler is available on all unix-based systems we've tested;
+see INSTALL for more details.  It is not currently available on Windows.
+
+NOTE: CPU profiling doesn't work after fork (unless you immediately
+      do an exec()-like call afterwards).  Furthermore, if you do
+      fork, and the child calls exit(), it may corrupt the profile
+      data.  You can use _exit() to work around this.  We hope to have
+      a fix for both problems in the next release of perftools
+      (hopefully perftools 1.2).
+
+
 EVERYTHING IN ONE
 -----------------
 If you want the CPU profiler, heap profiler, and heap leak-checker to
configure (vendored) | 9

@@ -21533,6 +21533,15 @@ _ACEOF
 $as_echo "#define PERFTOOLS_DLL_DECL /**/" >>confdefs.h
 
 
+# In theory, config.h files shouldn't need a header guard, but we do,
+# because we (maybe) #include windows/mingw.h from within config.h,
+# and it #includes other .h files.  These all have header guards, so
+# the end result is if config.h is #included twice, its #undefs get
+# evaluated twice, but all the ones in mingw.h/etc only get evaluated
+# once, potentially causing trouble.  c.f.
+#   http://code.google.com/p/google-perftools/issues/detail?id=246
+
+
 # MinGW uses autoconf, but also needs the windows shim routines
 # (since it doesn't have its own support for, say, pthreads).
 # This requires us to #include a special header file, and also to
configure.ac | 14

@@ -301,6 +301,18 @@ AC_DEFINE(PERFTOOLS_DLL_DECL,,
           internally, to compile the DLL, and every DLL source file
           #includes "config.h" before anything else.])
 
+# In theory, config.h files shouldn't need a header guard, but we do,
+# because we (maybe) #include windows/mingw.h from within config.h,
+# and it #includes other .h files.  These all have header guards, so
+# the end result is if config.h is #included twice, its #undefs get
+# evaluated twice, but all the ones in mingw.h/etc only get evaluated
+# once, potentially causing trouble.  c.f.
+#   http://code.google.com/p/google-perftools/issues/detail?id=246
+AH_TOP([
+#ifndef GOOGLE_PERFTOOLS_CONFIG_H_
+#define GOOGLE_PERFTOOLS_CONFIG_H_
+])
+
 # MinGW uses autoconf, but also needs the windows shim routines
 # (since it doesn't have its own support for, say, pthreads).
 # This requires us to #include a special header file, and also to

@@ -309,6 +321,8 @@ AH_BOTTOM([
 #ifdef __MINGW32__
 #include "windows/mingw.h"
 #endif
+
+#endif  /* #ifndef GOOGLE_PERFTOOLS_CONFIG_H_ */
 ])
 AM_CONDITIONAL(MINGW, expr $host : '.*-mingw' >/dev/null 2>&1)
 
@@ -71,6 +71,11 @@ CPUPROFILE with the child's process id).</p>
 <p>For security reasons, CPU profiling will not write to a file -- and
 is thus not usable -- for setuid programs.</p>
 
+<p>See the include-file <code>google/profiler.h</code> for
+advanced-use functions, including <code>ProfilerFlush()</code> and
+<code>ProfilerStartWithOptions()</code>.</p>
+
+
 <H2>Modifying Runtime Behavior</H2>
 
 <p>You can more finely control the behavior of the CPU profiler via
@@ -462,11 +462,15 @@ environment variables.</p>
 
 <tr valign=top>
   <td><code>TCMALLOC_SAMPLE_PARAMETER</code></td>
-  <td>default: 524288</td>
+  <td>default: 0</td>
   <td>
     The approximate gap between sampling actions.  That is, we
     take one sample approximately once every
     <code>tcmalloc_sample_parameter</code> bytes of allocation.
+    This sampled heap information is available via
+    <code>MallocExtension::GetHeapSample()</code> or
+    <code>MallocExtension::ReadStackTraces()</code>.  A reasonable
+    value is 524288.
   </td>
 </tr>
 
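[Editor's sketch -- not part of the commit] Reading the sampled heap mentioned above, assuming the MallocExtension interface of this release (where MallocExtensionWriter is a std::string typedef). Run the program with TCMALLOC_SAMPLE_PARAMETER=524288 in the environment so sampling is actually on:

    #include <string>
    #include <google/malloc_extension.h>

    // Returns a pprof-compatible dump of the sampled allocations.
    std::string GetSampledHeap() {
      std::string sample;   // MallocExtensionWriter
      MallocExtension::instance()->GetHeapSample(&sample);
      return sample;
    }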
@@ -674,12 +678,34 @@ you can access them with a call like
   </td>
 </tr>
 
+<tr valign=top>
+  <td><code>tcmalloc.pageheap_free_bytes</code></td>
+  <td>
+    Number of bytes in free, mapped pages in page heap.  These bytes
+    can be used to fulfill allocation requests.  They always count
+    towards virtual memory usage, and unless the underlying memory is
+    swapped out by the OS, they also count towards physical memory
+    usage.
+  </td>
+</tr>
+
+<tr valign=top>
+  <td><code>tcmalloc.pageheap_unmapped_bytes</code></td>
+  <td>
+    Number of bytes in free, unmapped pages in page heap.  These are
+    bytes that have been released back to the OS, possibly by one of
+    the MallocExtension "Release" calls.  They can be used to fulfill
+    allocation requests, but typically incur a page fault.  They
+    always count towards virtual memory usage, and depending on the
+    OS, typically do not count towards physical memory usage.
+  </td>
+</tr>
+
 <tr valign=top>
   <td><code>tcmalloc.slack_bytes</code></td>
   <td>
-    A measure of memory fragmentation (how much memory is reserved by
-    TCMalloc but unlikely to ever be able to serve an allocation
-    request).
+    Sum of pageheap_free_bytes and pageheap_unmapped_bytes.  Provided
+    for backwards compatibility only.  Do not use.
   </td>
 </tr>
 
@@ -141,8 +141,25 @@ int RunningOnValgrind(void) {
   static volatile int running_on_valgrind = -1;
   /* C doesn't have thread-safe initialization of statics, and we
      don't want to depend on pthread_once here, so hack it. */
   ANNOTATE_BENIGN_RACE(&running_on_valgrind, "safe hack");
   int local_running_on_valgrind = running_on_valgrind;
   if (local_running_on_valgrind == -1)
     running_on_valgrind = local_running_on_valgrind = GetRunningOnValgrind();
   return local_running_on_valgrind;
 }
 
+/* See the comments in dynamic_annotations.h */
+double ValgrindSlowdown() {
+  if (RunningOnValgrind() == 0) {
+    return 1.0;
+  }
+  /* Same initialization hack as in RunningOnValgrind(). */
+  static volatile double slowdown = 0.0;
+  ANNOTATE_BENIGN_RACE(&slowdown, "safe hack");
+  double local_slowdown = slowdown;  /* double, not int, so atof() values aren't truncated */
+  if (local_slowdown == 0.0) {
+    char *env = getenv("VALGRIND_SLOWDOWN");
+    slowdown = local_slowdown = env ? atof(env) : 50.0;
+  }
+  return local_slowdown;
+}
@@ -457,6 +457,19 @@ void AnnotateFlushState(const char *file, int line);
  */
 int RunningOnValgrind(void);
 
+/* ValgrindSlowdown returns:
+    * 1.0, if (RunningOnValgrind() == 0)
+    * 50.0, if (RunningOnValgrind() != 0 && getenv("VALGRIND_SLOWDOWN") == NULL)
+    * atof(getenv("VALGRIND_SLOWDOWN")) otherwise
+   This function can be used to scale timeout values:
+   EXAMPLE:
+   for (;;) {
+     DoExpensiveBackgroundTask();
+     SleepForSeconds(5 * ValgrindSlowdown());
+   }
+ */
+double ValgrindSlowdown();
+
 #ifdef __cplusplus
 }
 #endif
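[Editor's sketch -- not part of the commit] A self-contained use of the contract declared above to scale a test timeout; the helper name and the 5-second base are hypothetical:

    // Only ValgrindSlowdown() comes from dynamic_annotations.h.
    extern "C" double ValgrindSlowdown(void);

    static double EffectiveTimeoutSecs() {
      const double kBaseTimeoutSecs = 5.0;           // assumed baseline
      return kBaseTimeoutSecs * ValgrindSlowdown();  // 1.0 when not under valgrind
    }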
@@ -45,7 +45,7 @@
 // Generic allocator class for STL objects
 // that uses a given type-less allocator Alloc, which must provide:
 //   static void* Alloc::Allocate(size_t size);
-//   static void Alloc::Free(void* ptr);
+//   static void Alloc::Free(void* ptr, size_t size);
 //
 // STL_Allocator<T, MyAlloc> provides the same thread-safety
 // guarantees as MyAlloc.

@@ -82,7 +82,7 @@ class STL_Allocator {
     RAW_DCHECK((n * sizeof(T)) / sizeof(T) == n, "n is too big to allocate");
     return static_cast<T*>(Alloc::Allocate(n * sizeof(T)));
   }
-  void deallocate(pointer p, size_type /*n*/) { Alloc::Free(p); }
+  void deallocate(pointer p, size_type n) { Alloc::Free(p, n * sizeof(T)); }
 
   size_type max_size() const { return size_t(-1) / sizeof(T); }
 
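[Editor's sketch -- not part of the commit] A minimal Alloc policy satisfying the updated contract, including the new sized Free(); PageAlloc and its malloc/free body are hypothetical stand-ins for the project's low-level allocators:

    #include <stdlib.h>

    class PageAlloc {                    // hypothetical policy class
     public:
      static void* Allocate(size_t size) { return malloc(size); }
      // The size argument lets a real allocator return the block to a
      // size-segregated pool; this stand-in simply ignores it.
      static void Free(void* ptr, size_t /* size */) { free(ptr); }
    };

    // Usage: an STL container allocating through the policy, e.g.
    //   std::vector<int, STL_Allocator<int, PageAlloc> > v;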
|
@ -57,9 +57,22 @@ void CentralFreeList::ReleaseListToSpans(void* start) {
|
||||
}
|
||||
}
|
||||
|
||||
void CentralFreeList::ReleaseToSpans(void* object) {
|
||||
// MapObjectToSpan should logically be part of ReleaseToSpans. But
|
||||
// this triggers an optimization bug in gcc 4.5.0. Moving to a
|
||||
// separate function, and making sure that function isn't inlined,
|
||||
// seems to fix the problem. It also should be fixed for gcc 4.5.1.
|
||||
static
|
||||
#if __GNUC__ == 4 && __GNUC_MINOR__ == 5 && __GNUC_PATCHLEVEL__ == 0
|
||||
__attribute__ ((noinline))
|
||||
#endif
|
||||
Span* MapObjectToSpan(void* object) {
|
||||
const PageID p = reinterpret_cast<uintptr_t>(object) >> kPageShift;
|
||||
Span* span = Static::pageheap()->GetDescriptor(p);
|
||||
return span;
|
||||
}
|
||||
|
||||
void CentralFreeList::ReleaseToSpans(void* object) {
|
||||
Span* span = MapObjectToSpan(object);
|
||||
ASSERT(span != NULL);
|
||||
ASSERT(span->refcount > 0);
|
||||
|
||||
@ -266,7 +279,8 @@ void CentralFreeList::Populate() {
|
||||
Span* span;
|
||||
{
|
||||
SpinLockHolder h(Static::pageheap_lock());
|
||||
span = Static::pageheap()->New(npages, size_class_, kPageSize);
|
||||
span = Static::pageheap()->New(npages);
|
||||
if (span) Static::pageheap()->RegisterSizeClass(span, size_class_);
|
||||
}
|
||||
if (span == NULL) {
|
||||
MESSAGE("tcmalloc: allocation failed", npages << kPageShift);
|
||||
@ -274,6 +288,12 @@ void CentralFreeList::Populate() {
|
||||
return;
|
||||
}
|
||||
ASSERT(span->length == npages);
|
||||
// Cache sizeclass info eagerly. Locking is not necessary.
|
||||
// (Instead of being eager, we could just replace any stale info
|
||||
// about this span, but that seems to be no better in practice.)
|
||||
for (int i = 0; i < npages; i++) {
|
||||
Static::pageheap()->CacheSizeClass(span->start + i, size_class_);
|
||||
}
|
||||
|
||||
// Split the block into pieces and add to the free-list
|
||||
// TODO: coloring of objects to avoid cache conflicts?
|
||||
|
src/common.h | 43

@@ -54,16 +54,45 @@ typedef uintptr_t Length;
 // Configuration
 //-------------------------------------------------------------------
 
-// Not all possible combinations of the following parameters make
-// sense.  In particular, if kMaxSize increases, you may have to
-// increase kNumClasses as well.
+// Using large pages speeds up the execution at a cost of larger memory use.
+// Deallocation may speed up by a factor as the page map gets 8x smaller, so
+// lookups in the page map result in fewer L2 cache misses, which translates to
+// speedup for application/platform combinations with high L2 cache pressure.
+// As the number of size classes increases with large pages, we increase
+// the thread cache allowance to avoid passing more free ranges to and from
+// central lists.  Also, larger pages are less likely to get freed.
+// These two factors cause a bounded increase in memory use.
+
+#if defined(TCMALLOC_LARGE_PAGES)
+static const size_t kPageShift  = 15;
+static const size_t kNumClasses = 95;
+static const size_t kMaxThreadCacheSize = 4 << 20;
+#else
 static const size_t kPageShift  = 12;
+static const size_t kNumClasses = 61;
+static const size_t kMaxThreadCacheSize = 2 << 20;
+#endif
+
 static const size_t kPageSize   = 1 << kPageShift;
 static const size_t kMaxSize    = 8u * kPageSize;
 static const size_t kAlignment  = 8;
-static const size_t kNumClasses = 61;
 static const size_t kLargeSizeClass = 0;
 
+// Default bound on the total amount of thread caches.
+static const size_t kDefaultOverallThreadCacheSize = 8u * kMaxThreadCacheSize;
+
+// Lower bound on the per-thread cache sizes
+static const size_t kMinThreadCacheSize = kMaxSize * 2;
+
+// The number of bytes one ThreadCache will steal from another when
+// the first ThreadCache is forced to Scavenge(), delaying the
+// next call to Scavenge for this thread.
+static const size_t kStealAmount = 1 << 16;
+
+// The number of times that a deallocation can cause a freelist to
+// go over its max_length() before shrinking max_length().
+static const int kMaxOverages = 3;
+
 // Maximum length we allow a per-thread free-list to have before we
 // move objects from it into the corresponding central free-list.  We
 // want this big to avoid locking the central free-list too often.  It

@@ -115,8 +144,10 @@ class SizeMap {
   //   ...
   //   32768   (32768 + 127 + (120<<7)) / 128    376
   static const int kMaxSmallSize = 1024;
-  unsigned char class_array_[377];
+  static const size_t kClassArraySize =
+      (((1 << kPageShift) * 8u + 127 + (120 << 7)) >> 7) + 1;
+  unsigned char class_array_[kClassArraySize];
 
   // Compute index of the class_array[] entry for a given size
   static inline int ClassIndex(int s) {
     ASSERT(0 <= s);
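[Editor's note -- not part of the commit] The new kClassArraySize formula reproduces the old hard-coded 377 for the default page size, because kMaxSize is 8 pages:

    kPageShift = 12:  ((4096*8  + 127 + (120<<7)) >> 7) + 1 = (48255  >> 7) + 1 = 376  + 1 = 377
    kPageShift = 15:  ((32768*8 + 127 + (120<<7)) >> 7) + 1 = (277631 >> 7) + 1 = 2168 + 1 = 2169

so with TCMALLOC_LARGE_PAGES the class_array_ grows along with kMaxSize (which becomes 8 * 32 KiB = 256 KiB).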
@@ -1,5 +1,10 @@
 /* src/config.h.in.  Generated from configure.ac by autoheader. */
 
+
+#ifndef GOOGLE_PERFTOOLS_CONFIG_H_
+#define GOOGLE_PERFTOOLS_CONFIG_H_
+
 
 /* Define to 1 if compiler supports __builtin_stack_pointer */
 #undef HAVE_BUILTIN_STACK_POINTER
 

@@ -240,3 +245,5 @@
 #include "windows/mingw.h"
 #endif
 
+#endif  /* #ifndef GOOGLE_PERFTOOLS_CONFIG_H_ */
+
@@ -145,21 +145,22 @@ class PERFTOOLS_DLL_DECL MallocExtension {
   //      Number of bytes used across all thread caches.
   //      This property is not writable.
   //
   // "tcmalloc.slack_bytes"
   //      Number of bytes allocated from system, but not currently in
   //      use by malloced objects.  I.e., bytes available for
   //      allocation without needing more bytes from system.  It is
   //      the sum of pageheap_free_bytes and pageheap_unmapped_bytes.
   //      This property is not writable.
   //
   // "tcmalloc.pageheap_free_bytes"
-  //      Number of bytes in free, mapped pages in pageheap
-  //      This property is not writable.
+  //      Number of bytes in free, mapped pages in page heap.  These
+  //      bytes can be used to fulfill allocation requests.  They
+  //      always count towards virtual memory usage, and unless the
+  //      underlying memory is swapped out by the OS, they also count
+  //      towards physical memory usage.  This property is not writable.
   //
   // "tcmalloc.pageheap_unmapped_bytes"
-  //      Number of bytes in free, unmapped pages in pageheap
-  //      This property is not writable.
-  //
+  //      Number of bytes in free, unmapped pages in page heap.
+  //      These are bytes that have been released back to the OS,
+  //      possibly by one of the MallocExtension "Release" calls.
+  //      They can be used to fulfill allocation requests, but
+  //      typically incur a page fault.  They always count towards
+  //      virtual memory usage, and depending on the OS, typically
+  //      do not count towards physical memory usage.  This property
+  //      is not writable.
   // -------------------------------------------------------------------
 
   // Get the named "property"'s value.  Returns true if the property
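[Editor's sketch -- not part of the commit] Querying the properties documented above goes through the existing GetNumericProperty() call; the helper name is hypothetical:

    #include <stddef.h>
    #include <google/malloc_extension.h>

    size_t PageHeapFreeBytes() {
      size_t value = 0;
      // Returns true iff the property name is known; value is in bytes.
      MallocExtension::instance()->GetNumericProperty(
          "tcmalloc.pageheap_free_bytes", &value);
      return value;
    }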
@@ -304,6 +304,9 @@ class HeapLeakChecker::Allocator {
     if (p) alloc_count_ -= 1;
     LowLevelAlloc::Free(p);
   }
+  static void Free(void* p, size_t /* n */) {
+    Free(p);
+  }
   // destruct, free, and make *p to be NULL
   template<typename T> static void DeleteAndNull(T** p) {
     (*p)->~T();
@@ -119,9 +119,7 @@ do {                                                          \
 #ifndef NDEBUG
 #define ASSERT(cond) CHECK_CONDITION(cond)
 #else
-#define ASSERT(cond) \
-  do {               \
-  } while (0 && (cond))
+#define ASSERT(cond) ((void) 0)
 #endif
 
 // Print into buffer
@@ -36,6 +36,8 @@
 #ifndef TCMALLOC_LINKED_LIST_H_
 #define TCMALLOC_LINKED_LIST_H_
 
+#include <stddef.h>
+
 namespace tcmalloc {
 
 inline void *SLL_Next(void *t) {
@@ -231,7 +231,7 @@ class MemoryRegionMap {
   static void *Allocate(size_t n) {
     return LowLevelAlloc::AllocWithArena(n, arena_);
   }
-  static void Free(const void *p) {
+  static void Free(const void *p, size_t /* n */) {
     LowLevelAlloc::Free(const_cast<void*>(p));
   }
 };
@@ -61,64 +61,49 @@ PageHeap::PageHeap()
   }
 }
 
-// Returns the minimum number of pages necessary to ensure that an
-// allocation of size n can be aligned to the given alignment.
-static Length AlignedAllocationSize(Length n, size_t alignment) {
-  ASSERT(alignment >= kPageSize);
-  return n + tcmalloc::pages(alignment - kPageSize);
-}
-
-Span* PageHeap::New(Length n, size_t sc, size_t align) {
+Span* PageHeap::New(Length n) {
   ASSERT(Check());
   ASSERT(n > 0);
 
-  if (align < kPageSize) {
-    align = kPageSize;
-  }
-
-  Length aligned_size = AlignedAllocationSize(n, align);
-
   // Find first size >= n that has a non-empty list
-  for (Length s = aligned_size; s < kMaxPages; s++) {
+  for (Length s = n; s < kMaxPages; s++) {
     Span* ll = &free_[s].normal;
     // If we're lucky, ll is non-empty, meaning it has a suitable span.
     if (!DLL_IsEmpty(ll)) {
       ASSERT(ll->next->location == Span::ON_NORMAL_FREELIST);
-      return Carve(ll->next, n, sc, align);
+      return Carve(ll->next, n);
     }
     // Alternatively, maybe there's a usable returned span.
     ll = &free_[s].returned;
     if (!DLL_IsEmpty(ll)) {
       ASSERT(ll->next->location == Span::ON_RETURNED_FREELIST);
-      return Carve(ll->next, n, sc, align);
+      return Carve(ll->next, n);
     }
     // Still no luck, so keep looking in larger classes.
   }
 
-  Span* result = AllocLarge(n, sc, align);
+  Span* result = AllocLarge(n);
   if (result != NULL) return result;
 
   // Grow the heap and try again
-  if (!GrowHeap(aligned_size)) {
+  if (!GrowHeap(n)) {
     ASSERT(Check());
     return NULL;
   }
 
-  return AllocLarge(n, sc, align);
+  return AllocLarge(n);
 }
 
-Span* PageHeap::AllocLarge(Length n, size_t sc, size_t align) {
-  // Find the best span (closest to n in size).
+Span* PageHeap::AllocLarge(Length n) {
+  // find the best span (closest to n in size).
   // The following loops implements address-ordered best-fit.
   Span *best = NULL;
 
-  Length aligned_size = AlignedAllocationSize(n, align);
-
   // Search through normal list
   for (Span* span = large_.normal.next;
        span != &large_.normal;
        span = span->next) {
-    if (span->length >= aligned_size) {
+    if (span->length >= n) {
       if ((best == NULL)
           || (span->length < best->length)
           || ((span->length == best->length) && (span->start < best->start))) {

@@ -132,7 +117,7 @@ Span* PageHeap::AllocLarge(Length n, size_t sc, size_t align) {
   for (Span* span = large_.returned.next;
        span != &large_.returned;
        span = span->next) {
-    if (span->length >= aligned_size) {
+    if (span->length >= n) {
       if ((best == NULL)
           || (span->length < best->length)
           || ((span->length == best->length) && (span->start < best->start))) {

@@ -142,18 +127,19 @@ Span* PageHeap::AllocLarge(Length n, size_t sc, size_t align) {
     }
   }
 
-  return best == NULL ? NULL : Carve(best, n, sc, align);
+  return best == NULL ? NULL : Carve(best, n);
 }
 
 Span* PageHeap::Split(Span* span, Length n) {
   ASSERT(0 < n);
   ASSERT(n < span->length);
-  ASSERT((span->location != Span::IN_USE) || span->sizeclass == 0);
+  ASSERT(span->location == Span::IN_USE);
+  ASSERT(span->sizeclass == 0);
   Event(span, 'T', n);
 
   const int extra = span->length - n;
   Span* leftover = NewSpan(span->start + n, extra);
-  leftover->location = span->location;
+  ASSERT(leftover->location == Span::IN_USE);
   Event(leftover, 'U', extra);
   RecordSpan(leftover);
   pagemap_.set(span->start + n - 1, span);  // Update map from pageid to span

@@ -162,44 +148,25 @@ Span* PageHeap::Split(Span* span, Length n) {
   return leftover;
 }
 
-Span* PageHeap::Carve(Span* span, Length n, size_t sc, size_t align) {
+Span* PageHeap::Carve(Span* span, Length n) {
   ASSERT(n > 0);
   ASSERT(span->location != Span::IN_USE);
-  ASSERT(align >= kPageSize);
-
-  Length align_pages = align >> kPageShift;
+  const int old_location = span->location;
   RemoveFromFreeList(span);
-
-  if (span->start & (align_pages - 1)) {
-    Length skip_for_alignment = align_pages - (span->start & (align_pages - 1));
-    Span* aligned = Split(span, skip_for_alignment);
-    PrependToFreeList(span);  // Skip coalescing - no candidates possible
-    span = aligned;
-  }
+  span->location = Span::IN_USE;
+  Event(span, 'A', n);
 
   const int extra = span->length - n;
   ASSERT(extra >= 0);
   if (extra > 0) {
-    Span* leftover = Split(span, n);
-    PrependToFreeList(leftover);
+    Span* leftover = NewSpan(span->start + n, extra);
+    leftover->location = old_location;
+    Event(leftover, 'S', extra);
+    RecordSpan(leftover);
+    PrependToFreeList(leftover);  // Skip coalescing - no candidates possible
+    span->length = n;
+    pagemap_.set(span->start + n - 1, span);
   }
-
-  span->location = Span::IN_USE;
-  span->sizeclass = sc;
-  Event(span, 'A', n);
-
-  // Cache sizeclass info eagerly.  Locking is not necessary.
-  // (Instead of being eager, we could just replace any stale info
-  // about this span, but that seems to be no better in practice.)
-  CacheSizeClass(span->start, sc);
-
-  if (sc != kLargeSizeClass) {
-    for (Length i = 1; i < n; i++) {
-      pagemap_.set(span->start + i, span);
-      CacheSizeClass(span->start + i, sc);
-    }
-  }
-
   ASSERT(Check());
   return span;
 }

@@ -351,6 +318,18 @@ Length PageHeap::ReleaseAtLeastNPages(Length num_pages) {
   return released_pages;
 }
 
+void PageHeap::RegisterSizeClass(Span* span, size_t sc) {
+  // Associate span object with all interior pages as well
+  ASSERT(span->location == Span::IN_USE);
+  ASSERT(GetDescriptor(span->start) == span);
+  ASSERT(GetDescriptor(span->start+span->length-1) == span);
+  Event(span, 'C', sc);
+  span->sizeclass = sc;
+  for (Length i = 1; i < span->length-1; i++) {
+    pagemap_.set(span->start+i, span);
+  }
+}
+
 static double MB(uint64_t bytes) {
   return bytes / 1048576.0;
 }
@@ -93,49 +93,21 @@ class PERFTOOLS_DLL_DECL PageHeap {
  public:
   PageHeap();
 
-  // Allocate a run of "n" pages.  Returns NULL if out of memory.
-  // Caller should not pass "n == 0" -- instead, n should have been
-  // rounded up already.  The span will be used for allocating objects
-  // with the specifled sizeclass sc (sc must be zero for large
-  // objects). The first page of the span will be aligned to the value
-  // specified by align, which must be a power of two.
-  Span* New(Length n, size_t sc, size_t align);
+  // Allocate a run of "n" pages.  Returns zero if out of memory.
+  // Caller should not pass "n == 0" -- instead, n should have
+  // been rounded up already.
+  Span* New(Length n);
 
   // Delete the span "[p, p+n-1]".
   // REQUIRES: span was returned by earlier call to New() and
   //           has not yet been deleted.
   void Delete(Span* span);
 
-  // Gets either the size class of addr, if it is a small object, or it's span.
-  // Return:
-  //   if addr is invalid:
-  //     leave *out_sc and *out_span unchanged and return false;
-  //   if addr is valid and has a small size class:
-  //     *out_sc = the size class
-  //     *out_span = <undefined>
-  //     return true
-  //   if addr is valid and has a large size class:
-  //     *out_sc = kLargeSizeClass
-  //     *out_span = the span pointer
-  //     return true
-  bool GetSizeClassOrSpan(void* addr, size_t* out_sc, Span** out_span) {
-    const PageID p = reinterpret_cast<uintptr_t>(addr) >> kPageShift;
-    size_t cl = GetSizeClassIfCached(p);
-    Span* span = NULL;
-
-    if (cl != kLargeSizeClass) {
-      ASSERT(cl == GetDescriptor(p)->sizeclass);
-    } else {
-      span = GetDescriptor(p);
-      if (!span) {
-        return false;
-      }
-      cl = span->sizeclass;
-    }
-    *out_span = span;
-    *out_sc = cl;
-    return true;
-  }
+  // Mark an allocated span as being used for small objects of the
+  // specified size-class.
+  // REQUIRES: span was returned by an earlier call to New()
+  //           and has not yet been deleted.
+  void RegisterSizeClass(Span* span, size_t sc);
 
   // Split an allocated span into two spans: one of length "n" pages
   // followed by another span of length "span->length - n" pages.
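[Editor's sketch -- not part of the commit] After this change, allocation for a size class is a two-step protocol: allocate pages, then register the size class (compare CentralFreeList::Populate() earlier in this commit). The helper below is hypothetical and assumes the caller holds the page-heap lock, as Populate() does:

    // Span, Length and PageHeap are the types declared in this header.
    Span* NewSpanForSizeClass(PageHeap* heap, Length npages, size_t size_class) {
      Span* span = heap->New(npages);               // NULL when out of memory
      if (span != NULL) {
        heap->RegisterSizeClass(span, size_class);  // maps interior pages to span
      }
      return span;
    }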
@ -143,29 +115,14 @@ class PERFTOOLS_DLL_DECL PageHeap {
|
||||
// Returns a pointer to the second span.
|
||||
//
|
||||
// REQUIRES: "0 < n < span->length"
|
||||
// REQUIRES: a) the span is free or b) sizeclass == 0
|
||||
// REQUIRES: span->location == IN_USE
|
||||
// REQUIRES: span->sizeclass == 0
|
||||
Span* Split(Span* span, Length n);
|
||||
|
||||
// Return the descriptor for the specified page. Returns NULL if
|
||||
// this PageID was not allocated previously.
|
||||
inline Span* GetDescriptor(PageID p) const {
|
||||
Span* ret = reinterpret_cast<Span*>(pagemap_.get(p));
|
||||
#ifndef NDEBUG
|
||||
if (ret != NULL && ret->location == Span::IN_USE) {
|
||||
size_t cl = GetSizeClassIfCached(p);
|
||||
// Three cases:
|
||||
// - The object is not cached
|
||||
// - The object is cached correctly
|
||||
// - It is a large object and we're not looking at the first
|
||||
// page. This happens in coalescing.
|
||||
ASSERT(cl == kLargeSizeClass || cl == ret->sizeclass ||
|
||||
(ret->start != p && ret->sizeclass == kLargeSizeClass));
|
||||
// If the object is sampled, it must have be kLargeSizeClass
|
||||
ASSERT(ret->sizeclass == kLargeSizeClass || !ret->sample);
|
||||
}
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
return reinterpret_cast<Span*>(pagemap_.get(p));
|
||||
}
|
||||
|
||||
// Dump state to stderr
|
||||
@ -266,7 +223,7 @@ class PERFTOOLS_DLL_DECL PageHeap {
|
||||
// length exactly "n" and mark it as non-free so it can be returned
|
||||
// to the client. After all that, decrease free_pages_ by n and
|
||||
// return span.
|
||||
Span* Carve(Span* span, Length n, size_t sc, size_t align);
|
||||
Span* Carve(Span* span, Length n);
|
||||
|
||||
void RecordSpan(Span* span) {
|
||||
pagemap_.set(span->start, span);
|
||||
@ -277,7 +234,7 @@ class PERFTOOLS_DLL_DECL PageHeap {
|
||||
|
||||
// Allocate a large span of length == n. If successful, returns a
|
||||
// span of exactly the specified length. Else, returns NULL.
|
||||
Span* AllocLarge(Length n, size_t sc, size_t align);
|
||||
Span* AllocLarge(Length n);
|
||||
|
||||
// Coalesce span with neighboring spans if possible, prepend to
|
||||
// appropriate free list, and adjust stats.
|
||||
|
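The three-way contract documented for GetSizeClassOrSpan() is easiest to see from the caller's side. Below is a minimal, self-contained sketch of that dispatch; Span, kLargeSizeClass, and the lookup itself are toy stand-ins here, not tcmalloc's real pagemap-backed implementation:

#include <cstddef>
#include <cstdio>

static const size_t kLargeSizeClass = 0;           // sentinel, as in the header
struct Span { size_t sizeclass; size_t length; };  // toy stand-in

// Mirrors the contract above: false for unknown pointers, a small size
// class for small objects, kLargeSizeClass plus the span for large ones.
static bool GetSizeClassOrSpan(Span* desc, size_t cached_cl,
                               size_t* out_sc, Span** out_span) {
  if (cached_cl != kLargeSizeClass) {  // small object: class was cached
    *out_sc = cached_cl;
    return true;                       // *out_span is documented as undefined
  }
  if (desc == NULL) return false;      // a pointer this heap never returned
  *out_sc = desc->sizeclass;
  *out_span = desc;
  return true;
}

int main() {
  Span large = { kLargeSizeClass, 4 };  // pretend: a 4-page large allocation
  size_t sc;
  Span* span = NULL;
  if (GetSizeClassOrSpan(&large, kLargeSizeClass, &sc, &span) &&
      sc == kLargeSizeClass) {
    printf("large object spanning %zu pages\n", span->length);
  }
  return 0;
}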
33
src/pprof
@ -215,7 +215,7 @@ Call-graph Options:
                      (i.e. direct leak generators) more visible

Miscellaneous:
   --tools=<prefix>          Prefix for object tool pathnames
   --tools=<prefix or binary:fullpath>[,...]  \$PATH for object tool pathnames
   --test            Run unit tests
   --help            This message
   --version         Version information
@ -4331,18 +4331,27 @@ sub ConfigureTool {
  my $tool = shift;
  my $path;

  if ($main::opt_tools ne "") {
    # Use a prefix specified by the --tools option...
    $path = $main::opt_tools . $tool;
    if (!-x $path) {
      error("No '$tool' found with prefix specified by --tools $main::opt_tools\n");
  # --tools (or $PPROF_TOOLS) is a comma separated list, where each
  # item is either a) a pathname prefix, or b) a map of the form
  # <tool>:<path>.  First we look for an entry of type (b) for our
  # tool.  If one is found, we use it.  Otherwise, we consider all the
  # pathname prefixes in turn, until one yields an existing file.  If
  # none does, we use a default path.
  my $tools = $main::opt_tools || $ENV{"PPROF_TOOLS"} || "";
  if ($tools =~ m/(,|^)\Q$tool\E:([^,]*)/) {
    $path = $2;
    # TODO(csilvers): sanity-check that $path exists?  Hard if it's relative.
  } elsif ($tools) {
    foreach my $prefix (split(',', $tools)) {
      next if ($prefix =~ /:/);   # ignore "tool:fullpath" entries in the list
      if (-x $prefix . $tool) {
        $path = $prefix . $tool;
        last;
      }
    }
  } elsif (exists $ENV{"PPROF_TOOLS"} &&
           $ENV{"PPROF_TOOLS"} ne "") {
    #... or specified with the PPROF_TOOLS environment variable...
    $path = $ENV{"PPROF_TOOLS"} . $tool;
    if (!-x $path) {
      error("No '$tool' found with prefix specified by PPROF_TOOLS=$ENV{PPROF_TOOLS}\n");
    if (!$path) {
      error("No '$tool' found with prefix specified by " .
            "--tools (or \$PPROF_TOOLS) '$tools'\n");
    }
  } else {
    # ... otherwise use the version that exists in the same directory as

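To illustrate the parsing above (all paths hypothetical): a bare prefix applies to every tool, a tool:fullpath entry overrides a single tool, and the two can be mixed, with tool:fullpath entries skipped when prefixes are scanned:

  pprof --tools=/usr/crosstool/bin/arm-eabi- ./a.out profile.0001.heap
  pprof --tools=nm:/opt/elf/bin/eu-nm,/usr/local/bin/ ./a.out profile.0001.heap

The same list can also be supplied through the PPROF_TOOLS environment variable, since $main::opt_tools falls back to it.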
@ -42,16 +42,15 @@ using std::min;
// The approximate gap in bytes between sampling actions.
// I.e., we take one sample approximately once every
// tcmalloc_sample_parameter bytes of allocation
// i.e. about once every 512KB.
// i.e. about once every 512KB if value is 1<<19.
#ifdef NO_TCMALLOC_SAMPLES
DEFINE_int64(tcmalloc_sample_parameter, 0,
             "Unused: code is compiled with NO_TCMALLOC_SAMPLES");
#else
DEFINE_int64(tcmalloc_sample_parameter,
             EnvToInt64("TCMALLOC_SAMPLE_PARAMETER", 1<<19),
             EnvToInt64("TCMALLOC_SAMPLE_PARAMETER", 0),
             "The approximate gap in bytes between sampling actions. "
             "This must be between 1 and 1<<58.");
// Note: there are other places in this file where the number 19 occurs.
             "This must be between 1 and 2^58.");
#endif

namespace tcmalloc {

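With the default now 0, sampling stays off unless requested. Since the flag reads TCMALLOC_SAMPLE_PARAMETER through EnvToInt64 at startup, the old behavior can be restored per run; 524288 is 1<<19, the former ~512KB default (the program name below is a placeholder):

  TCMALLOC_SAMPLE_PARAMETER=524288 ./myprogram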
@ -60,10 +60,6 @@ struct Span {
  int value[64];
#endif

  void* start_ptr() {
    return reinterpret_cast<void*>(start << kPageShift);
  }

  // What freelist the span is on: IN_USE if on none, or normal or returned
  enum { IN_USE, ON_NORMAL_FREELIST, ON_RETURNED_FREELIST };
};

134
src/tcmalloc.cc
@ -469,6 +469,7 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
              "MALLOC: %12" PRIu64 " Spans in use\n"
              "MALLOC: %12" PRIu64 " Thread heaps in use\n"
              "MALLOC: %12" PRIu64 " (%7.1f MB) Metadata allocated\n"
              "MALLOC: %12" PRIu64 " Tcmalloc page size\n"
              "------------------------------------------------\n",
              stats.pageheap.system_bytes, stats.pageheap.system_bytes / MB,
              bytes_in_use, bytes_in_use / MB,
@ -479,7 +480,8 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
              stats.thread_bytes, stats.thread_bytes / MB,
              uint64_t(Static::span_allocator()->inuse()),
              uint64_t(ThreadCache::HeapsInUse()),
              stats.metadata_bytes, stats.metadata_bytes / MB);
              stats.metadata_bytes, stats.metadata_bytes / MB,
              uint64_t(kPageSize));
}

static void PrintStats(int level) {
@ -637,9 +639,8 @@ class TCMallocImplementation : public MallocExtension {
  }

  if (strcmp(name, "tcmalloc.slack_bytes") == 0) {
    // We assume that bytes in the page heap are not fragmented too
    // badly, and are therefore available for allocation without
    // growing the pageheap system byte count.
    // Kept for backwards compatibility.  Now defined externally as:
    //    pageheap_free_bytes + pageheap_unmapped_bytes.
    SpinLockHolder l(Static::pageheap_lock());
    PageHeap::Stats stats = Static::pageheap()->stats();
    *value = stats.free_bytes + stats.unmapped_bytes;
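The property itself is unchanged for clients; it can still be read through the public MallocExtension interface. A minimal sketch (GetNumericProperty and the property name are real; the program around them is illustrative):

#include <cstdio>
#include <google/malloc_extension.h>

int main() {
  size_t slack = 0;
  // Per the comment above: pageheap free bytes + unmapped bytes.
  if (MallocExtension::instance()->GetNumericProperty("tcmalloc.slack_bytes",
                                                      &slack)) {
    printf("tcmalloc.slack_bytes = %zu\n", slack);
  }
  return 0;
}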
@ -798,25 +799,22 @@ static TCMallocGuard module_enter_exit_hook;
// Helpers for the exported routines below
//-------------------------------------------------------------------

static inline bool CheckCachedSizeClass(void *ptr) {
  PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
  size_t cached_value = Static::pageheap()->GetSizeClassIfCached(p);
  return cached_value == 0 ||
      cached_value == Static::pageheap()->GetDescriptor(p)->sizeclass;
}

static inline void* CheckedMallocResult(void *result) {
  Span* fetched_span;
  size_t cl;

  if (result != NULL) {
    ASSERT(Static::pageheap()->GetSizeClassOrSpan(result, &cl, &fetched_span));
  }

  ASSERT(result == NULL || CheckCachedSizeClass(result));
  return result;
}

static inline void* SpanToMallocResult(Span *span) {
  Span* fetched_span = NULL;
  size_t cl = 0;
  ASSERT(Static::pageheap()->GetSizeClassOrSpan(span->start_ptr(),
                                                &cl, &fetched_span));
  ASSERT(cl == kLargeSizeClass);
  ASSERT(span == fetched_span);
  return span->start_ptr();
  Static::pageheap()->CacheSizeClass(span->start, 0);
  return
      CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift));
}

static void* DoSampledAllocation(size_t size) {
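CheckCachedSizeClass() above turns a pointer into a PageID before consulting the cache. That address arithmetic is self-contained; a runnable sketch (the shift value is an assumption — it varies with build options such as -DTCMALLOC_LARGE_PAGES):

#include <cstdint>
#include <cstdio>

static const int kPageShift = 13;  // assumed page size of 8 KiB
typedef uintptr_t PageID;

int main() {
  void* ptr = reinterpret_cast<void*>(0x7f1234567890ULL);
  // Same computation as in CheckCachedSizeClass: drop the in-page offset bits.
  PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
  printf("%p lies on page %llu\n", ptr, (unsigned long long)p);
  return 0;
}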
@ -827,8 +825,7 @@ static void* DoSampledAllocation(size_t size) {

  SpinLockHolder h(Static::pageheap_lock());
  // Allocate span
  Span *span = Static::pageheap()->New(tcmalloc::pages(size == 0 ? 1 : size),
                                       kLargeSizeClass, kPageSize);
  Span *span = Static::pageheap()->New(tcmalloc::pages(size == 0 ? 1 : size));
  if (span == NULL) {
    return NULL;
  }
@ -919,7 +916,7 @@ inline void* do_malloc_pages(ThreadCache* heap, size_t size) {
    report_large = should_report_large(num_pages);
  } else {
    SpinLockHolder h(Static::pageheap_lock());
    Span* span = Static::pageheap()->New(num_pages, kLargeSizeClass, kPageSize);
    Span* span = Static::pageheap()->New(num_pages);
    result = (span == NULL ? NULL : SpanToMallocResult(span));
    report_large = should_report_large(num_pages);
  }
@ -975,22 +972,28 @@ static inline ThreadCache* GetCacheIfPresent() {
inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) {
  if (ptr == NULL) return;
  ASSERT(Static::pageheap() != NULL);  // Should not call free() before malloc()
  Span* span;
  size_t cl;
  const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
  Span* span = NULL;
  size_t cl = Static::pageheap()->GetSizeClassIfCached(p);

  if (!Static::pageheap()->GetSizeClassOrSpan(ptr, &cl, &span)) {
    // result can be false because the pointer passed in is invalid
    // (not something returned by malloc or friends), or because the
    // pointer was allocated with some other allocator besides
    // tcmalloc.  The latter can happen if tcmalloc is linked in via
    // a dynamic library, but is not listed last on the link line.
    // In that case, libraries after it on the link line will
    // allocate with libc malloc, but free with tcmalloc's free.
    (*invalid_free_fn)(ptr);  // Decide how to handle the bad free request
    return;
  if (cl == 0) {
    span = Static::pageheap()->GetDescriptor(p);
    if (!span) {
      // span can be NULL because the pointer passed in is invalid
      // (not something returned by malloc or friends), or because the
      // pointer was allocated with some other allocator besides
      // tcmalloc.  The latter can happen if tcmalloc is linked in via
      // a dynamic library, but is not listed last on the link line.
      // In that case, libraries after it on the link line will
      // allocate with libc malloc, but free with tcmalloc's free.
      (*invalid_free_fn)(ptr);  // Decide how to handle the bad free request
      return;
    }
    cl = span->sizeclass;
    Static::pageheap()->CacheSizeClass(p, cl);
  }

  if (cl != kLargeSizeClass) {
  if (cl != 0) {
    ASSERT(!Static::pageheap()->GetDescriptor(p)->sample);
    ThreadCache* heap = GetCacheIfPresent();
    if (heap != NULL) {
      heap->Deallocate(ptr, cl);
@ -1001,7 +1004,8 @@ inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) {
    }
  } else {
    SpinLockHolder h(Static::pageheap_lock());
    ASSERT(span != NULL && ptr == span->start_ptr());
    ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0);
    ASSERT(span != NULL && span->start == p);
    if (span->sample) {
      tcmalloc::DLL_Remove(span);
      Static::stacktrace_allocator()->Delete(
@ -1021,17 +1025,20 @@ inline size_t GetSizeWithCallback(void* ptr,
                                  size_t (*invalid_getsize_fn)(void*)) {
  if (ptr == NULL)
    return 0;

  Span* span;
  size_t cl;
  if (!Static::pageheap()->GetSizeClassOrSpan(ptr, &cl, &span)) {
    return (*invalid_getsize_fn)(ptr);
  }

  if (cl != kLargeSizeClass) {
  const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
  size_t cl = Static::pageheap()->GetSizeClassIfCached(p);
  if (cl != 0) {
    return Static::sizemap()->ByteSizeForClass(cl);
  } else {
    return span->length << kPageShift;
    Span *span = Static::pageheap()->GetDescriptor(p);
    if (span == NULL) {  // means we do not own this memory
      return (*invalid_getsize_fn)(ptr);
    } else if (span->sizeclass != 0) {
      Static::pageheap()->CacheSizeClass(p, span->sizeclass);
      return Static::sizemap()->ByteSizeForClass(span->sizeclass);
    } else {
      return span->length << kPageShift;
    }
  }
}

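As a worked example of the large-object branch in GetSizeWithCallback(): assuming 8 KiB pages (kPageShift = 13), a span with span->length == 3 reports 3 << 13 = 24576 usable bytes, while a small object's size always comes from its size-class table entry instead.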
@ -1126,10 +1133,39 @@ void* do_memalign(size_t align, size_t size) {
  // We will allocate directly from the page heap
  SpinLockHolder h(Static::pageheap_lock());

  // Any page-level allocation will be fine
  Span* span = Static::pageheap()->New(tcmalloc::pages(size),
                                       kLargeSizeClass, align);
  return span == NULL ? NULL : SpanToMallocResult(span);
  if (align <= kPageSize) {
    // Any page-level allocation will be fine
    // TODO: We could put the rest of this page in the appropriate
    // TODO: cache but it does not seem worth it.
    Span* span = Static::pageheap()->New(tcmalloc::pages(size));
    return span == NULL ? NULL : SpanToMallocResult(span);
  }

  // Allocate extra pages and carve off an aligned portion
  const Length alloc = tcmalloc::pages(size + align);
  Span* span = Static::pageheap()->New(alloc);
  if (span == NULL) return NULL;

  // Skip starting portion so that we end up aligned
  Length skip = 0;
  while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) {
    skip++;
  }
  ASSERT(skip < alloc);
  if (skip > 0) {
    Span* rest = Static::pageheap()->Split(span, skip);
    Static::pageheap()->Delete(span);
    span = rest;
  }

  // Skip trailing portion that we do not need to return
  const Length needed = tcmalloc::pages(size);
  ASSERT(span->length >= needed);
  if (span->length > needed) {
    Span* trailer = Static::pageheap()->Split(span, needed);
    Static::pageheap()->Delete(trailer);
  }
  return SpanToMallocResult(span);
}

// Helpers for use by exported routines below:

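The aligned path above — overallocate by align, split off a misaligned prefix, then trim the tail — does not depend on tcmalloc's types. A self-contained sketch of the same page arithmetic with toy numbers (not the real allocator; Split/Delete are reduced to index bookkeeping):

#include <cassert>
#include <cstdio>

typedef unsigned long Length;
static const int kPageShift = 13;  // assumed 8 KiB pages

int main() {
  const Length needed = 5;       // pages the caller actually asked for
  const Length align_pages = 4;  // requested alignment, in pages (power of two)
  const Length alloc = needed + align_pages;  // overallocate, as above
  Length start = 1027;           // pretend the page heap returned this page

  // Skip leading pages until the byte address is aligned.
  const unsigned long align_bytes =
      static_cast<unsigned long>(align_pages) << kPageShift;
  Length skip = 0;
  while ((((start + skip) << kPageShift) & (align_bytes - 1)) != 0) {
    skip++;
  }
  assert(skip < alloc);
  start += skip;  // the prefix would be Split() off and Delete()d

  // The tail beyond `needed` pages would likewise be split off and freed.
  printf("aligned run: pages [%lu, %lu)\n", start, start + needed);
  return 0;
}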
@ -44,13 +44,16 @@
#endif
#include <vector>
#include "base/logging.h"
#include "common.h"
#include <google/malloc_extension.h>

using std::vector;

int main(int argc, char** argv) {
  static const int kAllocSize = 36<<10;    // Bigger than tcmalloc page size
  static const int kTotalAlloc = 400 << 20;  // Allocate 400MB in total
  // Make kAllocSize larger than tcmalloc page size.
  static const int kAllocSize = 9 << kPageShift;
  // Allocate 400MB in total.
  static const int kTotalAlloc = 400 << 20;
  static const int kAllocIterations = kTotalAlloc / kAllocSize;

  // Allocate lots of objects
@ -26,7 +26,7 @@ static void TestPageHeap_Stats() {
  CheckStats(ph, 0, 0, 0);

  // Allocate a span 's1'
  tcmalloc::Span* s1 = ph->New(256, kLargeSizeClass, kPageSize);
  tcmalloc::Span* s1 = ph->New(256);
  CheckStats(ph, 256, 0, 0);

  // Split span 's1' into 's1', 's2'.  Delete 's2'
@ -80,7 +80,7 @@ struct FunctionAndId {
  int id;
};

#if defined(NO_THREADS) || !(defined(HAVE_PTHREADS) || defined(_WIN32))
#if defined(NO_THREADS) || !(defined(HAVE_PTHREAD) || defined(_WIN32))

extern "C" void RunThread(void (*fn)()) {
  (*fn)();
@ -42,7 +42,8 @@ using std::min;
using std::max;

DEFINE_int64(tcmalloc_max_total_thread_cache_bytes,
             EnvToInt64("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES", 16<<20),
             EnvToInt64("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES",
                        kDefaultOverallThreadCacheSize),
             "Bound on the total amount of bytes allocated to "
             "thread caches.  This bound is not strict, so it is possible "
             "for the cache to go over this bound in certain circumstances. ");

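Like the sampling parameter, this bound is read from the environment through EnvToInt64 when tcmalloc initializes, so it can be raised per process without rebuilding; an illustrative run with a 64 MB total cache (binary name hypothetical):

  TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=67108864 ./myserver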
@ -63,9 +63,6 @@ inline bool KernelSupportsTLS() {

class ThreadCache {
 public:
  // Default bound on the total amount of thread caches.
  static const size_t kDefaultOverallThreadCacheSize = 16 << 20;

  // All ThreadCache objects are kept in a linked list (for stats collection)
  ThreadCache* next_;
  ThreadCache* prev_;
@ -213,19 +210,6 @@ class ThreadCache {
    }
  };

  // The number of bytes one ThreadCache will steal from another when
  // the first ThreadCache is forced to Scavenge(), delaying the
  // next call to Scavenge for this thread.
  static const size_t kStealAmount = 1 << 16;

  // Lower and upper bounds on the per-thread cache sizes
  static const size_t kMinThreadCacheSize = kMaxSize * 2;  //kStealAmount;
  static const size_t kMaxThreadCacheSize = 2 << 20;

  // The number of times that a deallocation can cause a freelist to
  // go over its max_length() before shrinking max_length().
  static const int kMaxOverages = 3;

  // Gets and returns an object from the central cache, and, if possible,
  // also adds some objects of that size class to this thread cache.
  void* FetchFromCentralCache(size_t cl, size_t byte_size);
@ -154,7 +154,7 @@
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1

/* Define to 1 if you have the <sys/ucontext.h> header file. */
/* <sys/ucontext.h> is broken on redhat 7 */
#undef HAVE_SYS_UCONTEXT_H

/* Define to 1 if you have the <sys/wait.h> header file. */
@ -172,6 +172,9 @@
/* Define to 1 if you have the <unwind.h> header file. */
#undef HAVE_UNWIND_H

/* Define to 1 if you have the <valgrind.h> header file. */
#undef HAVE_VALGRIND_H

/* define if your compiler has __attribute__ */
#undef HAVE___ATTRIBUTE__

@ -45,10 +45,23 @@
# define PERFTOOLS_NO_ALIGNED_MALLOC 1
#endif

// This must be defined before the windows.h is included.  We need at
// least 0x0400 for mutex.h to have access to TryLock, and at least
// 0x0501 for patch_functions.cc to have access to GetModuleHandleEx.
// (This latter is an optimization we could take out if need be.)
#ifndef _WIN32_WINNT
# define _WIN32_WINNT 0x0501
#endif

#include "windows/port.h"

#define HAVE_SNPRINTF 1

// Some mingw distributions have a pthreads wrapper, but it doesn't
// work as well as native windows spinlocks (at least for us).  So
// pretend the pthreads wrapper doesn't exist, even when it does.
#undef HAVE_PTHREAD

#endif  /* __MINGW32__ */

#endif  /* GOOGLE_PERFTOOLS_WINDOWS_MINGW_H_ */
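The constraint described in the comment generalizes to any translation unit: _WIN32_WINNT must be set before the first windows.h include. A minimal standalone sketch (GetModuleHandleExA really is gated on 0x0501; error handling elided):

#ifndef _WIN32_WINNT
# define _WIN32_WINNT 0x0501   // must precede <windows.h>, as noted above
#endif
#include <windows.h>
#include <cstdio>

int main() {
  HMODULE mod = NULL;
  // Only declared when _WIN32_WINNT >= 0x0501 at compile time.
  if (GetModuleHandleExA(0, "kernel32.dll", &mod)) {
    printf("kernel32 loaded at %p\n", static_cast<void*>(mod));
  }
  return 0;
}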