From 8e188310f7d8732d81b7b04f193f89964b7af6c5 Mon Sep 17 00:00:00 2001 From: csilvers Date: Thu, 22 Mar 2007 04:55:49 +0000 Subject: [PATCH] Wed Jun 14 15:11:14 2006 Google Inc. * google-perftools: version 0.8 release * Experimental support for remote profiling added to pprof (many) * Fixed race condition in ProfileData::FlushTable (etune) * Better support for weird /proc maps (maxim, mec) * Fix heap-checker interaction with gdb (markus) * Better 64-bit support in pprof (aruns) * Reduce scavenging cost in tcmalloc by capping NumMoveSize (sanjay) * Cast syscall(SYS_mmap); works on more 64-bit systems now (menage) * Document the text output of pprof! (csilvers) * Better compiler support for no-THREADS and for old compilers (csilvers) * Make libunwind the default stack unwinder for x86-64 (aruns) * Somehow the COPYING file got erased. Regenerate it (csilvers) git-svn-id: http://gperftools.googlecode.com/svn/trunk@23 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 --- COPYING | 28 + ChangeLog | 15 + Makefile.am | 33 +- aclocal.m4 | 184 ++++- configure | 259 ++++++- configure.ac | 2 +- doc/cpu_profiler.html | 18 + doc/pprof_remote_servers.html | 190 ++++++ src/base/linux_syscall_support.h | 125 ++-- src/base/linuxthreads.c | 37 +- src/base/thread_lister.c | 16 +- src/google/heap-checker.h | 13 + src/heap-checker.cc | 123 +++- src/malloc_extension.cc | 15 + src/malloc_hook.cc | 2 +- src/pprof | 1005 +++++++++++++++++++--------- src/profiler.cc | 10 +- src/stacktrace.cc | 9 +- src/stacktrace_libunwind-inl.h | 4 +- src/tcmalloc.cc | 56 +- src/tests/heap-checker_unittest.cc | 26 +- src/tests/tcmalloc_unittest.cc | 48 +- 22 files changed, 1727 insertions(+), 491 deletions(-) create mode 100644 doc/pprof_remote_servers.html diff --git a/COPYING b/COPYING index e69de29..e4956cf 100644 --- a/COPYING +++ b/COPYING @@ -0,0 +1,28 @@ +Copyright (c) 2005, Google Inc. +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/ChangeLog b/ChangeLog index 22597c3..90bf766 100644 --- a/ChangeLog +++ b/ChangeLog @@ -85,3 +85,18 @@ Thu Apr 13 20:59:09 2006 Google Inc. * Syscall support for older kernels, including _syscall6 (markus) * Support PIC mode (markus, mbland, iant) * Better support for running in non-threaded contexts (csilvers) + +Wed Jun 14 15:11:14 2006 Google Inc. 
+ + * google-perftools: version 0.8 release + * Experimental support for remote profiling added to pprof (many) + * Fixed race condition in ProfileData::FlushTable (etune) + * Better support for weird /proc maps (maxim, mec) + * Fix heap-checker interaction with gdb (markus) + * Better 64-bit support in pprof (aruns) + * Reduce scavenging cost in tcmalloc by capping NumMoveSize (sanjay) + * Cast syscall(SYS_mmap); works on more 64-bit systems now (menage) + * Document the text output of pprof! (csilvers) + * Better compiler support for no-THREADS and for old compilers (csilvers) + * Make libunwind the default stack unwinder for x86-64 (aruns) + * Somehow the COPYING file got erased. Regenerate it (csilvers) diff --git a/Makefile.am b/Makefile.am index 8d39bbb..83fa966 100644 --- a/Makefile.am +++ b/Makefile.am @@ -115,21 +115,24 @@ libtcmalloc_minimal_la_LDFLAGS = $(PTHREAD_CFLAGS) -export-symbols-regex $(TCMAL libtcmalloc_minimal_la_LIBADD = $(PTHREAD_LIBS) libstacktrace.la ### Unittests -TESTS += malloc_unittest -MALLOC_UNITEST_INCLUDES = src/config.h \ - src/google/malloc_extension.h \ - src/google/malloc_hook.h \ - src/base/basictypes.h \ - src/google/perftools/hash_set.h \ - src/maybe_threads.h -malloc_unittest_SOURCES = src/tests/tcmalloc_unittest.cc \ - src/malloc_hook.cc \ - src/malloc_extension.cc \ - src/maybe_threads.cc \ - $(MALLOC_UNITTEST_INCLUDES) -malloc_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) -malloc_unittest_LDFLAGS = $(PTHREAD_CFLAGS) -malloc_unittest_LDADD = $(PTHREAD_LIBS) + +# Commented out for the moment because malloc(very_big_num) is broken in +# standard libc! At least, in some situations, some of the time. 
+## TESTS += malloc_unittest +## MALLOC_UNITEST_INCLUDES = src/config.h \ +## src/google/malloc_extension.h \ +## src/google/malloc_hook.h \ +## src/base/basictypes.h \ +## src/google/perftools/hash_set.h \ +## src/maybe_threads.h +## malloc_unittest_SOURCES = src/tests/tcmalloc_unittest.cc \ +## src/malloc_hook.cc \ +## src/malloc_extension.cc \ +## src/maybe_threads.cc \ +## $(MALLOC_UNITTEST_INCLUDES) +## malloc_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) +## malloc_unittest_LDFLAGS = $(PTHREAD_CFLAGS) +## malloc_unittest_LDADD = $(PTHREAD_LIBS) TESTS += tcmalloc_unittest TCMALLOC_UNITTEST_INCLUDES = src/google/malloc_extension.h diff --git a/aclocal.m4 b/aclocal.m4 index 0b68740..d98f614 100644 --- a/aclocal.m4 +++ b/aclocal.m4 @@ -6751,7 +6751,61 @@ AC_DEFUN([AC_COMPILER_CHARACTERISTICS], # This was retrieved from -# http://www.gnu.org/software/ac-archive/htmldoc/acx_pthread.html +# http://0pointer.de/cgi-bin/viewcvs.cgi/trunk/common/acx_pthread.m4?rev=1220 +# See also (perhaps for new versions?) +# http://0pointer.de/cgi-bin/viewcvs.cgi/trunk/common/acx_pthread.m4 + +dnl @synopsis ACX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) +dnl +dnl @summary figure out how to build C programs using POSIX threads +dnl +dnl This macro figures out how to build C programs using POSIX threads. +dnl It sets the PTHREAD_LIBS output variable to the threads library and +dnl linker flags, and the PTHREAD_CFLAGS output variable to any special +dnl C compiler flags that are needed. (The user can also force certain +dnl compiler flags/libs to be tested by setting these environment +dnl variables.) +dnl +dnl Also sets PTHREAD_CC to any special C compiler that is needed for +dnl multi-threaded programs (defaults to the value of CC otherwise). +dnl (This is necessary on AIX to use the special cc_r compiler alias.) +dnl +dnl NOTE: You are assumed to not only compile your program with these +dnl flags, but also link it with them as well. e.g. 
you should link +dnl with $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... $PTHREAD_LIBS +dnl $LIBS +dnl +dnl If you are only building threads programs, you may wish to use +dnl these variables in your default LIBS, CFLAGS, and CC: +dnl +dnl LIBS="$PTHREAD_LIBS $LIBS" +dnl CFLAGS="$CFLAGS $PTHREAD_CFLAGS" +dnl CC="$PTHREAD_CC" +dnl +dnl In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute +dnl constant has a nonstandard name, defines PTHREAD_CREATE_JOINABLE to +dnl that name (e.g. PTHREAD_CREATE_UNDETACHED on AIX). +dnl +dnl ACTION-IF-FOUND is a list of shell commands to run if a threads +dnl library is found, and ACTION-IF-NOT-FOUND is a list of commands to +dnl run it if it is not found. If ACTION-IF-FOUND is not specified, the +dnl default action will define HAVE_PTHREAD. +dnl +dnl Please let the authors know if this macro fails on any platform, or +dnl if you have any other suggestions or comments. This macro was based +dnl on work by SGJ on autoconf scripts for FFTW (www.fftw.org) (with +dnl help from M. Frigo), as well as ac_pthread and hb_pthread macros +dnl posted by Alejandro Forero Cuervo to the autoconf macro repository. +dnl We are also grateful for the helpful feedback of numerous users. +dnl +dnl @category InstalledPackages +dnl @author Steven G. Johnson +dnl @version 2005-06-15 +dnl @license GPLWithACException +dnl +dnl Checks for GCC shared/pthread inconsistency based on work by +dnl Marcin Owsiany + AC_DEFUN([ACX_PTHREAD], [ AC_REQUIRE([AC_CANONICAL_HOST]) @@ -6809,6 +6863,7 @@ acx_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -m # -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it # doesn't hurt to check since this sometimes defines pthreads too; # also defines -D_REENTRANT) +# ... 
-mt is also the pthreads flag for HP/aCC # pthread: Linux, etcetera # --thread-safe: KAI C++ # pthread-config: use pthread-config program (for GNU Pth library) @@ -6818,13 +6873,13 @@ case "${host_cpu}-${host_os}" in # On Solaris (at least, for some versions), libc contains stubbed # (non-functional) versions of the pthreads routines, so link-based - # tests will erroneously succeed. (We need to link with -pthread or + # tests will erroneously succeed. (We need to link with -pthreads/-mt/ # -lpthread.) (The stubs are missing pthread_cleanup_push, or rather # a function called by this macro, so we could check for that, but # who knows whether they'll stub that too in a future libc.) So, # we'll just look for -pthreads and -lpthread first: - acx_pthread_flags="-pthread -pthreads pthread -mt $acx_pthread_flags" + acx_pthread_flags="-pthreads pthread -mt -pthread $acx_pthread_flags" ;; esac @@ -6841,12 +6896,12 @@ for flag in $acx_pthread_flags; do PTHREAD_CFLAGS="$flag" ;; - pthread-config) - AC_CHECK_PROG(acx_pthread_config, pthread-config, yes, no) - if test x"$acx_pthread_config" = xno; then continue; fi - PTHREAD_CFLAGS="`pthread-config --cflags`" - PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`" - ;; + pthread-config) + AC_CHECK_PROG(acx_pthread_config, pthread-config, yes, no) + if test x"$acx_pthread_config" = xno; then continue; fi + PTHREAD_CFLAGS="`pthread-config --cflags`" + PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`" + ;; *) AC_MSG_CHECKING([for the pthreads library -l$flag]) @@ -6895,12 +6950,12 @@ if test "x$acx_pthread_ok" = xyes; then CFLAGS="$CFLAGS $PTHREAD_CFLAGS" # Detect AIX lossage: JOINABLE attribute is called UNDETACHED. 
- AC_MSG_CHECKING([for joinable pthread attribute]) - attr_name=unknown - for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do - AC_TRY_LINK([#include ], [int attr=$attr;], + AC_MSG_CHECKING([for joinable pthread attribute]) + attr_name=unknown + for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do + AC_TRY_LINK([#include ], [int attr=$attr; return attr;], [attr_name=$attr; break]) - done + done AC_MSG_RESULT($attr_name) if test "$attr_name" != PTHREAD_CREATE_JOINABLE; then AC_DEFINE_UNQUOTED(PTHREAD_CREATE_JOINABLE, $attr_name, @@ -6924,6 +6979,107 @@ if test "x$acx_pthread_ok" = xyes; then # More AIX lossage: must compile with cc_r AC_CHECK_PROG(PTHREAD_CC, cc_r, cc_r, ${CC}) + + # The next part tries to detect GCC inconsistency with -shared on some + # architectures and systems. The problem is that in certain + # configurations, when -shared is specified, GCC "forgets" to + # internally use various flags which are still necessary. + + # First, check whether caller wants us to skip -shared checks + # this is useful + AC_MSG_CHECKING([whether to check for GCC pthread/shared inconsistencies]) + if test x"$GCC" != xyes; then + AC_MSG_RESULT([no]) + else + AC_MSG_RESULT([yes]) + + # In order not to create several levels of indentation, we test + # the value of "$ok" until we find out the cure or run out of + # ideas. + ok="no" + + # + # Prepare the flags + # + save_CFLAGS="$CFLAGS" + save_LIBS="$LIBS" + save_CC="$CC" + # Try with the flags determined by the earlier checks. + # + # -Wl,-z,defs forces link-time symbol resolution, so that the + # linking checks with -shared actually have any value + # + # FIXME: -fPIC is required for -shared on many architectures, + # so we specify it here, but the right way would probably be to + # properly detect whether it is actually required. 
+ CFLAGS="-shared -fPIC -Wl,-z,defs $CFLAGS $PTHREAD_CFLAGS" + LIBS="$PTHREAD_LIBS $LIBS" + CC="$PTHREAD_CC" + + AC_MSG_CHECKING([whether -pthread is sufficient with -shared]) + AC_TRY_LINK([#include ], + [pthread_t th; pthread_join(th, 0); + pthread_attr_init(0); pthread_cleanup_push(0, 0); + pthread_create(0,0,0,0); pthread_cleanup_pop(0); ], + [ok=yes]) + + if test "x$ok" = xyes; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + fi + + # + # Linux gcc on some architectures such as mips/mipsel forgets + # about -lpthread + # + if test x"$ok" = xno; then + AC_MSG_CHECKING([whether -lpthread fixes that]) + LIBS="-lpthread $PTHREAD_LIBS $save_LIBS" + AC_TRY_LINK([#include ], + [pthread_t th; pthread_join(th, 0); + pthread_attr_init(0); pthread_cleanup_push(0, 0); + pthread_create(0,0,0,0); pthread_cleanup_pop(0); ], + [ok=yes]) + + if test "x$ok" = xyes; then + AC_MSG_RESULT([yes]) + PTHREAD_LIBS="-lpthread $PTHREAD_LIBS" + else + AC_MSG_RESULT([no]) + fi + fi + # + # FreeBSD 4.10 gcc forgets to use -lc_r instead of -lc + # + if test x"$ok" = xno; then + AC_MSG_CHECKING([whether -lc_r fixes that]) + LIBS="-lc_r $PTHREAD_LIBS $save_LIBS" + AC_TRY_LINK([#include ], + [pthread_t th; pthread_join(th, 0); + pthread_attr_init(0); pthread_cleanup_push(0, 0); + pthread_create(0,0,0,0); pthread_cleanup_pop(0); ], + [ok=yes]) + + if test "x$ok" = xyes; then + AC_MSG_RESULT([yes]) + PTHREAD_LIBS="-lc_r $PTHREAD_LIBS" + else + AC_MSG_RESULT([no]) + fi + fi + if test x"$ok" = xno; then + # OK, we have run out of ideas + AC_MSG_WARN([Impossible to determine how to use pthreads with shared libraries]) + + # so it's not safe to assume that we may use pthreads + acx_pthread_ok=no + fi + + CFLAGS="$save_CFLAGS" + LIBS="$save_LIBS" + CC="$save_CC" + fi else PTHREAD_CC="$CC" fi diff --git a/configure b/configure index 2a11d9e..9147d97 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. 
-# Generated by GNU Autoconf 2.57 for google-perftools 0.7. +# Generated by GNU Autoconf 2.57 for google-perftools 0.8. # # Report bugs to . # @@ -422,8 +422,8 @@ SHELL=${CONFIG_SHELL-/bin/sh} # Identity of this package. PACKAGE_NAME='google-perftools' PACKAGE_TARNAME='google-perftools' -PACKAGE_VERSION='0.7' -PACKAGE_STRING='google-perftools 0.7' +PACKAGE_VERSION='0.8' +PACKAGE_STRING='google-perftools 0.8' PACKAGE_BUGREPORT='opensource@google.com' ac_unique_file="README" @@ -953,7 +953,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures google-perftools 0.7 to adapt to many kinds of systems. +\`configure' configures google-perftools 0.8 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1019,7 +1019,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of google-perftools 0.7:";; + short | recursive ) echo "Configuration of google-perftools 0.8:";; esac cat <<\_ACEOF @@ -1125,7 +1125,7 @@ fi test -n "$ac_init_help" && exit 0 if $ac_init_version; then cat <<\_ACEOF -google-perftools configure 0.7 +google-perftools configure 0.8 generated by GNU Autoconf 2.57 Copyright 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, 2002 @@ -1140,7 +1140,7 @@ cat >&5 <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by google-perftools $as_me 0.7, which was +It was created by google-perftools $as_me 0.8, which was generated by GNU Autoconf 2.57. Invocation command line was $ $0 $@ @@ -1733,7 +1733,7 @@ fi # Define the identity of the package. 
PACKAGE=google-perftools - VERSION=0.7 + VERSION=0.8 cat >>confdefs.h <<_ACEOF @@ -21171,6 +21171,7 @@ acx_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -m # -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it # doesn't hurt to check since this sometimes defines pthreads too; # also defines -D_REENTRANT) +# ... -mt is also the pthreads flag for HP/aCC # pthread: Linux, etcetera # --thread-safe: KAI C++ # pthread-config: use pthread-config program (for GNU Pth library) @@ -21180,13 +21181,13 @@ case "${host_cpu}-${host_os}" in # On Solaris (at least, for some versions), libc contains stubbed # (non-functional) versions of the pthreads routines, so link-based - # tests will erroneously succeed. (We need to link with -pthread or + # tests will erroneously succeed. (We need to link with -pthreads/-mt/ # -lpthread.) (The stubs are missing pthread_cleanup_push, or rather # a function called by this macro, so we could check for that, but # who knows whether they'll stub that too in a future libc.) So, # we'll just look for -pthreads and -lpthread first: - acx_pthread_flags="-pthread -pthreads pthread -mt $acx_pthread_flags" + acx_pthread_flags="-pthreads pthread -mt -pthread $acx_pthread_flags" ;; esac @@ -21205,8 +21206,8 @@ echo $ECHO_N "checking whether pthreads work with $flag... $ECHO_C" >&6 PTHREAD_CFLAGS="$flag" ;; - pthread-config) - # Extract the first word of "pthread-config", so it can be a program name with args. + pthread-config) + # Extract the first word of "pthread-config", so it can be a program name with args. set dummy pthread-config; ac_word=$2 echo "$as_me:$LINENO: checking for $ac_word" >&5 echo $ECHO_N "checking for $ac_word... 
$ECHO_C" >&6 @@ -21242,10 +21243,10 @@ else echo "${ECHO_T}no" >&6 fi - if test x"$acx_pthread_config" = xno; then continue; fi - PTHREAD_CFLAGS="`pthread-config --cflags`" - PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`" - ;; + if test x"$acx_pthread_config" = xno; then continue; fi + PTHREAD_CFLAGS="`pthread-config --cflags`" + PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`" + ;; *) echo "$as_me:$LINENO: checking for the pthreads library -l$flag" >&5 @@ -21328,11 +21329,11 @@ if test "x$acx_pthread_ok" = xyes; then CFLAGS="$CFLAGS $PTHREAD_CFLAGS" # Detect AIX lossage: JOINABLE attribute is called UNDETACHED. - echo "$as_me:$LINENO: checking for joinable pthread attribute" >&5 + echo "$as_me:$LINENO: checking for joinable pthread attribute" >&5 echo $ECHO_N "checking for joinable pthread attribute... $ECHO_C" >&6 - attr_name=unknown - for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do - cat >conftest.$ac_ext <<_ACEOF + attr_name=unknown + for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do + cat >conftest.$ac_ext <<_ACEOF #line $LINENO "configure" /* confdefs.h. */ _ACEOF @@ -21343,7 +21344,7 @@ cat >>conftest.$ac_ext <<_ACEOF int main () { -int attr=$attr; +int attr=$attr; return attr; ; return 0; } @@ -21367,7 +21368,7 @@ sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext - done + done echo "$as_me:$LINENO: result: $attr_name" >&5 echo "${ECHO_T}$attr_name" >&6 if test "$attr_name" != PTHREAD_CREATE_JOINABLE; then @@ -21431,6 +21432,216 @@ else echo "${ECHO_T}no" >&6 fi + + # The next part tries to detect GCC inconsistency with -shared on some + # architectures and systems. The problem is that in certain + # configurations, when -shared is specified, GCC "forgets" to + # internally use various flags which are still necessary. 
+ + # First, check whether caller wants us to skip -shared checks + # this is useful + echo "$as_me:$LINENO: checking whether to check for GCC pthread/shared inconsistencies" >&5 +echo $ECHO_N "checking whether to check for GCC pthread/shared inconsistencies... $ECHO_C" >&6 + if test x"$GCC" != xyes; then + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + else + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + + # In order not to create several levels of indentation, we test + # the value of "$ok" until we find out the cure or run out of + # ideas. + ok="no" + + # + # Prepare the flags + # + save_CFLAGS="$CFLAGS" + save_LIBS="$LIBS" + save_CC="$CC" + # Try with the flags determined by the earlier checks. + # + # -Wl,-z,defs forces link-time symbol resolution, so that the + # linking checks with -shared actually have any value + # + # FIXME: -fPIC is required for -shared on many architectures, + # so we specify it here, but the right way would probably be to + # properly detect whether it is actually required. + CFLAGS="-shared -fPIC -Wl,-z,defs $CFLAGS $PTHREAD_CFLAGS" + LIBS="$PTHREAD_LIBS $LIBS" + CC="$PTHREAD_CC" + + echo "$as_me:$LINENO: checking whether -pthread is sufficient with -shared" >&5 +echo $ECHO_N "checking whether -pthread is sufficient with -shared... $ECHO_C" >&6 + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +int +main () +{ +pthread_t th; pthread_join(th, 0); + pthread_attr_init(0); pthread_cleanup_push(0, 0); + pthread_create(0,0,0,0); pthread_cleanup_pop(0); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ok=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext + + if test "x$ok" = xyes; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + fi + + # + # Linux gcc on some architectures such as mips/mipsel forgets + # about -lpthread + # + if test x"$ok" = xno; then + echo "$as_me:$LINENO: checking whether -lpthread fixes that" >&5 +echo $ECHO_N "checking whether -lpthread fixes that... $ECHO_C" >&6 + LIBS="-lpthread $PTHREAD_LIBS $save_LIBS" + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +int +main () +{ +pthread_t th; pthread_join(th, 0); + pthread_attr_init(0); pthread_cleanup_push(0, 0); + pthread_create(0,0,0,0); pthread_cleanup_pop(0); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ok=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext + + if test "x$ok" = xyes; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + PTHREAD_LIBS="-lpthread $PTHREAD_LIBS" + else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + fi + fi + # + # FreeBSD 4.10 gcc forgets to use -lc_r instead of -lc + # + if test x"$ok" = xno; then + echo "$as_me:$LINENO: checking whether -lc_r fixes that" >&5 +echo $ECHO_N "checking whether -lc_r fixes that... $ECHO_C" >&6 + LIBS="-lc_r $PTHREAD_LIBS $save_LIBS" + cat >conftest.$ac_ext <<_ACEOF +#line $LINENO "configure" +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +int +main () +{ +pthread_t th; pthread_join(th, 0); + pthread_attr_init(0); pthread_cleanup_push(0, 0); + pthread_create(0,0,0,0); pthread_cleanup_pop(0); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ok=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext + + if test "x$ok" = xyes; then + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + PTHREAD_LIBS="-lc_r $PTHREAD_LIBS" + else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + fi + fi + if test x"$ok" = xno; then + # OK, we have run out of ideas + { echo "$as_me:$LINENO: WARNING: Impossible to determine how to use pthreads with shared libraries" >&5 +echo "$as_me: WARNING: Impossible to determine how to use pthreads with shared libraries" >&2;} + + # so it's not safe to assume that we may use pthreads + acx_pthread_ok=no + fi + + CFLAGS="$save_CFLAGS" + LIBS="$save_LIBS" + CC="$save_CC" + fi else PTHREAD_CC="$CC" fi @@ -22393,7 +22604,7 @@ _ASBOX } >&5 cat >&5 <<_CSEOF -This file was extended by google-perftools $as_me 0.7, which was +This file was extended by google-perftools $as_me 0.8, which was generated by GNU Autoconf 2.57. 
Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -22456,7 +22667,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF ac_cs_version="\\ -google-perftools config.status 0.7 +google-perftools config.status 0.8 configured by $0, generated by GNU Autoconf 2.57, with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\" diff --git a/configure.ac b/configure.ac index 2e1ab8b..f72e687 100644 --- a/configure.ac +++ b/configure.ac @@ -5,7 +5,7 @@ # make sure we're interpreted by some minimal autoconf AC_PREREQ(2.57) -AC_INIT(google-perftools, 0.7, opensource@google.com) +AC_INIT(google-perftools, 0.8, opensource@google.com) # The argument here is just something that should be in the current directory # (for sanity checking) AC_CONFIG_SRCDIR(README) diff --git a/doc/cpu_profiler.html b/doc/cpu_profiler.html index bc18940..ff98321 100644 --- a/doc/cpu_profiler.html +++ b/doc/cpu_profiler.html @@ -109,6 +109,24 @@ detail below.

annotated with the flat and cumulative sample counts at each PC value. +

Analyzing Text Output

+ +

Text mode has lines of output that look like this:

+
+       14   2.1%  17.2%       58   8.7% std::_Rb_tree::find
+
+ +

Here is how to interpret the columns:

+
    +
  1. Number of profiling samples in this function +
  2. Percentage of profiling samples in this function +
  3. Percentage of profiling samples in the functions printed so far +
  4. Number of profiling samples in this function and its callees +
  5. Percentage of profiling samples in this function and its callees +
  6. Function name +
+ +

Node Information

In the various graphical modes of pprof, the output is a call graph diff --git a/doc/pprof_remote_servers.html b/doc/pprof_remote_servers.html new file mode 100644 index 0000000..b93ccd3 --- /dev/null +++ b/doc/pprof_remote_servers.html @@ -0,0 +1,190 @@ + + + +pprof and Remote Servers + + + + +

pprof and Remote Servers

+ +

In mid-2006, we added an experimental facility to pprof, the tool that analyzes CPU and +heap profiles. This facility allows you to collect profile +information from running applications. It makes it easy to collect +profile information without having to stop the program first, and +without having to log into the machine where the application is +running. This is meant to be used on webservers, but will work on any +application that can be modified to accept TCP connections on a port +of its choosing, and to respond to HTTP requests on that port.

+ +

We do not currently have infrastructure, such as apache modules, +that you can pop into a webserver or other application to get the +necessary functionality "for free." However, it's easy to generate +the necessary data, which should allow the interested developer to add +the necessary support into his or her applications.

+ +

To use pprof in this experimental "server" mode, you +give the script a host and port it should query, replacing the normal +commandline arguments of application + profile file:

+
+   % pprof internalweb.mycompany.com:80
+
+ +

The host must be listening on that port, and be able to accept HTTP/1.0 +requests -- sent via wget and curl -- for +several urls. The following sections list the urls that +pprof can send, and the responses it expects in +return.

+ + +
  • /pprof/heap + +

    pprof asks for the url /pprof/heap to +get heap information. The actual url is controlled via the variable +HEAP_PAGE in the pprof script, so you +can change it if you'd like.

    + +

    The server should respond by calling

    +
    +    MallocExtension::instance()->GetHeapSample(&output);
    +
    +

    and sending output back as an HTTP response to +pprof. MallocExtension is defined in the +header file google/malloc_extension.h.

    + +

    Here's an example, from an actual Google webserver, of what the +output should look like:

    +
    +heap profile:   9369: 126987529 [  9369: 126987529] @ heap
    +     2:     1024 [     2:     1024] @ 0x87da913 0x8923ad4 0x891d4c2 0x892de12 0x8930519 0x83a16c2 0x836cb38 0x834cd1c 0x8349ba5 0x10a3177 0x8349961
    +     1:       36 [     1:       36] @ 0x87da913 0x83a0929 0x836cb38 0x834cd1c 0x8349ba5 0x10a3177 0x8349961
    +   308: 10092544 [   308: 10092544] @ 0x87da913 0x8970d66 0x8970e64 0x896e8e2 0x88e69d2 0x88e6add 0x88e6dec 0x88e7384 0x88e73fa 0x8838793 0x8838b36 0x88395f8 0x88f5a4b 0x890d03a 0x890d65a 0x8917666 0x890d1f3 0x890e6e4 0x8349c1b 0x10a3177 0x8349961
    +[...]
    +
    + + +
  • /pprof/growth + +

    pprof asks for the url /pprof/growth to +get heap-profiling delta (growth) information. The actual url is +controlled via the variable GROWTH_PAGE in the +pprof script, so you can change it if you'd like.

    + +

    The server should respond by calling

    +
    +    MallocExtension::instance()->GetHeapGrowthStacks(&output);
    +
    +

    and sending output back as an HTTP response to +pprof. MallocExtension is defined in the +header file google/malloc_extension.h.

    + +

    Here's an example, from an actual Google webserver, of what the +output should look like:

    +
    +heap profile:    741: 812122112 [   741: 812122112] @ growth
    +     1:  1572864 [     1:  1572864] @ 0x87da564 0x87db8a3 0x84787a4 0x846e851 0x836d12f 0x834cd1c 0x8349ba5 0x10a3177 0x8349961
    +     1:  1048576 [     1:  1048576] @ 0x87d92e8 0x87d9213 0x87d9178 0x87d94d3 0x87da9da 0x8a364ff 0x8a437e7 0x8ab7d23 0x8ab7da9 0x8ac7454 0x8348465 0x10a3161 0x8349961
    +[...]
    +
    + + +
  • /pprof/profile + +

    pprof asks for the url +/pprof/profile?seconds=XX to get cpu-profiling +information. The actual url is controlled via the variable +PROFILE_PAGE in the pprof script, so you can +change it if you'd like.

    + +

    The server should respond by calling +ProfilerStart(filename), continuing to do its work, and +then, XX seconds later, calling ProfilerStop(). (These +functions are declared in google/profiler.h.) The +application is responsible for picking a unique filename for +ProfilerStart(). After calling +ProfilerStop(), the server should read the contents of +filename and send them back as an HTTP response to +pprof.

    + +

    Obviously, to get useful profile information the application must +continue to run in the XX seconds that the profiler is running. Thus, +the profile start-stop calls should be done in a separate thread, or +be otherwise non-blocking.

    + +

    The profiler output file is binary, but near the end of it, it +should have lines of text somewhat like this:

    +
    +01016000-01017000 rw-p 00015000 03:01 59314      /lib/ld-2.2.2.so
    +
    + + +
  • /pprof/contention + +

    This page is intended to support profiling of (thread) lock contention in +addition to CPU and memory use. It's not yet usable.

    + + +
  • /pprof/cmdline + +

    pprof asks for the url /pprof/cmdline to +figure out what application it's profiling. The actual url is +controlled via the variable PROGRAM_NAME_PAGE in the +pprof script, so you can change it if you'd like.

    + +

    The server should respond by reading the contents of +/proc/self/cmdline, converting all internal NUL (\0) +characters to newlines, and sending the result back as an HTTP +response to pprof.

    + +

    Here's an example return value:

    +

    +/root/server/custom_webserver
    +80
    +--configfile=/root/server/ws.config
    +
    + + +
  • /pprof/symbol + +

    pprof asks for the url /pprof/symbol to +map from hex addresses to function names. The actual url is +controlled via the variable SYMBOL_PAGE in the +pprof script, so you can change it if you'd like.

    + +

    This is perhaps the hardest request to write code for, because +it must accept POST requests. This means that after the HTTP headers, +pprof will pass in a list of hex addresses connected by ++, like so:

    +
    +   curl -d '0x0824d061+0x0824d1cf' http://remote_host:80/pprof/symbol
    +
    + +

    The server should read the POST data, which will be in one line, +and for each hex value, should write one line of output to the output +stream, like so:

    +
    +<hex address><tab><function name>
    +
    +

    For instance:

    +
    +0x08b2dabd    _Update
    +
    + +

    The other reason this is the most difficult request to implement +is that the application will have to figure out for itself how to map +from address to function name. One possibility is to run nm -C +-n <program name> to get the mappings, either statically +(say at program-compile time), or dynamically, by having the +application call out to nm for every +/pprof/symbol call (presumably with some caching!).

    + +

    pprof itself does just this for local profiles (not +ones that talk to remote servers); look at the subroutine +GetProcedureBoundaries.

    + + +
    +Last modified: Mon Jun 12 21:30:14 PDT 2006 + + diff --git a/src/base/linux_syscall_support.h b/src/base/linux_syscall_support.h index 319455e..0dfdd8d 100644 --- a/src/base/linux_syscall_support.h +++ b/src/base/linux_syscall_support.h @@ -45,6 +45,14 @@ #if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__)) && \ defined(__linux) +#ifdef __cplusplus +/* Some system header files in older versions of gcc neglect to properly + * handle being included from C++. As it appears to be harmless to have + * multiple nested 'extern "C"' blocks, just add another one here. + */ +extern "C" { +#endif + #include #include #include @@ -79,35 +87,47 @@ #if defined(__i386__) #ifndef __NR_getdents64 -#define __NR_getdents64 220 +#define __NR_getdents64 220 #endif #ifndef __NR_gettid -#define __NR_gettid 224 +#define __NR_gettid 224 #endif #ifndef __NR_futex -#define __NR_futex 240 +#define __NR_futex 240 +#endif +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity 241 +#define __NR_sched_getaffinity 242 #endif /* End of i386 definitions */ #elif defined(__ARM_ARCH_3__) #ifndef __NR_getdents64 -#define __NR_getdents64 217 +#define __NR_getdents64 (__NR_SYSCALL_BASE + 217) #endif #ifndef __NR_gettid -#define __NR_gettid 224 +#define __NR_gettid (__NR_SYSCALL_BASE + 224) #endif #ifndef __NR_futex -#define __NR_futex 240 +#define __NR_futex (__NR_SYSCALL_BASE + 240) +#endif +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity (__NR_SYSCALL_BASE + 241) +#define __NR_sched_getaffinity (__NR_SYSCALL_BASE + 242) #endif /* End of ARM 3 definitions */ #elif defined(__x86_64__) #ifndef __NR_getdents64 -#define __NR_getdents64 217 +#define __NR_getdents64 217 #endif #ifndef __NR_gettid -#define __NR_gettid 186 +#define __NR_gettid 186 #endif #ifndef __NR_futex -#define __NR_futex 202 +#define __NR_futex 202 +#endif +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity 203 +#define __NR_sched_getaffinity 204 #endif /* End of x86-64 
definitions */ #endif @@ -306,9 +326,11 @@ struct dirent64; #endif #if defined(__x86_64__) struct msghdr; + struct sockaddr; #define __NR_sys_mmap __NR_mmap #define __NR_sys_recvmsg __NR_recvmsg #define __NR_sys_sendmsg __NR_sendmsg + #define __NR_sys_sendto __NR_sendto #define __NR_sys_shutdown __NR_shutdown #define __NR_sys_rt_sigaction __NR_rt_sigaction #define __NR_sys_rt_sigprocmask __NR_rt_sigprocmask @@ -322,6 +344,10 @@ struct dirent64; struct msghdr*, m, int, f); static inline _syscall3(int, sys_sendmsg, int, s, const struct msghdr*, m, int, f); + static inline _syscall6(int, sys_sendto, int, s, + const void*, m, size_t, l, + int, f, + const struct sockaddr*, a, int, t); static inline _syscall2(int, sys_shutdown, int, s, int, h); static inline _syscall4(int, sys_rt_sigaction, int, s, @@ -378,6 +404,8 @@ struct dirent64; } #define sys_recvmsg(s,m,f) sys_socketcall(17, (s), (m), (f)) #define sys_sendmsg(s,m,f) sys_socketcall(16, (s), (m), (f)) + #define sys_sendto(s,m,l,f,a,t) sys_socketcall(11, (s), (m), (l),(f),\ + (a), (t)) #define sys_shutdown(s,h) sys_socketcall(13, (s), (h)) #define sys_socket(d,t,p) sys_socketcall(1, (d), (t), (p)) #define sys_socketpair(d,t,p,s) sys_socketcall(8, (d), (t), (p),(s)) @@ -387,39 +415,41 @@ struct dirent64; static inline _syscall3(pid_t, sys_waitpid, pid_t, p, int*, s, int, o); #endif - #define __NR_sys_close __NR_close - #define __NR_sys_dup __NR_dup - #define __NR_sys_dup2 __NR_dup2 - #define __NR_sys_execve __NR_execve - #define __NR_sys__exit __NR_exit - #define __NR_sys_fcntl __NR_fcntl - #define __NR_sys_fork __NR_fork - #define __NR_sys_fstat __NR_fstat - #define __NR_sys_getdents __NR_getdents - #define __NR_sys_getdents64 __NR_getdents64 - #define __NR_sys_getegid __NR_getegid - #define __NR_sys_geteuid __NR_geteuid - #define __NR_sys_getpgrp __NR_getpgrp - #define __NR_sys_getpid __NR_getpid - #define __NR_sys_getppid __NR_getppid - #define __NR_sys_getpriority __NR_getpriority - #define __NR_sys_getrlimit 
__NR_getrlimit - #define __NR_sys_getsid __NR_getsid - #define __NR__gettid __NR_gettid - #define __NR_sys_kill __NR_kill - #define __NR_sys_lseek __NR_lseek - #define __NR_sys_munmap __NR_munmap - #define __NR_sys_open __NR_open - #define __NR_sys_pipe __NR_pipe - #define __NR_sys_prctl __NR_prctl - #define __NR_sys_ptrace __NR_ptrace - #define __NR_sys_read __NR_read - #define __NR_sys_readlink __NR_readlink - #define __NR_sys_sched_yield __NR_sched_yield - #define __NR_sys_sigaltstack __NR_sigaltstack - #define __NR_sys_stat __NR_stat - #define __NR_sys_write __NR_write - #define __NR_sys_futex __NR_futex + #define __NR_sys_close __NR_close + #define __NR_sys_dup __NR_dup + #define __NR_sys_dup2 __NR_dup2 + #define __NR_sys_execve __NR_execve + #define __NR_sys__exit __NR_exit + #define __NR_sys_fcntl __NR_fcntl + #define __NR_sys_fork __NR_fork + #define __NR_sys_fstat __NR_fstat + #define __NR_sys_futex __NR_futex + #define __NR_sys_getdents __NR_getdents + #define __NR_sys_getdents64 __NR_getdents64 + #define __NR_sys_getegid __NR_getegid + #define __NR_sys_geteuid __NR_geteuid + #define __NR_sys_getpgrp __NR_getpgrp + #define __NR_sys_getpid __NR_getpid + #define __NR_sys_getppid __NR_getppid + #define __NR_sys_getpriority __NR_getpriority + #define __NR_sys_getrlimit __NR_getrlimit + #define __NR_sys_getsid __NR_getsid + #define __NR__gettid __NR_gettid + #define __NR_sys_kill __NR_kill + #define __NR_sys_lseek __NR_lseek + #define __NR_sys_munmap __NR_munmap + #define __NR_sys_open __NR_open + #define __NR_sys_pipe __NR_pipe + #define __NR_sys_prctl __NR_prctl + #define __NR_sys_ptrace __NR_ptrace + #define __NR_sys_read __NR_read + #define __NR_sys_readlink __NR_readlink + #define __NR_sys_sched_getaffinity __NR_sched_getaffinity + #define __NR_sys_sched_setaffinity __NR_sched_setaffinity + #define __NR_sys_sched_yield __NR_sched_yield + #define __NR_sys_sigaltstack __NR_sigaltstack + #define __NR_sys_stat __NR_stat + #define __NR_sys_write __NR_write 
static inline _syscall1(int, sys_close, int, f); static inline _syscall1(int, sys_dup, int, f); static inline _syscall2(int, sys_dup2, int, s, @@ -432,6 +462,8 @@ struct dirent64; static inline _syscall0(pid_t, sys_fork); static inline _syscall2(int, sys_fstat, int, f, struct stat*, b); + static inline _syscall4(int, sys_futex, int*, addrx, int, opx, int, valx, + struct timespec *, timeoutx); static inline _syscall3(int, sys_getdents, int, f, struct dirent*, d, int, c); static inline _syscall3(int, sys_getdents64, int, f, @@ -464,6 +496,10 @@ struct dirent64; void *, b, size_t, c); static inline _syscall3(int, sys_readlink, const char*, p, char*, b, size_t, s); + static inline _syscall3(int, sys_sched_getaffinity, pid_t, pid, + unsigned int, len, unsigned long *, mask); + static inline _syscall3(int, sys_sched_setaffinity, pid_t, pid, + unsigned int, len, unsigned long *, mask); static inline _syscall0(int, sys_sched_yield); static inline _syscall2(int, sys_sigaltstack, const stack_t*, s, const stack_t*, o); @@ -471,8 +507,6 @@ struct dirent64; struct stat*, b); static inline _syscall3(ssize_t, sys_write, int, f, const void *, b, size_t, c); - static inline _syscall4(int, sys_futex, int*, addrx, int, opx, int, valx, - struct timespec *, timeoutx); static inline int sys_sysconf(int name) { extern int __getpagesize(void); @@ -517,6 +551,9 @@ struct dirent64; #undef RETURN #endif +#ifdef __cplusplus +} +#endif #endif #endif diff --git a/src/base/linuxthreads.c b/src/base/linuxthreads.c index e721582..3696987 100644 --- a/src/base/linuxthreads.c +++ b/src/base/linuxthreads.c @@ -51,6 +51,10 @@ #include "base/linux_syscall_support.h" #include "base/thread_lister.h" +#ifndef CLONE_UNTRACED +#define CLONE_UNTRACED 0x00800000 +#endif + /* itoa() is not a standard function, and we cannot safely call printf() * after suspending threads. So, we just implement our own copy. A @@ -97,8 +101,19 @@ static int local_clone (int (*fn)(void *), void *arg, ...) 
{ * Leave 4kB of gap between the callers stack and the new clone. This * should be more than sufficient for the caller to call waitpid() until * the cloned thread terminates. + * + * It is important that we set the CLONE_UNTRACED flag, because newer + * versions of "gdb" otherwise attempt to attach to our thread, and will + * attempt to reap its status codes. This subsequently results in the + * caller hanging indefinitely in waitpid(), waiting for a change in + * status that will never happen. By setting the CLONE_UNTRACED flag, we + * prevent "gdb" from stealing events, but we still expect the thread + * lister to fail, because it cannot PTRACE_ATTACH to the process that + * is being debugged. This is OK and the error code will be reported + * correctly. */ - return clone(fn, (char *)&arg - 4096, CLONE_VM|CLONE_FS|CLONE_FILES, arg); + return clone(fn, (char *)&arg - 4096, + CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_UNTRACED, arg); } @@ -209,7 +224,8 @@ struct ListerParams { static void ListerThread(struct ListerParams *args) { static const int signals[] = { SIGABRT, SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGXCPU, SIGXFSZ }; - pid_t clone_pid = sys_gettid(); + int found_parent = 0; + pid_t clone_pid = sys_gettid(), ppid = sys_getppid(); char proc_self_task[80], marker_name[48], *marker_path; const char *proc_paths[3]; const char *const *proc_path = proc_paths; @@ -239,8 +255,7 @@ static void ListerThread(struct ListerParams *args) { } /* Compute search paths for finding thread directories in /proc */ - local_itoa(strrchr(strcpy(proc_self_task, "/proc/"), '\000'), - sys_getppid()); + local_itoa(strrchr(strcpy(proc_self_task, "/proc/"), '\000'), ppid); marker_path = strrchr(strcpy(marker_name, proc_self_task), '\000'); strcat(proc_self_task, "/task/"); proc_paths[0] = proc_self_task; /* /proc/$$/task/ */ @@ -417,6 +432,7 @@ static void ListerThread(struct ListerParams *args) { num_threads--; sig_num_threads = num_threads; } else { + found_parent |= pid == ppid; added_entries++; 
} } @@ -435,6 +451,16 @@ static void ListerThread(struct ListerParams *args) { NO_INTR(sys_close(marker)); sig_marker = marker = -1; + /* If we never found the parent process, something is very wrong. + * Most likely, we are running in debugger. Any attempt to operate + * on the threads would be very incomplete. Let's just report an + * error to the caller. + */ + if (!found_parent) { + ResumeAllProcessThreads(num_threads, pids); + sys__exit(3); + } + /* Now we are ready to call the callback, * which takes care of resuming the threads for us. */ @@ -530,6 +556,9 @@ int ListAllProcessThreads(void *parameter, case 2: args.err = EFAULT; /* Some fault (e.g. SIGSEGV) detected */ args.result = -1; break; + case 3: args.err = EPERM; /* Process is already being traced */ + args.result = -1; + break; default:args.err = ECHILD; /* Child died unexpectedly */ args.result = -1; break; diff --git a/src/base/thread_lister.c b/src/base/thread_lister.c index 6def758..f3df16b 100644 --- a/src/base/thread_lister.c +++ b/src/base/thread_lister.c @@ -31,7 +31,8 @@ * Author: Markus Gutschke */ -#include <stdio.h> // needed for NULL on some powerpc platforms (?!) +#include <stdio.h> /* needed for NULL on some powerpc platforms (?!) */ +#include <sys/prctl.h> #include "base/thread_lister.h" #include "base/linuxthreads.h" /* Include other thread listers here that define THREADS macro @@ -46,16 +47,23 @@ int ListAllProcessThreads(void *parameter, ListAllProcessThreadsCallBack callback, ...)
{ - int rc; + int rc; va_list ap; + int dumpable = prctl(PR_GET_DUMPABLE, 0); + if (!dumpable) + prctl(PR_SET_DUMPABLE, 1); va_start(ap, callback); - rc = callback(parameter, 0, NULL, ap); + pid_t pid = getpid(); + rc = callback(parameter, 1, &pid, ap); va_end(ap); + if (!dumpable) + prctl(PR_SET_DUMPABLE, 0); return rc; } -void ResumeAllProcessThreads(int num_threads, pid_t *thread_pids) { +int ResumeAllProcessThreads(int num_threads, pid_t *thread_pids) { + return 1; } #endif diff --git a/src/google/heap-checker.h b/src/google/heap-checker.h index 66d23de..f888ae0 100644 --- a/src/google/heap-checker.h +++ b/src/google/heap-checker.h @@ -255,6 +255,19 @@ class HeapCleaner { }; class HeapLeakChecker { + public: // Static functions for working with (whole-program) leak checking. + + // If heap leak checking is currently active in some mode + // e.g. if leak checking was started (and is still active now) + // due to any valid non-empty --heap_check flag value + // (including "local") on the command-line + // or via a dependency on //base:heapcheck. + // The return value reflects iff HeapLeakChecker objects manually + // constructed right now will be doing leak checking or nothing. + // Note that we can go from active to inactive state during InitGoogle() + // if FLAGS_heap_check gets set to "" by some code before/during InitGoogle(). + static bool IsActive(); + public: // Non-static functions for starting and doing leak checking. // Start checking and name the leak check performed. diff --git a/src/heap-checker.cc b/src/heap-checker.cc index dc9c46d..4e8e2dc 100644 --- a/src/heap-checker.cc +++ b/src/heap-checker.cc @@ -468,6 +468,18 @@ static bool RecordGlobalDataLocked(uint64 start_address, if (inode == 0) return true; + // Sometimes people mmap their own files read-write. That would cause + // the strict ELF checker later to reject them. We do not want to loosen + // up the ELF checker, because we need to catch freaky files if they + // show up. 
So, make an exception for common files that we have seen. + // + // TODO(mec): the longer this gets, the more attractive it is to + // check for the ELF header and just accept all non-ELF files. + if (inode != 0) { + if (filename && strcmp(filename, "/dev/zero") == 0) + return true; + } + // Grab some ELF types. #ifdef _LP64 typedef Elf64_Ehdr ElfFileHeader; @@ -692,8 +704,15 @@ HeapLeakChecker::UseProcMaps(ProcMapsTask proc_maps_task) { "Looking at /proc/self/maps line:\n %s\n", proc_map_line); - if (start_address >= end_address) - abort(); + if (start_address >= end_address) { + // Crash if a line we can be interested in is ill-formed: + if (inode != 0) abort(); + // Skip other ill-formed lines: some are possible + // probably due to the interplay of how /proc/self/maps is updated + // while we read it in chunks in ProcMapsIterator and + // do things in this loop. + continue; + } // Determine if any shared libraries are present. if (inode != 0 && strstr(filename, "lib") && strstr(filename, ".so")) { @@ -738,6 +757,14 @@ static int64 live_bytes_total = 0; // (protected by our lock; IgnoreAllLiveObjectsLocked sets it) static pid_t self_thread_pid = 0; +// Status of our thread listing callback execution +// (protected by our lock; used from within IgnoreAllLiveObjectsLocked) +static enum { + CALLBACK_NOT_STARTED, + CALLBACK_STARTED, + CALLBACK_COMPLETED, +} thread_listing_status = CALLBACK_NOT_STARTED; + // Ideally to avoid deadlocks this function should not result in any libc // or other function calls that might need to lock a mutex: // It is called when all threads of a process are stopped @@ -774,6 +801,7 @@ int HeapLeakChecker::IgnoreLiveThreads(void* parameter, int num_threads, pid_t* thread_pids, va_list ap) { + thread_listing_status = CALLBACK_STARTED; if (HeapProfiler::kMaxLogging) { HeapProfiler::MESSAGE(2, "HeapChecker: Found %d threads (from pid %d)\n", num_threads, getpid()); @@ -838,6 +866,7 @@ int HeapLeakChecker::IgnoreLiveThreads(void* parameter, 
IgnoreNonThreadLiveObjectsLocked(); // Can now resume the threads: ResumeAllProcessThreads(num_threads, thread_pids); + thread_listing_status = CALLBACK_COMPLETED; return failures; } @@ -928,7 +957,8 @@ IgnoreAllLiveObjectsLocked(const StackExtent& self_stack) { UseProcMaps(RECORD_GLOBAL_DATA_LOCKED); } // Ignore all thread stacks: - bool executed_with_threads_stopped = false; + thread_listing_status = CALLBACK_NOT_STARTED; + bool need_to_ignore_non_thread_objects = true; self_thread_pid = getpid(); self_thread_stack = self_stack; if (FLAGS_heap_check_ignore_thread_live) { @@ -939,10 +969,22 @@ IgnoreAllLiveObjectsLocked(const StackExtent& self_stack) { // if not suspended they could still mess with the pointer // graph while we walk it). int r = ListAllProcessThreads(NULL, IgnoreLiveThreads); - executed_with_threads_stopped = (r >= 0); - if (r == -1) { - HeapProfiler::MESSAGE(0, "HeapChecker: Could not find thread stacks; " - "may get false leak reports\n"); + need_to_ignore_non_thread_objects = r < 0; + if (r < 0) { + HeapProfiler::MESSAGE(0, "HeapChecker: thread finding failed " + "with %d errno=%d\n", r, errno); + if (thread_listing_status == CALLBACK_COMPLETED) { + HeapProfiler::MESSAGE(0, "HeapChecker: thread finding callback " + "finished ok; hopefully everything is fine\n"); + need_to_ignore_non_thread_objects = false; + } else if (thread_listing_status == CALLBACK_STARTED) { + HeapProfiler::MESSAGE(0, "HeapChecker: thread finding callback was " + "interrupted or crashed; can't fix this\n"); + abort(); + } else { // CALLBACK_NOT_STARTED + HeapProfiler::MESSAGE(0, "HeapChecker: Could not find thread stacks; " + "may get false leak reports\n"); + } } else if (r != 0) { HeapProfiler::MESSAGE(0, "HeapChecker: Thread stacks not found " "for %d threads; may get false leak reports\n", @@ -960,7 +1002,7 @@ IgnoreAllLiveObjectsLocked(const StackExtent& self_stack) { } // Do all other live data ignoring here if we did not do it // within thread listing callback with 
all threads stopped. - if (!executed_with_threads_stopped) IgnoreNonThreadLiveObjectsLocked(); + if (need_to_ignore_non_thread_objects) IgnoreNonThreadLiveObjectsLocked(); if (live_objects_total) { HeapProfiler::MESSAGE(0, "HeapChecker: " "Ignoring "LLD" reachable " @@ -1349,10 +1391,13 @@ bool HeapLeakChecker::DoNoLeaks(bool same_heap, (same_heap ? (inuse_bytes_increase_ != 0 || inuse_allocs_increase_ != 0) : (inuse_bytes_increase_ > 0 || inuse_allocs_increase_ > 0)); if (see_leaks || do_full) { + bool pprof_can_ignore = false; + const char* command_tail = " --text 2>/dev/null"; // normal command const char* gv_command_tail = " --edgefraction=1e-10 --nodefraction=1e-10 --gv 2>/dev/null"; string ignore_re; if (disabled_regexp) { + pprof_can_ignore = true; ignore_re += " --ignore='^"; ignore_re += *disabled_regexp; ignore_re += "$'"; @@ -1361,22 +1406,29 @@ bool HeapLeakChecker::DoNoLeaks(bool same_heap, // some STLs can give us spurious leak alerts (since the STL tries to // do its own memory pooling), so we avoid it by using STL as little // as possible for "big" objects that might require "lots" of memory. - char command[6 * PATH_MAX + 200]; + char base_command[6 * PATH_MAX + 200]; + char beg_profile[PATH_MAX+1], end_profile[PATH_MAX+1]; if (use_initial_profile) { + snprintf(beg_profile, sizeof(beg_profile), "%s.%s-beg.heap", + profile_prefix->c_str(), name_); // compare against initial profile only if need to const char* drop_negative = same_heap ? 
"" : " --drop_negative"; - snprintf(command, sizeof(command), "%s --base=\"%s.%s-beg.heap\" %s ", - pprof_path(), profile_prefix->c_str(), name_, - drop_negative); + snprintf(base_command, sizeof(base_command), + "%s --base=\"%s\" %s ", + pprof_path(), beg_profile, drop_negative); } else { - snprintf(command, sizeof(command), "%s", + beg_profile[0] = '\0'; + snprintf(base_command, sizeof(base_command), "%s", pprof_path()); } - snprintf(command + strlen(command), sizeof(command) - strlen(command), - " %s \"%s.%s-end.heap\" %s --inuse_objects --lines", - invocation_path(), profile_prefix->c_str(), - name_, ignore_re.c_str()); + snprintf(end_profile, sizeof(end_profile), "%s.%s-end.heap", + profile_prefix->c_str(), name_); + snprintf(base_command + strlen(base_command), + sizeof(base_command) - strlen(base_command), + " %s \"%s\" %s --inuse_objects --lines", + invocation_path(), end_profile, ignore_re.c_str()); // --lines is important here to catch leaks when !see_leaks + char cwd[PATH_MAX+1]; if (getcwd(cwd, sizeof(cwd)) != cwd) abort(); if (see_leaks) { @@ -1390,7 +1442,7 @@ bool HeapLeakChecker::DoNoLeaks(bool same_heap, "To investigate leaks manually use e.g.\n" "cd %s; " // for proper symbol resolution "%s%s\n\n", - cwd, command, gv_command_tail); + cwd, base_command, gv_command_tail); } string output; int checked_leaks = 0; @@ -1403,14 +1455,18 @@ bool HeapLeakChecker::DoNoLeaks(bool same_heap, } else { // We don't care about pprof's stderr as long as it // succeeds with empty report: - checked_leaks = GetStatusOutput(command, &output); + char full_command[6 * PATH_MAX + 200]; // needed to concatenate + snprintf(full_command, sizeof(full_command), "%s%s", + base_command, command_tail); + checked_leaks = GetStatusOutput(full_command, &output); if (checked_leaks != 0) { HeapProfiler::MESSAGE(-1, "ERROR: Could not run pprof at %s\n", pprof_path()); abort(); } } - if (see_leaks && output.empty() && checked_leaks == 0) { + if (see_leaks && pprof_can_ignore && + 
output.empty() && checked_leaks == 0) { HeapProfiler::MESSAGE(-1, "HeapChecker: " "These must be leaks that we disabled" " (pprof succeeded)! This check WILL FAIL" @@ -1420,7 +1476,24 @@ bool HeapLeakChecker::DoNoLeaks(bool same_heap, // do not fail the check just due to us being a stripped binary if (!see_leaks && strstr(output.c_str(), "nm: ") != NULL && strstr(output.c_str(), ": no symbols") != NULL) output.resize(0); - if (!(see_leaks || checked_leaks == 0)) abort(); + } + // Make sure the profiles we created are still there. + // They can get deleted e.g. if the program forks/executes itself + // and FLAGS_cleanup_old_heap_profiles was kept as true. + if (access(end_profile, R_OK) != 0 || + (beg_profile[0] && access(beg_profile, R_OK) != 0)) { + HeapProfiler::MESSAGE(-1, "HeapChecker: " + "One of the heap profiles is gone: %s %s\n", + beg_profile, end_profile); + abort(); + } + if (!(see_leaks || checked_leaks == 0)) { + // Crash if something went wrong with executing pprof + // and we rely on pprof to do its work: + HeapProfiler::MESSAGE(-1, "HeapChecker: " + "pprof command failed: %s%s\n", + base_command, command_tail); + abort(); } if (see_leaks && use_initial_profile) { HeapProfiler::MESSAGE(-1, "HeapChecker: " @@ -1438,7 +1511,7 @@ bool HeapLeakChecker::DoNoLeaks(bool same_heap, "To investigate leaks manually uge e.g.\n" "cd %s; " // for proper symbol resolution "%s%s\n\n", - name_, cwd, command, gv_command_tail); + name_, cwd, base_command, gv_command_tail); if (use_initial_profile) { HeapProfiler::MESSAGE(-1, "HeapChecker: " "CAVEAT: Some of the reported leaks might have " @@ -1491,6 +1564,10 @@ HeapLeakChecker::~HeapLeakChecker() { // HeapLeakChecker overall heap check components //---------------------------------------------------------------------- +bool HeapLeakChecker::IsActive() { + return heap_checker_on; +} + vector<HeapCleaner::void_function>* HeapCleaner::heap_cleanups_ = NULL; // When a HeapCleaner object is intialized, add its function to the static list @@ -1653,7
+1730,7 @@ void HeapLeakChecker::DoMainHeapCheck() { HeapProfiler::MESSAGE(0, "HeapChecker: " "Checking for whole-program memory leaks\n"); if (!main_heap_checker->DoNoLeaks(same_heap, do_full, do_report)) { - HeapProfiler::MESSAGE(-1, "ERROR: Leaks found in main heap check, aborting\n"); + HeapProfiler::MESSAGE(-1, "HeapChecker: crashing because of leaks\n"); abort(); } delete main_heap_checker; diff --git a/src/malloc_extension.cc b/src/malloc_extension.cc index 0260a34..686b4bc 100644 --- a/src/malloc_extension.cc +++ b/src/malloc_extension.cc @@ -166,6 +166,21 @@ struct StackTraceHash { } return h; } + // Less operator for MSVC's hash containers. + bool operator()(void** entry1, void** entry2) const { + if (Depth(entry1) != Depth(entry2)) + return Depth(entry1) < Depth(entry2); + for (int i = 0; i < Depth(entry1); i++) { + if (PC(entry1, i) != PC(entry2, i)) { + return PC(entry1, i) < PC(entry2, i); + } + } + return false; // entries are equal + } + // These two public members are required by msvc. 4 and 8 are the + // default values. 
+ static const size_t bucket_size = 4; + static const size_t min_buckets = 8; }; struct StackTraceEqual { diff --git a/src/malloc_hook.cc b/src/malloc_hook.cc index 8499c73..613e612 100644 --- a/src/malloc_hook.cc +++ b/src/malloc_hook.cc @@ -115,7 +115,7 @@ extern "C" void* mmap64(void *start, size_t length, int fd, __off64_t offset) __THROW { void *result; - result = syscall(SYS_mmap, start, length, prot, flags, fd, offset); + result = (void *)syscall(SYS_mmap, start, length, prot, flags, fd, offset); MallocHook::InvokeMmapHook(result, start, length, prot, flags, fd, offset); return result; } diff --git a/src/pprof b/src/pprof index 5df1798..24b5b74 100755 --- a/src/pprof +++ b/src/pprof @@ -41,6 +41,9 @@ # Examples: # # % tools/pprof "program" "profile" +# Enters "interactive" mode +# +# % tools/pprof --text "program" "profile" # Generates one line per procedure # # % tools/pprof --gv "program" "profile" @@ -68,6 +71,8 @@ use strict; use Getopt::Long; +my $PPROF_VERSION = "0.8"; + # These are the object tools we use, which come from various sources. 
# We want to invoke them directly, rather than via users' aliases and/or # search paths, because some people have colorizing versions of them that @@ -79,9 +84,22 @@ my %obj_tool_map = ( "objdump" => "objdump", "nm" => "nm", "addr2line" => "addr2line", + "c++filt" => "c++filt", ); my $DOT = "dot"; # leave non-absolute, since it may be in /usr/local my $GV = "gv"; +# These are used for dynamic profiles +my $WGET = "wget"; +my $CURL = "curl"; + +# These are the web pages that servers need to support for dynamic profiles +my $HEAP_PAGE = "/pprof/heap"; +my $PROFILE_PAGE = "/pprof/profile"; # must support cgi-param "?seconds=#" +my $GROWTH_PAGE = "/pprof/growth"; +my $CONTENTION_PAGE = "/pprof/contention"; +my $SYMBOL_PAGE = "/pprof/symbol"; # must support symbol lookup via POST +my $PROGRAM_NAME_PAGE = "/pprof/cmdline"; + # There is a pervasive dependency on the length (in hex characters, i.e., # nibbles) of an address, distinguishing between 32-bit and 64-bit profiles: @@ -90,23 +108,40 @@ my $address_length = 8; # Hope for 32-bit, reset if 64-bit detected. ##### Argument parsing ##### sub usage_string { - return <<'EOF'; -Usage: pprof [options] ... - Prints specified cpu- or heap-profile - + return < + is a space separated list of profile names. +pprof [options] + is a remote form. Symbols are obtained from host:port$SYMBOL_PAGE + + Each profile name can be: + /path/to/profile - a path to a profile file + host:port[/] - a location of a service to get profile from + + The / can be $HEAP_PAGE, $PROFILE_PAGE, $GROWTH_PAGE, or $CONTENTION_PAGE. + For instance: "pprof http://myserver.com:80$HEAP_PAGE". + If / is omitted, the service defaults to $PROFILE_PAGE (cpu profiling). 
+ + For more help with querying remote servers, including how to add the + necessary server-side support code, see this filename (or one like it): + + /usr/doc/google-perftools-$PPROF_VERSION/pprof_remote_servers.html + Options: --cum Sort by cumulative data --base= Subtract from before display - --interactive Run in interactive mode (interactive "help" gives help) - + --interactive Run in interactive mode (interactive "help" gives help) [default] + --seconds= Length of time for dynamic profiles [default=30 secs] + Reporting Granularity: --addresses Report at address level --lines Report at source line level --functions Report at function level [default] --files Report at source file level - + Output type: - --text Generate text report [default] + --text Generate text report --gv Generate Postscript and display --list= Generate source listing of matching routines --disasm= Generate disassembly of matching routines @@ -114,7 +149,7 @@ Output type: --ps Generate Postcript to stdout --pdf Generate PDF to stdout --gif Generate GIF to stdout - + Heap-Profile Options: --inuse_space Display in-use (mega)bytes [default] --inuse_objects Display in-use objects @@ -122,7 +157,12 @@ Heap-Profile Options: --alloc_objects Display allocated objects --show_bytes Display space in bytes --drop_negative Ignore negative differences - + +Contention-profile options: + --total_delay Display total delay at each region [default] + --contentions Display number of delays at each region + --mean_delay Display mean delay at each region + Call-graph Options: --nodecount= Show at most so many nodes [default=80] --nodefraction= Hide nodes below *total [default=.005] @@ -130,7 +170,7 @@ Call-graph Options: --focus= Focus on nodes matching --ignore= Ignore nodes matching --scale= Set GV scaling [default=0] - + Miscellaneous: --tools= Prefix for object tool pathnames --test Run unit tests @@ -138,7 +178,7 @@ Miscellaneous: --version Version information Examples: - + pprof /bin/ls ls.prof Outputs one 
line per procedure pprof --gv /bin/ls ls.prof @@ -151,12 +191,14 @@ pprof --list=getdir /bin/ls ls.prof (Per-line) annotated source listing for getdir() pprof --disasm=getdir /bin/ls ls.prof (Per-PC) annotated disassembly for getdir() +pprof localhost:1234 + Outputs one line per procedure for localhost:1234 EOF } sub version_string { - return <<'EOF' -pprof (part of google-perftools 0.7) + return < \$main::opt_help, - "version!" => \$main::opt_version, - "cum!" => \$main::opt_cum, - "base=s" => \$main::opt_base, - "functions!" => \$main::opt_functions, - "lines!" => \$main::opt_lines, - "addresses!" => \$main::opt_addresses, - "files!" => \$main::opt_files, - "text!" => \$main::opt_text, - "list=s" => \$main::opt_list, - "disasm=s" => \$main::opt_disasm, - "gv!" => \$main::opt_gv, - "dot!" => \$main::opt_dot, - "ps!" => \$main::opt_ps, - "pdf!" => \$main::opt_pdf, - "gif!" => \$main::opt_gif, - "interactive!" => \$main::opt_interactive, - "nodecount=i" => \$main::opt_nodecount, - "nodefraction=f" => \$main::opt_nodefraction, - "edgefraction=f" => \$main::opt_edgefraction, - "focus=s" => \$main::opt_focus, - "ignore=s" => \$main::opt_ignore, - "scale=i" => \$main::opt_scale, - "inuse_space!" => \$main::opt_inuse_space, - "inuse_objects!" => \$main::opt_inuse_objects, - "alloc_space!" => \$main::opt_alloc_space, - "alloc_objects!" => \$main::opt_alloc_objects, - "show_bytes!" => \$main::opt_show_bytes, - "drop_negative!" => \$main::opt_drop_negative, - "tools=s" => \$main::opt_tools, - "test!" => \$main::opt_test, - "debug!" 
=> \$main::opt_debug, - ) || usage("Invalid option(s)"); - -# Deal with the standard --help and --version -if ($main::opt_help) { - print usage_string(); - exit(0); -} - -if ($main::opt_version) { - print version_string(); - exit(0); -} - -# Disassembly/listing mode requires address-level info -if ($main::opt_disasm || $main::opt_list) { - $main::opt_functions = 0; + $main::opt_cum = 0; + $main::opt_base = ''; + $main::opt_addresses = 0; $main::opt_lines = 0; - $main::opt_addresses = 1; + $main::opt_functions = 0; $main::opt_files = 0; -} -# Check heap-profiling flags -if ($main::opt_inuse_space + - $main::opt_inuse_objects + - $main::opt_alloc_space + - $main::opt_alloc_objects > 1) { - usage("Specify at most on of --inuse/--alloc options"); -} + $main::opt_text = 0; + $main::opt_list = ""; + $main::opt_disasm = ""; + $main::opt_gv = 0; + $main::opt_dot = 0; + $main::opt_ps = 0; + $main::opt_pdf = 0; + $main::opt_gif = 0; -# Check output granularities -my $grains = - $main::opt_functions + - $main::opt_lines + - $main::opt_addresses + - $main::opt_files + - 0; -if ($grains > 1) { - usage("Only specify one output granularity option"); -} -if ($grains == 0) { - $main::opt_functions = 1; -} + $main::opt_nodecount = 80; + $main::opt_nodefraction = 0.005; + $main::opt_edgefraction = 0.001; + $main::opt_focus = ''; + $main::opt_ignore = ''; + $main::opt_scale = 0; + $main::opt_seconds = 30; -# Check output modes -my $modes = - $main::opt_text + - $main::opt_gv + - $main::opt_dot + - $main::opt_ps + - $main::opt_pdf + - $main::opt_gif + - 0; -if ($modes > 1) { - usage("Only specify one output mode"); -} -if ($modes == 0) { - $main::opt_text = 1; -} + $main::opt_inuse_space = 0; + $main::opt_inuse_objects = 0; + $main::opt_alloc_space = 0; + $main::opt_alloc_objects = 0; + $main::opt_show_bytes = 0; + $main::opt_drop_negative = 0; + $main::opt_interactive = 0; -if ($main::opt_test) { - RunUnitTests(); - # Should not return - exit(1); -} + $main::opt_total_delay = 0; + 
$main::opt_contentions = 0; + $main::opt_mean_delay = 0; -# Binary name and profile arguments list -$main::prog = ""; -@main::pfile_args = (); + $main::opt_tools = ""; + $main::opt_debug = 0; + $main::opt_test = 0; -$main::prog = shift || usage("Did not specify program"); -scalar(@ARGV) || usage("Did not specify profile file"); + # Are we using $SYMBOL_PAGE? + $main::use_symbol_page = 0; -# Parse profile file/location arguments -foreach my $farg (@ARGV) { - unshift(@main::pfile_args, $farg); -} -ConfigureObjTools($main::prog); + # Are we printing a heap profile? + $main::heap_profile = 0; -##### Main section ##### + # Are we printing a lock profile? + $main::lock_profile = 0; -# Setup tmp-file name and handler to clean it up -$main::tmpfile_sym = "/tmp/pprof$$.sym"; -$main::tmpfile_ps = "/tmp/pprof$$"; -$main::next_tmpfile = 0; -$main::collected_profile = undef; -@main::profile_files = (); -#$main::op_time = time(); -$SIG{'INT'} = \&sighandler; + GetOptions("help!" => \$main::opt_help, + "version!" => \$main::opt_version, + "cum!" => \$main::opt_cum, + "base=s" => \$main::opt_base, + "seconds=i" => \$main::opt_seconds, + "functions!" => \$main::opt_functions, + "lines!" => \$main::opt_lines, + "addresses!" => \$main::opt_addresses, + "files!" => \$main::opt_files, + "text!" => \$main::opt_text, + "list=s" => \$main::opt_list, + "disasm=s" => \$main::opt_disasm, + "gv!" => \$main::opt_gv, + "dot!" => \$main::opt_dot, + "ps!" => \$main::opt_ps, + "pdf!" => \$main::opt_pdf, + "gif!" => \$main::opt_gif, + "interactive!" => \$main::opt_interactive, + "nodecount=i" => \$main::opt_nodecount, + "nodefraction=f" => \$main::opt_nodefraction, + "edgefraction=f" => \$main::opt_edgefraction, + "focus=s" => \$main::opt_focus, + "ignore=s" => \$main::opt_ignore, + "scale=i" => \$main::opt_scale, + "inuse_space!" => \$main::opt_inuse_space, + "inuse_objects!" => \$main::opt_inuse_objects, + "alloc_space!" => \$main::opt_alloc_space, + "alloc_objects!" 
=> \$main::opt_alloc_objects, + "show_bytes!" => \$main::opt_show_bytes, + "drop_negative!" => \$main::opt_drop_negative, + "total_delay!" => \$main::opt_total_delay, + "contentions!" => \$main::opt_contentions, + "mean_delay!" => \$main::opt_mean_delay, + "tools=s" => \$main::opt_tools, + "test!" => \$main::opt_test, + "debug!" => \$main::opt_debug, + ) || usage("Invalid option(s)"); -# Fetch all profile data -FetchDynamicProfiles(); - -# Read one profile, pick the last item on the list -my $data = ReadProfile($main::prog, pop(@main::profile_files)); -my $profile = $data->{profile}; -my $libs = $data->{libs}; # Info about main program and shared libraries - -# List of function names to skip -$main::skip = (); -$main::skip_regexp = 'NOMATCH'; -if ($main::heap_profile) { - foreach my $name ('calloc', - 'cfree', - 'malloc', - 'free', - 'memalign', - 'pvalloc', - 'valloc', - 'realloc', - 'do_malloc', - 'DoSampledAllocation', - '__builtin_delete', - '__builtin_new', - '__builtin_vec_delete', - '__builtin_vec_new') { - $main::skip{$name} = 1; + # Deal with the standard --help and --version + if ($main::opt_help) { + print usage_string(); + exit(0); } - $main::skip_regexp = "TCMalloc"; -} -if ($main::lock_profile) { - foreach my $vname ('Mutex::Unlock', 'Mutex::UnlockSlow') { - $main::skip{$vname} = 1; + + if ($main::opt_version) { + print version_string(); + exit(0); } -} -# Add additional profiles, if available. 
-if (scalar(@main::profile_files) > 0) { - foreach my $pname (@main::profile_files) { - my $p = ReadProfile($main::prog, $pname)->{profile}; - $profile = AddProfile($profile, $p); + # Disassembly/listing mode requires address-level info + if ($main::opt_disasm || $main::opt_list) { + $main::opt_functions = 0; + $main::opt_lines = 0; + $main::opt_addresses = 1; + $main::opt_files = 0; } -} -# Subtract base from profile, if specified -if ($main::opt_base ne '') { - my $base = ReadProfile($main::prog, $main::opt_base)->{profile}; - $profile = SubtractProfile($profile, $base); -} + # Check heap-profiling flags + if ($main::opt_inuse_space + + $main::opt_inuse_objects + + $main::opt_alloc_space + + $main::opt_alloc_objects > 1) { + usage("Specify at most on of --inuse/--alloc options"); + } -# Get total data in profile -my $total = TotalProfile($profile); + # Check output granularities + my $grains = + $main::opt_functions + + $main::opt_lines + + $main::opt_addresses + + $main::opt_files + + 0; + if ($grains > 1) { + usage("Only specify one output granularity option"); + } + if ($grains == 0) { + $main::opt_functions = 1; + } -# Extract symbols -my $symbols = ExtractSymbols($libs, $profile, $data->{pcs}); - -# Focus? -if ($main::opt_focus ne '') { - $profile = FocusProfile($symbols, $profile, $main::opt_focus); -} - -# Ignore? -if ($main::opt_ignore ne '') { - $profile = IgnoreProfile($symbols, $profile, $main::opt_ignore); -} - -# Reduce profiles to required output granularity, and also clean -# each stack trace so a given entry exists at most once. 
-my $reduced = ReduceProfile($symbols, $profile); - -# Get derived profiles -my $flat = FlatProfile($reduced); -my $cumulative = CumulativeProfile($reduced); - -# Print -if (!$main::opt_interactive) { - if ($main::opt_disasm) { - PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm); - } elsif ($main::opt_list) { - PrintListing($libs, $flat, $cumulative, $main::opt_list); - } elsif ($main::opt_text) { - PrintText($symbols, $flat, $cumulative, $total, -1); - } else { - if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { - if ($main::opt_gv) { - if (!system("$GV --version >/dev/null 2>&1")) { - # Options using double dash are supported by this gv version. - system("$GV --scale=$main::opt_scale " . - PsTempName($main::next_tmpfile)); - } else { - # Old gv version - only supports options that use single dash. - system("$GV -scale $main::opt_scale " . - PsTempName($main::next_tmpfile)); - } - } + # Check output modes + my $modes = + $main::opt_text + + $main::opt_gv + + $main::opt_dot + + $main::opt_ps + + $main::opt_pdf + + $main::opt_gif + + $main::opt_interactive + + 0; + if ($modes > 1) { + usage("Only specify one output mode"); + } + if ($modes == 0) { + if (-t STDOUT) { # If STDOUT is a tty, activate interactive mode + $main::opt_interactive = 1; } else { - exit(1); + $main::opt_text = 1; } } -} else { - InteractiveMode(); + + if ($main::opt_test) { + RunUnitTests(); + # Should not return + exit(1); + } + + # Binary name and profile arguments list + $main::prog = ""; + @main::pfile_args = (); + + # Remote profiling without a binary (using $SYMBOL_PAGE instead) + if (IsProfileURL($ARGV[0])) { + $main::use_symbol_page = 1; + } + + if ($main::use_symbol_page) { # We don't need a binary! + my %disabled = ('--lines' => $main::opt_lines, + '--disasm' => $main::opt_disasm); + for my $option (keys %disabled) { + usage("$option cannot be used without a binary") if $disabled{$option}; + } + # Set $main::prog later... 
+ scalar(@ARGV) || usage("Did not specify profile file"); + } else { + $main::prog = shift(@ARGV) || usage("Did not specify program"); + scalar(@ARGV) || usage("Did not specify profile file"); + } + + # Parse profile file/location arguments + foreach my $farg (@ARGV) { + if ($farg =~ m/(.*)\@([0-9]+)/ ) { + my $machine = $1; + my $num_machines = $2; + for (my $i = 0; $i < $num_machines; $i++) { + unshift(@main::pfile_args, "$i.$machine"); + } + } else { + unshift(@main::pfile_args, $farg); + } + } + + if ($main::use_symbol_page) { + unless (IsProfileURL($main::pfile_args[0])) { + error("The first profile should be a remote form to use $SYMBOL_PAGE\n"); + } + CheckSymbolPage(); + $main::prog = FetchProgramName(); + } else { + ConfigureObjTools($main::prog) + } } -cleanup(); -exit(0); +sub Main() { + Init(); + $main::collected_profile = undef; + @main::profile_files = (); + $main::op_time = time(); + # Fetch all profile data + FetchDynamicProfiles(); + + # Read one profile, pick the last item on the list + my $data = ReadProfile($main::prog, pop(@main::profile_files)); + my $profile = $data->{profile}; + my $libs = $data->{libs}; # Info about main program and shared libraries + + # List of function names to skip + $main::skip = (); + $main::skip_regexp = 'NOMATCH'; + if ($main::heap_profile) { + foreach my $name ('calloc', + 'cfree', + 'malloc', + 'free', + 'memalign', + 'pvalloc', + 'valloc', + 'realloc', + 'do_malloc', + 'DoSampledAllocation', + 'simple_alloc::allocate', + '__malloc_alloc_template::allocate', + '__builtin_delete', + '__builtin_new', + '__builtin_vec_delete', + '__builtin_vec_new') { + $main::skip{$name} = 1; + } + $main::skip_regexp = "TCMalloc"; + } + if ($main::lock_profile) { + foreach my $vname ('Mutex::Unlock', 'Mutex::UnlockSlow') { + $main::skip{$vname} = 1; + } + } + + # Add additional profiles, if available. 
+ if (scalar(@main::profile_files) > 0) { + foreach my $pname (@main::profile_files) { + my $p = ReadProfile($main::prog, $pname)->{profile}; + $profile = AddProfile($profile, $p); + } + } + + # Subtract base from profile, if specified + if ($main::opt_base ne '') { + my $base = ReadProfile($main::prog, $main::opt_base)->{profile}; + $profile = SubtractProfile($profile, $base); + } + + # Get total data in profile + my $total = TotalProfile($profile); + + # Collect symbols + my $symbols = undef; + if ($main::use_symbol_page) { + $symbols = FetchSymbols($data->{pcs}); + } else { + $symbols = ExtractSymbols($libs, $profile, $data->{pcs}); + } + + # Focus? + if ($main::opt_focus ne '') { + $profile = FocusProfile($symbols, $profile, $main::opt_focus); + } + + # Ignore? + if ($main::opt_ignore ne '') { + $profile = IgnoreProfile($symbols, $profile, $main::opt_ignore); + } + + # Reduce profiles to required output granularity, and also clean + # each stack trace so a given entry exists at most once. + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + # Print + if (!$main::opt_interactive) { + if ($main::opt_disasm) { + PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm); + } elsif ($main::opt_list) { + PrintListing($libs, $flat, $cumulative, $main::opt_list); + } elsif ($main::opt_text) { + PrintText($symbols, $flat, $cumulative, $total, -1); + } else { + if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { + if ($main::opt_gv) { + if (!system("$GV --version >/dev/null 2>&1")) { + # Options using double dash are supported by this gv version. + system("$GV --scale=$main::opt_scale " . + PsTempName($main::next_tmpfile)); + } else { + # Old gv version - only supports options that use single dash. + system("$GV -scale $main::opt_scale " . 
+ PsTempName($main::next_tmpfile)); + } + } + } else { + exit(1); + } + } + } else { + InteractiveMode($profile, $symbols, $libs, $total); + } + + cleanup(); + exit(0); +} + +##### Entry Point ##### + +Main(); + +# Temporary code to detect if we're running on a Goobuntu system. +# These systems don't have the right stuff installed for the special +# Readline libraries to work, so as a temporary workaround, we default +# to using the normal stdio code, rather than the fancier readline-based +# code +sub ReadlineMightFail { + if (-e '/lib/libtermcap.so.2') { + return 0; # libtermcap exists, so readline should be okay + } else { + return 1; + } +} ##### Interactive helper routines ##### sub InteractiveMode { - $| = 1; # Make output unbuffered for interactive mode - my $orig_profile = $profile; + $| = 1; # Make output unbuffered for interactive mode + my ($orig_profile, $symbols, $libs, $total) = @_; # Use ReadLine if it's installed. - if ( defined(eval {require Term::ReadLine}) ) { + if ( !ReadlineMightFail() && + defined(eval {require Term::ReadLine}) ) { my $term = new Term::ReadLine 'pprof'; while ( defined ($_ = $term->readline('(pprof) '))) { $term->addhistory($_) if /\S/; - if (!InteractiveCommand($orig_profile, $_)) { - last; # exit when we get an interactive command to quit + if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) { + last; # exit when we get an interactive command to quit } } } else { # don't have readline while (1) { print "(pprof) "; $_ = ; - if (!InteractiveCommand($orig_profile, $_)) { - last; # exit when we get an interactive command to quit + + # Save some flags that might be reset by InteractiveCommand() + my $save_opt_lines = $main::opt_lines; + + if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) { + last; # exit when we get an interactive command to quit } + + # Restore flags + $main::opt_lines = $save_opt_lines; } } } @@ -477,7 +605,7 @@ sub InteractiveMode { # Takes two args: orig profile, and 
command to run. # Returns 1 if we should keep going, or 0 if we were asked to quit sub InteractiveCommand { - my($orig_profile, $command) = @_; + my($orig_profile, $symbols, $libs, $total, $command) = @_; $_ = $command; # just to make future m//'s easier if (!defined($_)) { print "\n"; @@ -490,8 +618,7 @@ sub InteractiveCommand { InteractiveHelpMessage(); return 1; } - # Clear all the options - $main::opt_lines = 0; + # Clear all the mode options -- mode is controlled by "$command" $main::opt_text = 0; $main::opt_disasm = 0; $main::opt_list = 0; @@ -507,7 +634,7 @@ sub InteractiveCommand { my $ignore; ($routine, $ignore) = ParseInteractiveArgs($3); - my $profile = ProcessProfile($orig_profile, "", $ignore); + my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore); my $reduced = ReduceProfile($symbols, $profile); # Get derived profiles @@ -524,7 +651,7 @@ sub InteractiveCommand { my $ignore; ($routine, $ignore) = ParseInteractiveArgs($1); - my $profile = ProcessProfile($orig_profile, "", $ignore); + my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore); my $reduced = ReduceProfile($symbols, $profile); # Get derived profiles @@ -542,7 +669,7 @@ sub InteractiveCommand { ($routine, $ignore) = ParseInteractiveArgs($1); # Process current profile to account for various settings - my $profile = ProcessProfile($orig_profile, "", $ignore); + my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore); my $reduced = ReduceProfile($symbols, $profile); # Get derived profiles @@ -560,7 +687,7 @@ sub InteractiveCommand { ($focus, $ignore) = ParseInteractiveArgs($1); # Process current profile to account for various settings - my $profile = ProcessProfile($orig_profile, $focus, $ignore); + my $profile = ProcessProfile($orig_profile, $symbols, $focus, $ignore); my $reduced = ReduceProfile($symbols, $profile); # Get derived profiles @@ -587,6 +714,7 @@ sub InteractiveCommand { sub ProcessProfile { my $orig_profile = shift; + my $symbols = shift; my 
$focus = shift; my $ignore = shift; @@ -598,18 +726,18 @@ sub ProcessProfile { $profile = FocusProfile($symbols, $profile, $focus); my $focus_count = TotalProfile($profile); printf("After focusing on '%s': %s %s of %s (%0.1f%%)\n", - $focus, - Unparse($focus_count), Units(), - Unparse($total_count), ($focus_count*100.0) / $total_count); + $focus, + Unparse($focus_count), Units(), + Unparse($total_count), ($focus_count*100.0) / $total_count); } if ($ignore ne '') { $profile = IgnoreProfile($symbols, $profile, $ignore); my $ignore_count = TotalProfile($profile); printf("After ignoring '%s': %s %s of %s (%0.1f%%)\n", - $ignore, - Unparse($ignore_count), Units(), - Unparse($total_count), - ($ignore_count*100.0) / $total_count); + $ignore, + Unparse($ignore_count), Units(), + Unparse($total_count), + ($ignore_count*100.0) / $total_count); } return $profile; @@ -637,7 +765,7 @@ Commands: Show top lines ordered by flat profile count, or cumulative count if --cum is specified. If a number is present after 'top', the top K routines will be shown (defaults to showing the top 10) - + disasm [routine_regexp] [-ignore1] [-ignore2] Show disassembly of routines whose names match "routine_regexp", annotated with sample counts. @@ -649,6 +777,10 @@ For commands that accept optional -ignore tags, samples where any routine in the stack trace matches the regular expression in any of the -ignore parameters will be ignored. 
+Further pprof details are available at this location (or one similar): + + /usr/doc/google-perftools-$PPROF_VERSION/cpu_profiler.html + ENDOFHELP } sub ParseInteractiveArgs { @@ -1023,8 +1155,8 @@ sub PrintDot { if ($nodelimit > 0 || $edgelimit > 0) { printf STDERR ("Dropping nodes with <= %s %s; edges with <= %s abs(%s)\n", - Unparse($nodelimit), Units(), - Unparse($edgelimit), Units()); + Unparse($nodelimit), Units(), + Unparse($edgelimit), Units()); } # Open DOT output file @@ -1160,8 +1292,6 @@ sub OutputKey { # Skip large addresses since they sometimes show up as fake entries on RH9 if (length($a) > 8) { if ($a gt "7fffffffffffffff") { return ''; } - } else { - if (hex($a) > 0x7fffffff) { return ''; } } # Extract symbolic info for address @@ -1220,7 +1350,7 @@ sub Unparse { return sprintf("%.1f", $num / 1048576.0); } } - } elsif ($main::lock_profile) { + } elsif ($main::lock_profile && !$main::opt_contentions) { return sprintf("%.3f", $num / 1e9); # Convert nanoseconds to seconds } else { return sprintf("%d", $num); @@ -1249,7 +1379,7 @@ sub Units { return "MB"; } } - } elsif ($main::lock_profile) { + } elsif ($main::lock_profile && !$main::opt_contentions) { return "seconds"; } else { return "samples"; @@ -1267,7 +1397,9 @@ sub FlatProfile { foreach my $k (keys(%{$profile})) { my $count = $profile->{$k}; my @addrs = split(/\n/, $k); - AddEntry($result, $addrs[0], $count); + if ($#addrs >= 0) { + AddEntry($result, $addrs[0], $count); + } } return $result; } @@ -1458,14 +1590,191 @@ sub AddEntries { ##### Code to profile a server dynamically ##### +sub CheckSymbolPage { + my $url = SymbolPageURL(); + open(SYMBOL, "$WGET -qO- '$url' |"); + my $line = ; + close(SYMBOL); + unless (defined($line)) { + error("$url doesn't exist\n"); + } + + if ($line =~ /^num_symbols:\s+(\d+)$/) { + if ($1 == 0) { + error("Stripped binary. 
No symbols available.\n"); + } + } else { + error("Failed to get the number of symbols from $url\n"); + } +} + +sub IsProfileURL { + my $profile_name = shift; + my ($host, $port, $type) = ParseProfileURL($profile_name); + return defined($host) and defined($port) and defined($type); +} + +sub ParseProfileURL { + my $profile_name = shift; + if ($profile_name =~ m,^(http://|)([^/:]+):(\d+)(|/|$PROFILE_PAGE|$HEAP_PAGE|$GROWTH_PAGE|$CONTENTION_PAGE)$,o) { + return ($2, $3, $4); + } + return (); +} + +# We fetch symbols from the first profile argument. +sub SymbolPageURL { + my ($host, $port, $type) = ParseProfileURL($main::pfile_args[0]); + return "http://$host:$port$SYMBOL_PAGE"; +} + +sub FetchProgramName() { + my ($host, $port, $type) = ParseProfileURL($main::pfile_args[0]); + my $url = "http://$host:$port$PROGRAM_NAME_PAGE"; + my $command_line = "$WGET -qO- '$url'"; + open(CMDLINE, "$command_line |") or error($command_line); + my $cmdline = ; + close(CMDLINE); + error("Failed to get program name from $url\n") unless defined($cmdline); + $cmdline =~ s/\x00.+//; # Remove argv[1] and latters. + $cmdline =~ s!\n!!g; # Remove LFs. + return $cmdline; +} + +# Gee, curl's -L (--location) option isn't reliable at least +# with its 7.12.3 version. Curl will forget to post data if +# there is a redirection. This function is a workaround for +# curl. Redirection happens on borg hosts. +sub ResolveRedirectionForCurl { + my $url = shift; + my $command_line = "$CURL -s --head '$url'"; + open(CMDLINE, "$command_line |") or error($command_line); + while () { + if (/^Location: (.*)/) { + $url = $1; + } + } + close(CMDLINE); + return $url; +} + +# Fetch symbols from $SYMBOL_PAGE for all PC values found in profile +sub FetchSymbols { + my $pcset = shift; + + my %seen = (); + my @pcs = grep { !$seen{$_}++ } keys(%$pcset); # uniq + my $post_data = join("+", sort((map {"0x" . 
"$_"} @pcs))); + open(POSTFILE, ">$main::tmpfile_sym"); + print POSTFILE $post_data; + close(POSTFILE); + + my $url = SymbolPageURL(); + # Here we use curl for sending data via POST since old + # wgets don't't have --post-file option. + $url = ResolveRedirectionForCurl($url); + my $command_line = "$CURL -sd '\@$main::tmpfile_sym' '$url'"; + # We use c++filt in case $SYMBOL_PAGE gives us mangled symbols. + my $cppfilt = $obj_tool_map{"c++filt"}; + open(SYMBOL, "$command_line | $cppfilt |") or error($command_line); + + my %map; + while () { + if (m/^0x([0-9a-f]+)\s+(.+)/) { + $map{$1} = $2; + } + } + close(SYMBOL); + + my $symbols = {}; + for my $pc (@pcs) { + my $fullname; + if (defined($map{$pc})) { + $fullname = $map{$pc}; + } else { + $fullname = "0x" . $pc; # Just use addresses + } + my $name = ShortFunctionName($fullname); + $symbols->{$pc} = [$name, "?", $fullname]; + } + return $symbols; +} + +sub BaseName { + my $file_name = shift; + $file_name =~ s!^.*/!!; # Remove directory name + return $file_name; +} + +sub MakeProfileBaseName { + my ($binary_name, $profile_name) = @_; + my ($host, $port, $type) = ParseProfileURL($profile_name); + my $binary_shortname = BaseName($binary_name); + return sprintf("%s.%s.%s-port%s", + $binary_shortname, $main::op_time, $host, $port); +} + sub FetchDynamicProfile { my $binary_name = shift; my $profile_name = shift; my $fetch_name_only = shift; my $encourage_patience = shift; - # TODO: Add support for fetching profiles dynamically from a server - return $profile_name; + my $user_dir = $ENV{HOME}; + my $profile_dir = $user_dir . 
"/pprof"; + if (!(-d $profile_dir)) { + mkdir($profile_dir) || die("Unable to create profile directory $profile_dir\n"); + } + if (!IsProfileURL($profile_name)) { + return $profile_name; + } else { + my ($host, $port, $type) = ParseProfileURL($profile_name); + if ($type eq "" || $type eq "/") { + # Missing type specifier defaults to cpu-profile + $type = $PROFILE_PAGE; + } + + my $profile_file = MakeProfileBaseName($binary_name, $profile_name); + + my $url; + my $wget_timeout; + if ($type eq $PROFILE_PAGE) { + $url = sprintf("http://$host:$port$PROFILE_PAGE?seconds=%d", + $main::opt_seconds); + $wget_timeout = sprintf("--timeout=%d", + int($main::opt_seconds * 1.01 + 60)); + } else { + # For non-CPU profiles, we add a type-extension to + # the target profile file name. + my $suffix = $type; + $suffix =~ s,/,.,g; + $profile_file .= "$suffix"; + $url = "http://$host:$port$type"; + $wget_timeout = ""; + } + my $tmp_profile = "$profile_dir/.tmp.$profile_file"; + my $real_profile = "$profile_dir/$profile_file"; + + if ($fetch_name_only > 0) { + return $real_profile; + } + + my $cmd = "$WGET $wget_timeout -q -O $tmp_profile '$url'"; + if ($type eq $PROFILE_PAGE) { + print STDERR "Gathering CPU profile from $host:$port for $main::opt_seconds seconds to\n ${real_profile}\n"; + if ($encourage_patience) { + print STDERR "Be patient...\n"; + } + } else { + print STDERR "Fetching $type profile from $host:$port to\n ${real_profile}\n"; + } + + (system($cmd) == 0) || error("Failed to get profile: $cmd: $!\n"); + (system("mv $tmp_profile $real_profile") == 0) || error("Unable to rename profile\n"); + print STDERR "Wrote profile to $real_profile\n"; + $main::collected_profile = $real_profile; + return $main::collected_profile; + } } # Collect profiles in parallel @@ -1543,10 +1852,11 @@ sub ReadProfile { open(PROFILE, "<$fname") || error("$fname: $!\n"); binmode PROFILE; # New perls do UTF-8 processing my $header = ; + my $contention_marker = substr($CONTENTION_PAGE, 1); # remove 
leading / if ($header =~ m/^heap profile:/) { $main::heap_profile = 1; return ReadHeapProfile($prog, $fname, $header); - } elsif ($header =~ m/^--- *contentionz/ ) { + } elsif ($header =~ m/^--- *$contention_marker/o ) { $main::lock_profile = 1; return ReadSynchProfile($prog, $fname); } elsif ($header =~ m/^--- *Stacks:/ ) { @@ -1581,17 +1891,11 @@ sub ReadCPUProfile { my $pcs = {}; # Parse string into array of slots. - # L! is needed for 64-bit # platforms, but not supported on 5.005 - # (despite the manpage claims) + # L! cannot be used because with a native 64-bit build, it will cause + # 1) a valid 64-bit profile to use the 32-bit codepath, and + # 2) a valid 32-bit profile to be unrecognized. - my $format; - if ($] >= 5.008) { - $format = "L!*"; - } else { - $format = "L*"; - } - - my @slots = unpack($format, $str); + my @slots = unpack("L*", $str); # Read header. The current header version is a 5-element structure # containing: @@ -1713,15 +2017,55 @@ sub ReadHeapProfile { $index = 2; } - # Find the type of this profile + # Find the type of this profile. The header line looks like: + # heap profile: 1246: 8800744 [ 1246: 8800744] @ <heap-url>/266053 + # There are two pairs <count: size>, the first inuse objects/space, and the + # second allocated objects/space. This is followed optionally by a profile + # type, and if that is present, optionally by a sampling frequency. The + # interpretation of the sampling frequency is that the profiler, for each + # sample, calculates a uniformly distributed random integer less than the + # given value, and records the next sample after that many bytes have been + # allocated. Therefore, the expected sample interval is half of the given + # frequency. By default, if not specified, the expected sample interval is + # 128KB. Only remote-heap-page profiles are adjusted for sample size.
+ my $should_adjust_sample = 0; + my $sample_adjustment = 0; chomp($header); my $type = "unknown"; - if ($header =~ m/^heap profile:\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\](\s*@\s*(.*))?/) { + if ($header =~ m"^heap profile:\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\](\s*@\s*([^/]*)(/(\d+))?)?") { if (defined($6) && ($6 ne '')) { $type = $6; + # The regex test here is to see if type is a substring of HEAP_PAGE + if (($HEAP_PAGE =~ /$type/)) { + $should_adjust_sample = 1; + if (defined($8) && ($8 ne '')) { + $sample_adjustment = int($8)/2; + printf STDERR ("Adjusting heap profiles for 1-in-%d sampling rate\n", + $sample_adjustment); + } + } + } else { + # We detect whether or not this is a remote-heap profile by checking + # that the total-allocated stats ($n2,$s2) are exactly the + # same as the in-use stats ($n1,$s1). It is remotely conceivable + # that a non-remote-heap profile may pass this check, but it is hard + # to imagine how that could happen. + my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4); + if (($n1 == $n2) && ($s1 == $s2)) { + # This is likely to be a remote-heap based sample profile + $should_adjust_sample = 1; + } } } + # For remote-heap generated profiles, adjust the counts and sizes to + # account for the sample rate (we sample once every 128KB by default). + if ($should_adjust_sample && ($sample_adjustment == 0)) { + # Turn on profile adjustment. 
+ $sample_adjustment = 128*1024; + print STDERR "Adjusting heap profiles for 1-in-128KB sampling rate\n"; + } + my $profile = {}; my $pcs = {}; my $map = ""; @@ -1739,13 +2083,13 @@ sub ReadHeapProfile { # Read /proc/self/maps data as formatted by DumpAddressMap() my $buildvar = ""; while () { - # Parse "build=" specification if supplied - if (m/^\s*build=(.*)\n/) { - $buildvar = $1; - } + # Parse "build=" specification if supplied + if (m/^\s*build=(.*)\n/) { + $buildvar = $1; + } - # Expand "$build" variable if available - $_ =~ s/\$build\b/$buildvar/g; + # Expand "$build" variable if available + $_ =~ s/\$build\b/$buildvar/g; $map .= $_; } @@ -1760,6 +2104,20 @@ sub ReadHeapProfile { my $stack = $5; my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4); + if ($sample_adjustment) { + my $ratio; + $ratio = (($s1*1.0)/$n1)/($sample_adjustment); + if ($ratio < 1) { + $n1 /= $ratio; + $s1 /= $ratio; + } + $ratio = (($s2*1.0)/$n2)/($sample_adjustment); + if ($ratio < 1) { + $n2 /= $ratio; + $s2 /= $ratio; + } + } + my @counts = ($n1, $s1, $n2, $s2); AddEntries($profile, $pcs, $stack, $counts[$index]); } @@ -1785,17 +2143,35 @@ sub ReadSynchProfile { my $seen_clockrate = 0; my $line; + my $index = 0; + if ($main::opt_total_delay) { + $index = 0; + } elsif ($main::opt_contentions) { + $index = 1; + } elsif ($main::opt_mean_delay) { + $index = 2; + } + while ( $line = ) { - if ( $line =~ /^(slow release).*thread \d+ \@\s*(.*?)\s*$/ || - $line =~ /^\s*(\d+) \@\s*(.*?)\s*$/ ) { - my ($count, $stack) = ($1, $2); - if ($count !~ /^\d+$/) { + if ( $line =~ /^\s*(\d+)\s+(\d+) \@\s*(.*?)\s*$/ ) { + my ($cycles, $count, $stack) = ($1, $2, $3); + + # Convert cycles to nanoseconds + $cycles /= $cyclespernanosec; + + my @values = ($cycles, $count, $cycles / $count); + AddEntries($profile, $pcs, $stack, $values[$index]); + + } elsif ( $line =~ /^(slow release).*thread \d+ \@\s*(.*?)\s*$/ || + $line =~ /^\s*(\d+) \@\s*(.*?)\s*$/ ) { + my ($cycles, $stack) = ($1, $2); + if ($cycles !~ /^\d+$/) { 
next; } # Convert cycles to nanoseconds - $count /= $cyclespernanosec; - AddEntries($profile, $pcs, $stack, $count); + $cycles /= $cyclespernanosec; + AddEntries($profile, $pcs, $stack, $cycles); } elsif ( $line =~ m|cycles/second = (\d+)|) { $cyclespernanosec = $1 / 1e9; @@ -1838,6 +2214,7 @@ sub HexExtend { # Split /proc/pid/maps dump into a list of libraries sub ParseLibraries { + return if $main::use_symbol_page; # We don't need libraries info. my $prog = shift; my $map = shift; my $pcs = shift; diff --git a/src/profiler.cc b/src/profiler.cc index 5843720..8ddcc41 100644 --- a/src/profiler.cc +++ b/src/profiler.cc @@ -460,12 +460,12 @@ void ProfileData::SetHandler(void (*handler)(int)) { } void ProfileData::FlushTable() { - if (out_ < 0) { - // Profiling is not enabled - return; - } - LOCK(&state_lock_); { + if (out_ < 0) { + // Profiling is not enabled + UNLOCK(&state_lock_); + return; + } SetHandler(SIG_IGN); // Disable timer interrupts while we're flushing LOCK(&table_lock_); { // Move data from hash table to eviction buffer diff --git a/src/stacktrace.cc b/src/stacktrace.cc index 859d52a..da20659 100644 --- a/src/stacktrace.cc +++ b/src/stacktrace.cc @@ -45,17 +45,14 @@ #include "stacktrace_x86-inl.h" #endif -#if !defined(IMPLEMENTED_STACK_TRACE) && defined(USE_LIBUNWIND) && HAVE_LIBUNWIND_H +#if !defined(IMPLEMENTED_STACK_TRACE) && defined(__x86_64__) && HAVE_LIBUNWIND_H #define IMPLEMENTED_STACK_TRACE -// This is turned off by default. Possible reasons for turning on in the -// future: -// 1. Compiler independence -// 2. Architecture independence -// 3. A more liberal MIT license, which allows use with multiple compilers +#define UNW_LOCAL_ONLY #include "stacktrace_libunwind-inl.h" #endif #if !defined(IMPLEMENTED_STACK_TRACE) && defined(__x86_64__) && HAVE_UNWIND_H +// This implementation suffers from deadlocks. Don't enable it. 
#define IMPLEMENTED_STACK_TRACE #include "stacktrace_x86_64-inl.h" #endif diff --git a/src/stacktrace_libunwind-inl.h b/src/stacktrace_libunwind-inl.h index 42c28d3..bf39633 100644 --- a/src/stacktrace_libunwind-inl.h +++ b/src/stacktrace_libunwind-inl.h @@ -51,14 +51,14 @@ int GetStackTrace(void** result, int max_depth, int skip_count) { do { ret = unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip); - assert(ret == 0); + if (ret < 0) + break; if (skip_count > 0) { skip_count--; } else { result[n++] = ip; } ret = unw_step(&cursor); - assert(ret >= 0); } while ((n < max_depth) && (ret > 0)); return n; diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc index 5dc062e..bf45dfb 100644 --- a/src/tcmalloc.cc +++ b/src/tcmalloc.cc @@ -79,6 +79,7 @@ #include #include #include +#include "base/commandlineflags.h" #include "google/malloc_hook.h" #include "google/malloc_extension.h" #include "google/stacktrace.h" @@ -147,12 +148,27 @@ static const size_t kDefaultOverallThreadCacheSize = 16 << 20; // REQUIRED: kMaxPages >= kMinSystemAlloc; static const size_t kMaxPages = kMinSystemAlloc; +/* The smallest prime > 2^n */ +static unsigned int primes_list[] = { + // Small values might cause high rates of sampling + // and hence commented out. + // 2, 5, 11, 17, 37, 67, 131, 257, + // 521, 1031, 2053, 4099, 8209, 16411, + 32771, 65537, 131101, 262147, 524309, 1048583, + 2097169, 4194319, 8388617, 16777259, 33554467 }; + // Twice the approximate gap between sampling actions. // I.e., we take one sample approximately once every -// kSampleParameter/2 +// tcmalloc_sample_parameter/2 // bytes of allocation, i.e., ~ once every 128KB. // Must be a prime number. -static const size_t kSampleParameter = 266053; +DEFINE_int64(tcmalloc_sample_parameter, 262147, + "Twice the approximate gap between sampling actions." + " Must be a prime number. 
Otherwise will be rounded up to a " + " larger prime number"); +static size_t sample_period = 262147; +// Protects sample_period above +static SpinLock sample_period_lock = SPINLOCK_INITIALIZER; //------------------------------------------------------------------- // Mapping from size to size_class and vice versa @@ -303,6 +319,17 @@ static int NumMoveSize(size_t size) { // and thread caches. if (num > static_cast(0.8 * kMaxFreeListLength)) num = static_cast(0.8 * kMaxFreeListLength); + + // Also, avoid bringing in too many objects into small object free + // lists. There are lots of such lists, and if we allow each one to + // fetch too many at a time, we end up having to scavenge too often + // (especially when there are lots of threads and each thread gets a + // small allowance for its thread cache). + // + // TODO: Make thread cache free list sizes dynamic so that we do not + // have to equally divide a fixed resource amongst lots of threads. + if (num > 32) num = 32; + return num; } @@ -918,7 +945,7 @@ void TCMalloc_PageHeap::Dump(TCMalloc_Printer* out) { uint64_t large_pages = 0; int large_spans = 0; for (Span* s = large_.next; s != &large_; s = s->next) { - out->printf(" [ %6" PRIuS " spans ]\n", s->length); + out->printf(" [ %6" PRIuS " pages ]\n", s->length); large_pages += s->length; large_spans++; } @@ -1057,6 +1084,7 @@ class TCMalloc_ThreadCache_FreeList { SLL_PopRange(&list_, N, start, end); ASSERT(length_ >= N); length_ -= N; + if (length_ < lowater_) lowater_ = length_; } }; @@ -1669,9 +1697,23 @@ void TCMalloc_ThreadCache::PickNextSample() { uint32_t r = rnd_; rnd_ = (r << 1) ^ ((static_cast(r) >> 31) & kPoly); - // Next point is "rnd_ % (2*sample_period)". I.e., average - // increment is "sample_period". - bytes_until_sample_ = rnd_ % kSampleParameter; + // Next point is "rnd_ % (sample_period)". I.e., average + // increment is "sample_period/2". 
+ const int flag_value = FLAGS_tcmalloc_sample_parameter; + static int last_flag_value = -1; + + if (flag_value != last_flag_value) { + SpinLockHolder h(&sample_period_lock); + int i; + for (i = 0; i < (sizeof(primes_list)/sizeof(primes_list[0]) - 1); i++) { + if (primes_list[i] >= flag_value) { + break; + } + } + sample_period = primes_list[i]; + last_flag_value = flag_value; + } + bytes_until_sample_ = rnd_ % sample_period; } void TCMalloc_ThreadCache::InitModule() { @@ -2118,7 +2160,7 @@ static inline void* do_malloc(size_t size) { } // The following call forces module initialization TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache(); - if (heap->SampleAllocation(size)) { + if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) { Span* span = DoSampledAllocation(size); if (span != NULL) { ret = reinterpret_cast(span->start << kPageShift); diff --git a/src/tests/heap-checker_unittest.cc b/src/tests/heap-checker_unittest.cc index e9ec6c3..3e85e7a 100644 --- a/src/tests/heap-checker_unittest.cc +++ b/src/tests/heap-checker_unittest.cc @@ -273,6 +273,7 @@ static void DoRunHidden(Closure* c, int n) { if (n) { run_hidden_ptr(c, n-1); wipe_stack_ptr(n); + sleep(0); // undo -foptimize-sibling-calls } else { c->Run(); } @@ -284,6 +285,7 @@ static void DoWipeStack(int n) { volatile int arr[sz]; for (int i = 0; i < sz; ++i) arr[i] = 0; wipe_stack_ptr(n-1); + sleep(0); // undo -foptimize-sibling-calls } } @@ -463,14 +465,14 @@ static void TestHeapLeakCheckerPProf() { // trick heap change: same total # of bytes and objects, but // different individual object sizes static void TestHeapLeakCheckerTrick() { - void* bar1 = AllocHidden(60 * sizeof(int)); + void* bar1 = AllocHidden(240 * sizeof(int)); Use(&bar1); - void* bar2 = AllocHidden(40 * sizeof(int)); + void* bar2 = AllocHidden(160 * sizeof(int)); Use(&bar2); HeapLeakChecker check("trick"); - void* foo1 = AllocHidden(70 * sizeof(int)); + void* foo1 = AllocHidden(280 * sizeof(int)); Use(&foo1); 
- void* foo2 = AllocHidden(30 * sizeof(int)); + void* foo2 = AllocHidden(120 * sizeof(int)); Use(&foo2); DeAllocHidden(&bar1); DeAllocHidden(&bar2); @@ -482,16 +484,16 @@ static void TestHeapLeakCheckerTrick() { // no false negatives from pprof static void TestHeapLeakCheckerDeathTrick() { - void* bar1 = AllocHidden(60 * sizeof(int)); + void* bar1 = AllocHidden(240 * sizeof(int)); Use(&bar1); - void* bar2 = AllocHidden(40 * sizeof(int)); + void* bar2 = AllocHidden(160 * sizeof(int)); Use(&bar2); HeapLeakChecker check("death_trick"); DeAllocHidden(&bar1); DeAllocHidden(&bar2); - void* foo1 = AllocHidden(70 * sizeof(int)); + void* foo1 = AllocHidden(280 * sizeof(int)); Use(&foo1); - void* foo2 = AllocHidden(30 * sizeof(int)); + void* foo2 = AllocHidden(120 * sizeof(int)); Use(&foo2); // TODO(maxim): use the above if we make pprof work in automated test runs if (!FLAGS_maybe_stripped) { @@ -733,13 +735,19 @@ static void* HeapBusyThreadBody(void* a) { } } if (FLAGS_test_register_leak) { - // Hide the register pointer value with an xor mask. + // Hide the register "ptr" value with an xor mask. // If one provides --test_register_leak flag, the test should // (with very high probability) crash on some leak check // with a leak report (of some x * sizeof(int) + y * sizeof(int*) bytes) // pointing at the two lines above in this function // with "new (initialized) int" in them as the allocators // of the leaked objects. + // CAVEAT: We can't really prevent a compiler to save some + // temporary values of "ptr" on the stack and thus let us find + // the heap objects not via the register. + // Hence it's normal if for certain compilers or optimization modes + // --test_register_leak does not cause a leak crash of the above form + // (this happens e.g. for gcc 4.0.1 in opt mode). 
ptr = reinterpret_cast( reinterpret_cast(ptr) ^ kHideMask); // busy loop to get the thread interrupted at: diff --git a/src/tests/tcmalloc_unittest.cc b/src/tests/tcmalloc_unittest.cc index b030e32..9f2df59 100644 --- a/src/tests/tcmalloc_unittest.cc +++ b/src/tests/tcmalloc_unittest.cc @@ -399,11 +399,14 @@ static void TestHugeAllocations() { for (size_t i = 0; i < 10000; i++) { TryHugeAllocation(kMaxSize - i); } - - // Check that asking for stuff near signed/unsigned boundary returns NULL + // Asking for memory sizes near signed/unsigned boundary (kMaxSignedSize) + // might work or not, depending on the amount of virtual memory. for (size_t i = 0; i < 100; i++) { - TryHugeAllocation(kMaxSignedSize - i); - TryHugeAllocation(kMaxSignedSize + i); + void* p = NULL; + p = malloc(kMaxSignedSize + i); + if (p) free(p); // if: free(NULL) is not necessarily defined + p = malloc(kMaxSignedSize - i); + if (p) free(p); } } @@ -560,18 +563,6 @@ int main(int argc, char** argv) { free(p); } - // Check that large allocations fail with NULL instead of crashing - fprintf(LOGSTREAM, "==== Testing out of memory\n"); - for (int s = 0; ; s += (10<<20)) { - void* large_object = malloc(s); - if (large_object == NULL) break; - free(large_object); - } - - // Check that huge allocations fail with NULL instead of crashing - fprintf(LOGSTREAM, "==== Testing huge allocations\n"); - TestHugeAllocations(); - // Check calloc() with various arguments fprintf(LOGSTREAM, "==== Testing calloc\n"); TestCalloc(0, 0, true); @@ -611,10 +602,16 @@ int main(int argc, char** argv) { threads[i] = new TesterThread(i); } - // Start + // Start the threads. + // Set the stack size to a small value to avoid inheriting 120MB+ + // limit when running under the google make system. 
+ pthread_attr_t attr; + pthread_attr_init(&attr); + pthread_attr_setstacksize(&attr, 1 << 20); for (int i = 0; i < FLAGS_numthreads; ++i) { - CHECK_EQ(pthread_create(&thread_ids[i], NULL, RunThread, threads[i]), 0); + CHECK_EQ(pthread_create(&thread_ids[i], &attr, RunThread, threads[i]), 0); } + pthread_attr_destroy(&attr); // Wait for (int i = 0; i < FLAGS_numthreads; ++i) { @@ -624,6 +621,21 @@ int main(int argc, char** argv) { for (int i = 0; i < FLAGS_numthreads; ++i) delete threads[i]; // Cleanup + // Do the memory intensive tests after threads are done, since exhausting + // the available address space can make pthread_create to fail. + + // Check that huge allocations fail with NULL instead of crashing + fprintf(LOGSTREAM, "==== Testing huge allocations\n"); + TestHugeAllocations(); + + // Check that large allocations fail with NULL instead of crashing + fprintf(LOGSTREAM, "==== Testing out of memory\n"); + for (int s = 0; ; s += (10<<20)) { + void* large_object = malloc(s); + if (large_object == NULL) break; + free(large_object); + } + fprintf(LOGSTREAM, "PASS\n"); return 0; }