Wed Jun 14 15:11:14 2006 Google Inc. <opensource@google.com>

* google-perftools: version 0.8 release * Experimental support for remote profiling added to pprof (many) * Fixed race condition in ProfileData::FlushTable (etune) * Better support for weird /proc maps (maxim, mec) * Fix heap-checker interaction with gdb (markus) * Better 64-bit support in pprof (aruns) * Reduce scavenging cost in tcmalloc by capping NumMoveSize (sanjay) * Cast syscall(SYS_mmap); works on more 64-bit systems now (menage) * Document the text output of pprof! (csilvers) * Better compiler support for no-THREADS and for old compilers (csilvers) * Make libunwind the default stack unwinder for x86-64 (aruns) * Somehow the COPYING file got erased. Regenerate it (csilvers) git-svn-id: http://gperftools.googlecode.com/svn/trunk@23 6b5cf1ce-ec42-a296-1ba9-69fdba395a50
2025-01-02 20:52:03 +00:00 · 2007-03-22 04:55:49 +00:00 · 2007-03-22 04:55:49 +00:00 · 8e188310f7
commit 8e188310f7
parent c3b96b3ac5
22 changed files with 1727 additions and 491 deletions
--- a/28
+++ b/28
@ -0,0 +1,28 @@
+Copyright (c) 2005, Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+    * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/15
+++ b/15
@ -85,3 +85,18 @@ Thu Apr 13 20:59:09 2006  Google Inc. <opensource@google.com>
 	* Syscall support for older kernels, including _syscall6 (markus)
 	* Support PIC mode (markus, mbland, iant)
 	* Better support for running in non-threaded contexts (csilvers)
+
+Wed Jun 14 15:11:14 2006  Google Inc. <opensource@google.com>
+
+	* google-perftools: version 0.8 release
+	* Experimental support for remote profiling added to pprof (many)
+	* Fixed race condition in ProfileData::FlushTable (etune)
+	* Better support for weird /proc maps (maxim, mec)
+	* Fix heap-checker interaction with gdb (markus)
+	* Better 64-bit support in pprof (aruns)
+	* Reduce scavenging cost in tcmalloc by capping NumMoveSize (sanjay)
+	* Cast syscall(SYS_mmap); works on more 64-bit systems now (menage)
+	* Document the text output of pprof! (csilvers)
+	* Better compiler support for no-THREADS and for old compilers (csilvers)
+	* Make libunwind the default stack unwinder for x86-64 (aruns)
+	* Somehow the COPYING file got erased.  Regenerate it (csilvers)
--- a/Makefile.am
+++ b/Makefile.am
@ -115,21 +115,24 @@ libtcmalloc_minimal_la_LDFLAGS = $(PTHREAD_CFLAGS) -export-symbols-regex $(TCMAL
 libtcmalloc_minimal_la_LIBADD = $(PTHREAD_LIBS) libstacktrace.la

 ### Unittests
-TESTS += malloc_unittest
-MALLOC_UNITEST_INCLUDES = src/config.h \
-                          src/google/malloc_extension.h \
-                          src/google/malloc_hook.h \
-                          src/base/basictypes.h \
-                          src/google/perftools/hash_set.h \
-                          src/maybe_threads.h
-malloc_unittest_SOURCES = src/tests/tcmalloc_unittest.cc \
-                          src/malloc_hook.cc \
-                          src/malloc_extension.cc \
-                          src/maybe_threads.cc \
-                          $(MALLOC_UNITTEST_INCLUDES)
-malloc_unittest_CXXFLAGS = $(PTHREAD_CFLAGS)
-malloc_unittest_LDFLAGS = $(PTHREAD_CFLAGS)
-malloc_unittest_LDADD = $(PTHREAD_LIBS)
+
+# Commented out for the moment because malloc(very_big_num) is broken in
+# standard libc!  At least, in some situations, some of the time.
+## TESTS += malloc_unittest
+## MALLOC_UNITTEST_INCLUDES = src/config.h \
+##                           src/google/malloc_extension.h \
+##                           src/google/malloc_hook.h \
+##                           src/base/basictypes.h \
+##                           src/google/perftools/hash_set.h \
+##                           src/maybe_threads.h
+## malloc_unittest_SOURCES = src/tests/tcmalloc_unittest.cc \
+##                           src/malloc_hook.cc \
+##                           src/malloc_extension.cc \
+##                           src/maybe_threads.cc \
+##                           $(MALLOC_UNITTEST_INCLUDES)
+## malloc_unittest_CXXFLAGS = $(PTHREAD_CFLAGS)
+## malloc_unittest_LDFLAGS = $(PTHREAD_CFLAGS)
+## malloc_unittest_LDADD = $(PTHREAD_LIBS)

 TESTS += tcmalloc_unittest
 TCMALLOC_UNITTEST_INCLUDES = src/google/malloc_extension.h
--- a/aclocal.m4
+++ b/aclocal.m4
@ -6751,7 +6751,61 @@ AC_DEFUN([AC_COMPILER_CHARACTERISTICS],
 	       

 # This was retrieved from
-#    http://www.gnu.org/software/ac-archive/htmldoc/acx_pthread.html
+#    http://0pointer.de/cgi-bin/viewcvs.cgi/trunk/common/acx_pthread.m4?rev=1220
+# See also (perhaps for new versions?)
+#    http://0pointer.de/cgi-bin/viewcvs.cgi/trunk/common/acx_pthread.m4
+
+dnl @synopsis ACX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]])
+dnl
+dnl @summary figure out how to build C programs using POSIX threads
+dnl
+dnl This macro figures out how to build C programs using POSIX threads.
+dnl It sets the PTHREAD_LIBS output variable to the threads library and
+dnl linker flags, and the PTHREAD_CFLAGS output variable to any special
+dnl C compiler flags that are needed. (The user can also force certain
+dnl compiler flags/libs to be tested by setting these environment
+dnl variables.)
+dnl
+dnl Also sets PTHREAD_CC to any special C compiler that is needed for
+dnl multi-threaded programs (defaults to the value of CC otherwise).
+dnl (This is necessary on AIX to use the special cc_r compiler alias.)
+dnl
+dnl NOTE: You are assumed to not only compile your program with these
+dnl flags, but also link it with them as well. e.g. you should link
+dnl with $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... $PTHREAD_LIBS
+dnl $LIBS
+dnl
+dnl If you are only building threads programs, you may wish to use
+dnl these variables in your default LIBS, CFLAGS, and CC:
+dnl
+dnl        LIBS="$PTHREAD_LIBS $LIBS"
+dnl        CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
+dnl        CC="$PTHREAD_CC"
+dnl
+dnl In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute
+dnl constant has a nonstandard name, defines PTHREAD_CREATE_JOINABLE to
+dnl that name (e.g. PTHREAD_CREATE_UNDETACHED on AIX).
+dnl
+dnl ACTION-IF-FOUND is a list of shell commands to run if a threads
+dnl library is found, and ACTION-IF-NOT-FOUND is a list of commands to
+dnl run if it is not found. If ACTION-IF-FOUND is not specified, the
+dnl default action will define HAVE_PTHREAD.
+dnl
+dnl Please let the authors know if this macro fails on any platform, or
+dnl if you have any other suggestions or comments. This macro was based
+dnl on work by SGJ on autoconf scripts for FFTW (www.fftw.org) (with
+dnl help from M. Frigo), as well as ac_pthread and hb_pthread macros
+dnl posted by Alejandro Forero Cuervo to the autoconf macro repository.
+dnl We are also grateful for the helpful feedback of numerous users.
+dnl
+dnl @category InstalledPackages
+dnl @author Steven G. Johnson <stevenj@alum.mit.edu>
+dnl @version 2005-06-15
+dnl @license GPLWithACException
+dnl 
+dnl Checks for GCC shared/pthread inconsistency based on work by
+dnl Marcin Owsiany <marcin@owsiany.pl>
+

 AC_DEFUN([ACX_PTHREAD], [
 AC_REQUIRE([AC_CANONICAL_HOST])
@ -6809,6 +6863,7 @@ acx_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -m
 # -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it
 #      doesn't hurt to check since this sometimes defines pthreads too;
 #      also defines -D_REENTRANT)
+#      ... -mt is also the pthreads flag for HP/aCC
 # pthread: Linux, etcetera
 # --thread-safe: KAI C++
 # pthread-config: use pthread-config program (for GNU Pth library)
@ -6818,13 +6873,13 @@ case "${host_cpu}-${host_os}" in

        # On Solaris (at least, for some versions), libc contains stubbed
        # (non-functional) versions of the pthreads routines, so link-based
-        # tests will erroneously succeed.  (We need to link with -pthread or
+        # tests will erroneously succeed.  (We need to link with -pthreads/-mt/
        # -lpthread.)  (The stubs are missing pthread_cleanup_push, or rather
        # a function called by this macro, so we could check for that, but
        # who knows whether they'll stub that too in a future libc.)  So,
        # we'll just look for -pthreads and -lpthread first:

-        acx_pthread_flags="-pthread -pthreads pthread -mt $acx_pthread_flags"
+        acx_pthread_flags="-pthreads pthread -mt -pthread $acx_pthread_flags"
        ;;
 esac

@ -6898,7 +6953,7 @@ if test "x$acx_pthread_ok" = xyes; then
 	AC_MSG_CHECKING([for joinable pthread attribute])
 	attr_name=unknown
 	for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do
-            AC_TRY_LINK([#include <pthread.h>], [int attr=$attr;],
+	    AC_TRY_LINK([#include <pthread.h>], [int attr=$attr; return attr;],
                        [attr_name=$attr; break])
 	done
        AC_MSG_RESULT($attr_name)
@ -6924,6 +6979,107 @@ if test "x$acx_pthread_ok" = xyes; then

        # More AIX lossage: must compile with cc_r
        AC_CHECK_PROG(PTHREAD_CC, cc_r, cc_r, ${CC})
+
+   # The next part tries to detect GCC inconsistency with -shared on some
+   # architectures and systems. The problem is that in certain
+   # configurations, when -shared is specified, GCC "forgets" to
+   # internally use various flags which are still necessary.
+   
+   # These checks only apply to GCC, so skip them entirely when
+   # another compiler is in use.
+   AC_MSG_CHECKING([whether to check for GCC pthread/shared inconsistencies])
+   if test x"$GCC" != xyes; then
+      AC_MSG_RESULT([no])
+   else
+      AC_MSG_RESULT([yes])
+
+      # In order not to create several levels of indentation, we test
+      # the value of "$ok" until we find out the cure or run out of
+      # ideas.
+      ok="no"
+
+      #
+      # Prepare the flags
+      #
+      save_CFLAGS="$CFLAGS"
+      save_LIBS="$LIBS"
+      save_CC="$CC"
+      # Try with the flags determined by the earlier checks.
+      #
+      # -Wl,-z,defs forces link-time symbol resolution, so that the
+      # linking checks with -shared actually have any value
+      #
+      # FIXME: -fPIC is required for -shared on many architectures,
+      # so we specify it here, but the right way would probably be to
+      # properly detect whether it is actually required.
+      CFLAGS="-shared -fPIC -Wl,-z,defs $CFLAGS $PTHREAD_CFLAGS"
+      LIBS="$PTHREAD_LIBS $LIBS"
+      CC="$PTHREAD_CC"
+
+      AC_MSG_CHECKING([whether -pthread is sufficient with -shared])
+      AC_TRY_LINK([#include <pthread.h>],
+         [pthread_t th; pthread_join(th, 0);
+         pthread_attr_init(0); pthread_cleanup_push(0, 0);
+         pthread_create(0,0,0,0); pthread_cleanup_pop(0); ],
+         [ok=yes])
+      
+      if test "x$ok" = xyes; then
+         AC_MSG_RESULT([yes])
+      else
+         AC_MSG_RESULT([no])
+      fi
+   
+      #
+      # Linux gcc on some architectures such as mips/mipsel forgets
+      # about -lpthread
+      #
+      if test x"$ok" = xno; then
+         AC_MSG_CHECKING([whether -lpthread fixes that])
+         LIBS="-lpthread $PTHREAD_LIBS $save_LIBS"
+         AC_TRY_LINK([#include <pthread.h>],
+            [pthread_t th; pthread_join(th, 0);
+            pthread_attr_init(0); pthread_cleanup_push(0, 0);
+            pthread_create(0,0,0,0); pthread_cleanup_pop(0); ],
+            [ok=yes])
+   
+         if test "x$ok" = xyes; then
+            AC_MSG_RESULT([yes])
+            PTHREAD_LIBS="-lpthread $PTHREAD_LIBS"
+         else
+            AC_MSG_RESULT([no])
+         fi
+      fi
+      #
+      # FreeBSD 4.10 gcc forgets to use -lc_r instead of -lc
+      #
+      if test x"$ok" = xno; then
+         AC_MSG_CHECKING([whether -lc_r fixes that])
+         LIBS="-lc_r $PTHREAD_LIBS $save_LIBS"
+         AC_TRY_LINK([#include <pthread.h>],
+             [pthread_t th; pthread_join(th, 0);
+              pthread_attr_init(0); pthread_cleanup_push(0, 0);
+              pthread_create(0,0,0,0); pthread_cleanup_pop(0); ],
+             [ok=yes])
+   
+         if test "x$ok" = xyes; then
+            AC_MSG_RESULT([yes])
+            PTHREAD_LIBS="-lc_r $PTHREAD_LIBS"
+         else
+            AC_MSG_RESULT([no])
+         fi
+      fi
+      if test x"$ok" = xno; then
+         # OK, we have run out of ideas
+         AC_MSG_WARN([Impossible to determine how to use pthreads with shared libraries])
+
+         # so it's not safe to assume that we may use pthreads
+         acx_pthread_ok=no
+      fi
+
+      CFLAGS="$save_CFLAGS"
+      LIBS="$save_LIBS"
+      CC="$save_CC"
+   fi
 else
        PTHREAD_CC="$CC"
 fi
--- a/237
+++ b/237
@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.57 for google-perftools 0.7.
+# Generated by GNU Autoconf 2.57 for google-perftools 0.8.
 #
 # Report bugs to <opensource@google.com>.
 #
@ -422,8 +422,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
 # Identity of this package.
 PACKAGE_NAME='google-perftools'
 PACKAGE_TARNAME='google-perftools'
-PACKAGE_VERSION='0.7'
-PACKAGE_STRING='google-perftools 0.7'
+PACKAGE_VERSION='0.8'
+PACKAGE_STRING='google-perftools 0.8'
 PACKAGE_BUGREPORT='opensource@google.com'

 ac_unique_file="README"
@ -953,7 +953,7 @@ if test "$ac_init_help" = "long"; then
  # Omit some internal or obsolete options to make the list less imposing.
  # This message is too long to be a string in the A/UX 3.1 sh.
  cat <<_ACEOF
-\`configure' configures google-perftools 0.7 to adapt to many kinds of systems.
+\`configure' configures google-perftools 0.8 to adapt to many kinds of systems.

 Usage: $0 [OPTION]... [VAR=VALUE]...

@ -1019,7 +1019,7 @@ fi

 if test -n "$ac_init_help"; then
  case $ac_init_help in
-     short | recursive ) echo "Configuration of google-perftools 0.7:";;
+     short | recursive ) echo "Configuration of google-perftools 0.8:";;
   esac
  cat <<\_ACEOF

@ -1125,7 +1125,7 @@ fi
 test -n "$ac_init_help" && exit 0
 if $ac_init_version; then
  cat <<\_ACEOF
-google-perftools configure 0.7
+google-perftools configure 0.8
 generated by GNU Autoconf 2.57

 Copyright 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, 2002
@ -1140,7 +1140,7 @@ cat >&5 <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.

-It was created by google-perftools $as_me 0.7, which was
+It was created by google-perftools $as_me 0.8, which was
 generated by GNU Autoconf 2.57.  Invocation command line was

  $ $0 $@
@ -1733,7 +1733,7 @@ fi

 # Define the identity of the package.
 PACKAGE=google-perftools
- VERSION=0.7
+ VERSION=0.8


 cat >>confdefs.h <<_ACEOF
@ -21171,6 +21171,7 @@ acx_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -m
 # -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it
 #      doesn't hurt to check since this sometimes defines pthreads too;
 #      also defines -D_REENTRANT)
+#      ... -mt is also the pthreads flag for HP/aCC
 # pthread: Linux, etcetera
 # --thread-safe: KAI C++
 # pthread-config: use pthread-config program (for GNU Pth library)
@ -21180,13 +21181,13 @@ case "${host_cpu}-${host_os}" in

        # On Solaris (at least, for some versions), libc contains stubbed
        # (non-functional) versions of the pthreads routines, so link-based
-        # tests will erroneously succeed.  (We need to link with -pthread or
+        # tests will erroneously succeed.  (We need to link with -pthreads/-mt/
        # -lpthread.)  (The stubs are missing pthread_cleanup_push, or rather
        # a function called by this macro, so we could check for that, but
        # who knows whether they'll stub that too in a future libc.)  So,
        # we'll just look for -pthreads and -lpthread first:

-        acx_pthread_flags="-pthread -pthreads pthread -mt $acx_pthread_flags"
+        acx_pthread_flags="-pthreads pthread -mt -pthread $acx_pthread_flags"
        ;;
 esac

@ -21343,7 +21344,7 @@ cat >>conftest.$ac_ext <<_ACEOF
 int
 main ()
 {
-int attr=$attr;
+int attr=$attr; return attr;
  ;
  return 0;
 }
@ -21431,6 +21432,216 @@ else
 echo "${ECHO_T}no" >&6
 fi

+
+   # The next part tries to detect GCC inconsistency with -shared on some
+   # architectures and systems. The problem is that in certain
+   # configurations, when -shared is specified, GCC "forgets" to
+   # internally use various flags which are still necessary.
+
+   # These checks only apply to GCC, so skip them entirely when
+   # another compiler is in use.
+   echo "$as_me:$LINENO: checking whether to check for GCC pthread/shared inconsistencies" >&5
+echo $ECHO_N "checking whether to check for GCC pthread/shared inconsistencies... $ECHO_C" >&6
+   if test x"$GCC" != xyes; then
+      echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+   else
+      echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6
+
+      # In order not to create several levels of indentation, we test
+      # the value of "$ok" until we find out the cure or run out of
+      # ideas.
+      ok="no"
+
+      #
+      # Prepare the flags
+      #
+      save_CFLAGS="$CFLAGS"
+      save_LIBS="$LIBS"
+      save_CC="$CC"
+      # Try with the flags determined by the earlier checks.
+      #
+      # -Wl,-z,defs forces link-time symbol resolution, so that the
+      # linking checks with -shared actually have any value
+      #
+      # FIXME: -fPIC is required for -shared on many architectures,
+      # so we specify it here, but the right way would probably be to
+      # properly detect whether it is actually required.
+      CFLAGS="-shared -fPIC -Wl,-z,defs $CFLAGS $PTHREAD_CFLAGS"
+      LIBS="$PTHREAD_LIBS $LIBS"
+      CC="$PTHREAD_CC"
+
+      echo "$as_me:$LINENO: checking whether -pthread is sufficient with -shared" >&5
+echo $ECHO_N "checking whether -pthread is sufficient with -shared... $ECHO_C" >&6
+      cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <pthread.h>
+int
+main ()
+{
+pthread_t th; pthread_join(th, 0);
+         pthread_attr_init(0); pthread_cleanup_push(0, 0);
+         pthread_create(0,0,0,0); pthread_cleanup_pop(0);
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+  (eval $ac_link) 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+         { ac_try='test -s conftest$ac_exeext'
+  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ok=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+fi
+rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext
+
+      if test "x$ok" = xyes; then
+         echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6
+      else
+         echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+      fi
+
+      #
+      # Linux gcc on some architectures such as mips/mipsel forgets
+      # about -lpthread
+      #
+      if test x"$ok" = xno; then
+         echo "$as_me:$LINENO: checking whether -lpthread fixes that" >&5
+echo $ECHO_N "checking whether -lpthread fixes that... $ECHO_C" >&6
+         LIBS="-lpthread $PTHREAD_LIBS $save_LIBS"
+         cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <pthread.h>
+int
+main ()
+{
+pthread_t th; pthread_join(th, 0);
+            pthread_attr_init(0); pthread_cleanup_push(0, 0);
+            pthread_create(0,0,0,0); pthread_cleanup_pop(0);
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+  (eval $ac_link) 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+         { ac_try='test -s conftest$ac_exeext'
+  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ok=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+fi
+rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext
+
+         if test "x$ok" = xyes; then
+            echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6
+            PTHREAD_LIBS="-lpthread $PTHREAD_LIBS"
+         else
+            echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+         fi
+      fi
+      #
+      # FreeBSD 4.10 gcc forgets to use -lc_r instead of -lc
+      #
+      if test x"$ok" = xno; then
+         echo "$as_me:$LINENO: checking whether -lc_r fixes that" >&5
+echo $ECHO_N "checking whether -lc_r fixes that... $ECHO_C" >&6
+         LIBS="-lc_r $PTHREAD_LIBS $save_LIBS"
+         cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <pthread.h>
+int
+main ()
+{
+pthread_t th; pthread_join(th, 0);
+              pthread_attr_init(0); pthread_cleanup_push(0, 0);
+              pthread_create(0,0,0,0); pthread_cleanup_pop(0);
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+  (eval $ac_link) 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+         { ac_try='test -s conftest$ac_exeext'
+  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ok=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+fi
+rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext
+
+         if test "x$ok" = xyes; then
+            echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6
+            PTHREAD_LIBS="-lc_r $PTHREAD_LIBS"
+         else
+            echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+         fi
+      fi
+      if test x"$ok" = xno; then
+         # OK, we have run out of ideas
+         { echo "$as_me:$LINENO: WARNING: Impossible to determine how to use pthreads with shared libraries" >&5
+echo "$as_me: WARNING: Impossible to determine how to use pthreads with shared libraries" >&2;}
+
+         # so it's not safe to assume that we may use pthreads
+         acx_pthread_ok=no
+      fi
+
+      CFLAGS="$save_CFLAGS"
+      LIBS="$save_LIBS"
+      CC="$save_CC"
+   fi
 else
        PTHREAD_CC="$CC"
 fi
@ -22393,7 +22604,7 @@ _ASBOX
 } >&5
 cat >&5 <<_CSEOF

-This file was extended by google-perftools $as_me 0.7, which was
+This file was extended by google-perftools $as_me 0.8, which was
 generated by GNU Autoconf 2.57.  Invocation command line was

  CONFIG_FILES    = $CONFIG_FILES
@ -22456,7 +22667,7 @@ _ACEOF

 cat >>$CONFIG_STATUS <<_ACEOF
 ac_cs_version="\\
-google-perftools config.status 0.7
+google-perftools config.status 0.8
 configured by $0, generated by GNU Autoconf 2.57,
  with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\"

--- a/configure.ac
+++ b/configure.ac
@ -5,7 +5,7 @@
 # make sure we're interpreted by some minimal autoconf
 AC_PREREQ(2.57)

-AC_INIT(google-perftools, 0.7, opensource@google.com)
+AC_INIT(google-perftools, 0.8, opensource@google.com)
 # The argument here is just something that should be in the current directory
 # (for sanity checking)
 AC_CONFIG_SRCDIR(README)
--- a/doc/cpu_profiler.html
+++ b/doc/cpu_profiler.html
@ -109,6 +109,24 @@ detail below.</p>
  annotated with the flat and cumulative sample counts at each PC value.
 </pre>

+<h3>Analyzing Text Output</h3>
+
+<p>Text mode has lines of output that look like this:</p>
+<pre>
+       14   2.1%  17.2%       58   8.7% std::_Rb_tree::find
+</pre>
+
+<p>Here is how to interpret the columns:</p>
+<ol>
+  <li> Number of profiling samples in this function
+  <li> Percentage of profiling samples in this function
+  <li> Percentage of profiling samples in the functions printed so far
+  <li> Number of profiling samples in this function and its callees
+  <li> Percentage of profiling samples in this function and its callees
+  <li> Function name
+</ol>
+
+
 <h3>Node Information</h3>

 <p>In the various graphical modes of pprof, the output is a call graph
--- a/doc/pprof_remote_servers.html
+++ b/doc/pprof_remote_servers.html
@ -0,0 +1,190 @@
+<HTML>
+
+<HEAD>
+<title>pprof and Remote Servers</title>
+</HEAD>
+
+<BODY>
+
+<h1><code>pprof</code> and Remote Servers</h1>
+
+<p>In mid-2006, we added an experimental facility to <A
+HREF="cpu_profiler.html">pprof</A>, the tool that analyzes CPU and
+heap profiles.  This facility allows you to collect profile
+information from running applications.  It makes it easy to collect
+profile information without having to stop the program first, and
+without having to log into the machine where the application is
+running.  This is meant to be used on webservers, but will work on any
+application that can be modified to accept TCP connections on a port
+of its choosing, and to respond to HTTP requests on that port.</p>
+
+<p>We do not currently have infrastructure, such as apache modules,
+that you can pop into a webserver or other application to get the
+necessary functionality "for free."  However, it's easy to generate
+the necessary data, which should allow the interested developer to add
+the necessary support into his or her applications.</p>
+
+<p>To use <code>pprof</code> in this experimental "server" mode, you
+give the script a host and port it should query, replacing the normal
+commandline arguments of application + profile file:</p>
+<pre>
+   % pprof internalweb.mycompany.com:80
+</pre>
+
+<p>The host must be listening on that port, and be able to accept HTTP/1.0
+requests -- sent via <code>wget</code> and <code>curl</code> -- for
+several urls.  The following sections list the urls that
+<code>pprof</code> can send, and the responses it expects in
+return.</p>
+
+
+<ul><li> <code><b>/pprof/heap</b></code>
+
+<p><code>pprof</code> asks for the url <code>/pprof/heap</code> to
+get heap information.  The actual url is controlled via the variable
+<code>HEAP_PAGE</code> in the <code>pprof</code> script, so you
+can change it if you'd like.</p>
+
+<p>The server should respond by calling</p>
+<pre>
+    MallocExtension::instance()->GetHeapSample(&output);
+</pre>
+<p>and sending <code>output</code> back as an HTTP response to
+<code>pprof</code>.  <code>MallocExtension</code> is defined in the
+header file <code>google/malloc_extension.h</code>.</p>
+
+<p>Here's an example, from an actual Google webserver, of what the
+output should look like:</p>
+<pre>
+heap profile:   9369: 126987529 [  9369: 126987529] @ heap
+     2:     1024 [     2:     1024] @ 0x87da913 0x8923ad4 0x891d4c2 0x892de12 0x8930519 0x83a16c2 0x836cb38 0x834cd1c 0x8349ba5 0x10a3177 0x8349961
+     1:       36 [     1:       36] @ 0x87da913 0x83a0929 0x836cb38 0x834cd1c 0x8349ba5 0x10a3177 0x8349961
+   308: 10092544 [   308: 10092544] @ 0x87da913 0x8970d66 0x8970e64 0x896e8e2 0x88e69d2 0x88e6add 0x88e6dec 0x88e7384 0x88e73fa 0x8838793 0x8838b36 0x88395f8 0x88f5a4b 0x890d03a 0x890d65a 0x8917666 0x890d1f3 0x890e6e4 0x8349c1b 0x10a3177 0x8349961
+[...]
+</pre>
+
+
+</li><li> <code><b>/pprof/growth</b></code>
+
+<p><code>pprof</code> asks for the url <code>/pprof/growth</code> to
+get heap-profiling delta (growth) information.  The actual url is
+controlled via the variable <code>GROWTH_PAGE</code> in the
+<code>pprof</code> script, so you can change it if you'd like.</p>
+
+<p>The server should respond by calling</p>
+<pre>
+    MallocExtension::instance()->GetHeapGrowthStacks(&output);
+</pre>
+<p>and sending <code>output</code> back as an HTTP response to
+<code>pprof</code>.  <code>MallocExtension</code> is defined in the
+header file <code>google/malloc_extension.h</code>.</p>
+
+<p>Here's an example, from an actual Google webserver, of what the
+output should look like:</p>
+<pre>
+heap profile:    741: 812122112 [   741: 812122112] @ growth
+     1:  1572864 [     1:  1572864] @ 0x87da564 0x87db8a3 0x84787a4 0x846e851 0x836d12f 0x834cd1c 0x8349ba5 0x10a3177 0x8349961
+     1:  1048576 [     1:  1048576] @ 0x87d92e8 0x87d9213 0x87d9178 0x87d94d3 0x87da9da 0x8a364ff 0x8a437e7 0x8ab7d23 0x8ab7da9 0x8ac7454 0x8348465 0x10a3161 0x8349961
+[...]
+</pre>
+
+
+</li><li> <code><b>/pprof/profile</b></code>
+
+<p><code>pprof</code> asks for the url
+<code>/pprof/profile?seconds=XX</code> to get cpu-profiling
+information.  The actual url is controlled via the variable
+<code>PROFILE_PAGE</code> in the <code>pprof</code> script, so you can
+change it if you'd like.</p>
+
+<p>The server should respond by calling
+<code>ProfilerStart(filename)</code>, continuing to do its work, and
+then, XX seconds later, calling <code>ProfilerStop()</code>.  (These
+functions are declared in <code>google/profiler.h</code>.)  The
+application is responsible for picking a unique filename for
+<code>ProfilerStart()</code>.  After calling
+<code>ProfilerStop()</code>, the server should read the contents of
+<code>filename</code> and send them back as an HTTP response to
+<code>pprof</code>.</p>
+
+<p>Obviously, to get useful profile information the application must
+continue to run in the XX seconds that the profiler is running.  Thus,
+the profile start-stop calls should be done in a separate thread, or
+be otherwise non-blocking.</p>
+
+<p>The profiler output file is binary, but near the end of it, it
+should have lines of text somewhat like this:</p>
+<pre>
+01016000-01017000 rw-p 00015000 03:01 59314      /lib/ld-2.2.2.so
+</pre>
+
+
+</li><li> <code><b>/pprof/contention</b></code>
+
+<p>This is intended to be able to profile (thread) lock contention in
+addition to CPU and memory use.  It's not yet usable.</p>
+
+
+</li><li> <code><b>/pprof/cmdline</b></code>
+
+<p><code>pprof</code> asks for the url <code>/pprof/cmdline</code> to
+figure out what application it's profiling.  The actual url is
+controlled via the variable <code>PROGRAM_NAME_PAGE</code> in the
+<code>pprof</code> script, so you can change it if you'd like.</p>
+
+<p>The server should respond by reading the contents of
+<code>/proc/self/cmdline</code>, converting all internal NUL (\0)
+characters to newlines, and sending the result back as an HTTP
+response to <code>pprof</code>.</p>
+
+<p>Here's an example return value:</p>
+<pre>
+/root/server/custom_webserver
+80
+--configfile=/root/server/ws.config
+</pre>
+
+
+</li><li> <code><b>/pprof/symbol</b></code>
+
+<p><code>pprof</code> asks for the url <code>/pprof/symbol</code> to
+map from hex addresses to function names.  The actual url is
+controlled via the variable <code>SYMBOL_PAGE</code> in the
+<code>pprof</code> script, so you can change it if you'd like.</p>
+
+<p>This is perhaps the hardest request to write code for, because
+it must accept POST requests.  This means that after the HTTP headers,
+pprof will pass in a list of hex addresses connected by
+<code>+</code>, like so:</p>
+<pre>
+   curl -d '0x0824d061+0x0824d1cf' http://remote_host:80/pprof/symbol
+</pre>
+
+<p>The server should read the POST data, which will be in one line,
+and for each hex value, should write one line of output to the output
+stream, like so:</p>
+<pre>
+&lt;hex address&gt;&lt;tab&gt;&lt;function name&gt;
+</pre>
+<p>For instance:</p>
+<pre>
+0x08b2dabd    _Update
+</pre>
+
+<p>The other reason this is the most difficult request to implement,
+is that the application will have to figure out for itself how to map
+from address to function name.  One possibility is to run <code>nm -C
+-n &lt;program name&gt;</code> to get the mappings, either statically
+(say at program-compile time), or dynamically, by having the
+application call out to <code>nm</code> for every
+<code>pprof/symbol</code> call (presumably with some caching!).</p>
+
+<p><code>pprof</code> itself does just this for local profiles (not
+ones that talk to remote servers); look at the subroutine
+<code>GetProcedureBoundaries</code>.</p>
+
+</li></ul>
+<hr>
+Last modified: Mon Jun 12 21:30:14 PDT 2006
+</body>
+</html>
--- a/src/base/linux_syscall_support.h
+++ b/src/base/linux_syscall_support.h
@ -45,6 +45,14 @@
 #if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__)) && \
    defined(__linux)

+#ifdef __cplusplus
+/* Some system header files in older versions of gcc neglect to properly
+ * handle being included from C++. As it appears to be harmless to have
+ * multiple nested 'extern "C"' blocks, just add another one here.
+ */
+extern "C" {
+#endif
+
 #include <errno.h>
 #include <signal.h>
 #include <stdarg.h>
@ -87,16 +95,24 @@
 #ifndef __NR_futex
 #define __NR_futex              240
 #endif
+#ifndef __NR_sched_setaffinity
+#define __NR_sched_setaffinity  241
+#define __NR_sched_getaffinity  242
+#endif
 /* End of i386 definitions                                                   */
 #elif defined(__ARM_ARCH_3__)
 #ifndef __NR_getdents64
-#define __NR_getdents64   217
+#define __NR_getdents64         (__NR_SYSCALL_BASE + 217)
 #endif
 #ifndef __NR_gettid
-#define __NR_gettid       224
+#define __NR_gettid             (__NR_SYSCALL_BASE + 224)
 #endif
 #ifndef __NR_futex
-#define __NR_futex        240
+#define __NR_futex              (__NR_SYSCALL_BASE + 240)
+#endif
+#ifndef __NR_sched_setaffinity
+#define __NR_sched_setaffinity  (__NR_SYSCALL_BASE + 241)
+#define __NR_sched_getaffinity  (__NR_SYSCALL_BASE + 242)
 #endif
 /* End of ARM 3 definitions                                                  */
 #elif defined(__x86_64__)
@ -109,6 +125,10 @@
 #ifndef __NR_futex
 #define __NR_futex              202
 #endif
+#ifndef __NR_sched_setaffinity
+#define __NR_sched_setaffinity  203
+#define __NR_sched_getaffinity  204
+#endif
 /* End of x86-64 definitions                                                 */
 #endif

@ -306,9 +326,11 @@ struct dirent64;
  #endif
  #if defined(__x86_64__)
    struct msghdr;
+    struct sockaddr;
    #define __NR_sys_mmap           __NR_mmap
    #define __NR_sys_recvmsg        __NR_recvmsg
    #define __NR_sys_sendmsg        __NR_sendmsg
+    #define __NR_sys_sendto         __NR_sendto
    #define __NR_sys_shutdown       __NR_shutdown
    #define __NR_sys_rt_sigaction   __NR_rt_sigaction
    #define __NR_sys_rt_sigprocmask __NR_rt_sigprocmask
@ -322,6 +344,10 @@ struct dirent64;
                            struct msghdr*,          m, int, f);
    static inline _syscall3(int, sys_sendmsg,        int,   s,
                            const struct msghdr*,    m, int, f);
+    static inline _syscall6(int, sys_sendto,         int,   s,
+                            const void*,             m, size_t, l,
+                            int,                     f,
+                            const struct sockaddr*,  a, int, t);
    static inline _syscall2(int, sys_shutdown,       int,   s,
                            int,                     h);
    static inline _syscall4(int, sys_rt_sigaction,   int,   s,
@ -378,6 +404,8 @@ struct dirent64;
    }
    #define sys_recvmsg(s,m,f)      sys_socketcall(17,      (s), (m), (f))
    #define sys_sendmsg(s,m,f)      sys_socketcall(16,      (s), (m), (f))
+    #define sys_sendto(s,m,l,f,a,t) sys_socketcall(11,      (s), (m), (l),(f),\
+                                                            (a), (t))
    #define sys_shutdown(s,h)       sys_socketcall(13,      (s), (h))
    #define sys_socket(d,t,p)       sys_socketcall(1,       (d), (t), (p))
    #define sys_socketpair(d,t,p,s) sys_socketcall(8,       (d), (t), (p),(s))
@ -395,6 +423,7 @@ struct dirent64;
  #define __NR_sys_fcntl              __NR_fcntl
  #define __NR_sys_fork               __NR_fork
  #define __NR_sys_fstat              __NR_fstat
+  #define __NR_sys_futex              __NR_futex
  #define __NR_sys_getdents           __NR_getdents
  #define __NR_sys_getdents64         __NR_getdents64
  #define __NR_sys_getegid            __NR_getegid
@ -415,11 +444,12 @@ struct dirent64;
  #define __NR_sys_ptrace             __NR_ptrace
  #define __NR_sys_read               __NR_read
  #define __NR_sys_readlink           __NR_readlink
+  #define __NR_sys_sched_getaffinity  __NR_sched_getaffinity
+  #define __NR_sys_sched_setaffinity  __NR_sched_setaffinity
  #define __NR_sys_sched_yield        __NR_sched_yield
  #define __NR_sys_sigaltstack        __NR_sigaltstack
  #define __NR_sys_stat               __NR_stat
  #define __NR_sys_write              __NR_write
-  #define __NR_sys_futex        __NR_futex
  static inline _syscall1(int,     sys_close,       int,         f);
  static inline _syscall1(int,     sys_dup,         int,         f);
  static inline _syscall2(int,     sys_dup2,        int,         s,
@ -432,6 +462,8 @@ struct dirent64;
  static inline _syscall0(pid_t,   sys_fork);
  static inline _syscall2(int,     sys_fstat,       int,         f,
                          struct stat*,   b);
+  static inline _syscall4(int, sys_futex, int*, addrx, int, opx, int, valx,
+                          struct timespec *, timeoutx);
  static inline _syscall3(int,   sys_getdents,      int,         f,
                          struct dirent*, d, int,    c);
  static inline _syscall3(int,   sys_getdents64,    int,         f,
@ -464,6 +496,10 @@ struct dirent64;
                          void *,         b, size_t, c);
  static inline _syscall3(int,     sys_readlink,    const char*, p,
                          char*,          b, size_t, s);
+  static inline _syscall3(int, sys_sched_getaffinity, pid_t, pid,
+                          unsigned int, len, unsigned long *, mask);
+  static inline _syscall3(int, sys_sched_setaffinity, pid_t, pid,
+                          unsigned int, len, unsigned long *, mask);
  static inline _syscall0(int,     sys_sched_yield);
  static inline _syscall2(int,     sys_sigaltstack, const stack_t*, s,
                          const stack_t*, o);
@ -471,8 +507,6 @@ struct dirent64;
                          struct stat*,   b);
  static inline _syscall3(ssize_t, sys_write,        int,        f,
                          const void *,   b, size_t, c);
-  static inline _syscall4(int, sys_futex, int*, addrx, int, opx, int, valx,
-                          struct timespec *, timeoutx);

  static inline int sys_sysconf(int name) {
    extern int __getpagesize(void);
@ -517,6 +551,9 @@ struct dirent64;
  #undef RETURN
 #endif

+#ifdef __cplusplus
+}
+#endif

 #endif
 #endif
--- a/src/base/linuxthreads.c
+++ b/src/base/linuxthreads.c
@ -51,6 +51,10 @@
 #include "base/linux_syscall_support.h"
 #include "base/thread_lister.h"

+#ifndef CLONE_UNTRACED
+#define CLONE_UNTRACED 0x00800000
+#endif
+

 /* itoa() is not a standard function, and we cannot safely call printf()
 * after suspending threads. So, we just implement our own copy. A
@ -97,8 +101,19 @@ static int local_clone (int (*fn)(void *), void *arg, ...) {
   * Leave 4kB of gap between the callers stack and the new clone. This
   * should be more than sufficient for the caller to call waitpid() until
   * the cloned thread terminates.
+   *
+   * It is important that we set the CLONE_UNTRACED flag, because newer
+   * versions of "gdb" otherwise attempt to attach to our thread, and will
+   * attempt to reap its status codes. This subsequently results in the
+   * caller hanging indefinitely in waitpid(), waiting for a change in
+   * status that will never happen. By setting the CLONE_UNTRACED flag, we
+   * prevent "gdb" from stealing events, but we still expect the thread
+   * lister to fail, because it cannot PTRACE_ATTACH to the process that
+   * is being debugged. This is OK and the error code will be reported
+   * correctly.
   */
-  return clone(fn, (char *)&arg - 4096, CLONE_VM|CLONE_FS|CLONE_FILES, arg);
+  return clone(fn, (char *)&arg - 4096,
+               CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_UNTRACED, arg);
 }


@ -209,7 +224,8 @@ struct ListerParams {
 static void ListerThread(struct ListerParams *args) {
  static const int  signals[]  = { SIGABRT, SIGILL, SIGFPE, SIGSEGV, SIGBUS,
                                   SIGXCPU, SIGXFSZ };
-  pid_t             clone_pid  = sys_gettid();
+  int               found_parent = 0;
+  pid_t             clone_pid  = sys_gettid(), ppid = sys_getppid();
  char              proc_self_task[80], marker_name[48], *marker_path;
  const char        *proc_paths[3];
  const char *const *proc_path = proc_paths;
@ -239,8 +255,7 @@ static void ListerThread(struct ListerParams *args) {
  }

  /* Compute search paths for finding thread directories in /proc            */
-  local_itoa(strrchr(strcpy(proc_self_task, "/proc/"), '\000'),
-             sys_getppid());
+  local_itoa(strrchr(strcpy(proc_self_task, "/proc/"), '\000'), ppid);
  marker_path = strrchr(strcpy(marker_name, proc_self_task), '\000');
  strcat(proc_self_task, "/task/");
  proc_paths[0] = proc_self_task; /* /proc/$$/task/                          */
@ -417,6 +432,7 @@ static void ListerThread(struct ListerParams *args) {
                  num_threads--;
                  sig_num_threads = num_threads;
                } else {
+                  found_parent |= pid == ppid;
                  added_entries++;
                }
              }
@ -435,6 +451,16 @@ static void ListerThread(struct ListerParams *args) {
        NO_INTR(sys_close(marker));
        sig_marker = marker = -1;

+        /* If we never found the parent process, something is very wrong.
+         * Most likely, we are running in a debugger. Any attempt to operate
+         * on the threads would be very incomplete. Let's just report an
+         * error to the caller.
+         */
+        if (!found_parent) {
+          ResumeAllProcessThreads(num_threads, pids);
+          sys__exit(3);
+        }
+
        /* Now we are ready to call the callback,
         * which takes care of resuming the threads for us.
         */
@ -530,6 +556,9 @@ int ListAllProcessThreads(void *parameter,
      case 2: args.err = EFAULT; /* Some fault (e.g. SIGSEGV) detected       */
              args.result = -1;
              break;
+      case 3: args.err = EPERM;  /* Process is already being traced          */
+              args.result = -1;
+              break;
      default:args.err = ECHILD; /* Child died unexpectedly                  */
              args.result = -1;
              break;
--- a/src/base/thread_lister.c
+++ b/src/base/thread_lister.c
@ -31,7 +31,8 @@
 * Author: Markus Gutschke
 */

-#include <stdio.h>         // needed for NULL on some powerpc platforms (?!)
+#include <stdio.h>         /* needed for NULL on some powerpc platforms (?!) */
+#include <sys/prctl.h>
 #include "base/thread_lister.h"
 #include "base/linuxthreads.h"
 /* Include other thread listers here that define THREADS macro
@ -49,13 +50,20 @@ int ListAllProcessThreads(void *parameter,
  int rc;
  va_list ap;

+  int dumpable = prctl(PR_GET_DUMPABLE, 0);
+  if (!dumpable)
+    prctl(PR_SET_DUMPABLE, 1);
  va_start(ap, callback);
-  rc = callback(parameter, 0, NULL, ap);
+  pid_t pid = getpid();
+  rc = callback(parameter, 1, &pid, ap);
  va_end(ap);
+  if (!dumpable)
+    prctl(PR_SET_DUMPABLE, 0);
  return rc;
 }

-void ResumeAllProcessThreads(int num_threads, pid_t *thread_pids) {
+int ResumeAllProcessThreads(int num_threads, pid_t *thread_pids) {
+  return 1;
 }

 #endif
--- a/src/google/heap-checker.h
+++ b/src/google/heap-checker.h
@ -255,6 +255,19 @@ class HeapCleaner {
 };

 class HeapLeakChecker {
+ public:  // Static functions for working with (whole-program) leak checking.
+ 
+  // Returns whether heap leak checking is currently active in some mode,
+  // e.g. if leak checking was started (and is still active now)
+  // due to any valid non-empty --heap_check flag value
+  // (including "local") on the command-line
+  // or via a dependency on //base:heapcheck.
+  // The return value reflects whether HeapLeakChecker objects manually
+  // constructed right now will be doing leak checking or nothing.
+  // Note that we can go from active to inactive state during InitGoogle()
+  // if FLAGS_heap_check gets set to "" by some code before/during InitGoogle().
+  static bool IsActive();
+
 public:  // Non-static functions for starting and doing leak checking.

  // Start checking and name the leak check performed.
--- a/src/heap-checker.cc
+++ b/src/heap-checker.cc
@ -468,6 +468,18 @@ static bool RecordGlobalDataLocked(uint64 start_address,
  if (inode == 0)
    return true;

+  // Sometimes people mmap their own files read-write.  That would cause
+  // the strict ELF checker later to reject them.  We do not want to loosen
+  // up the ELF checker, because we need to catch freaky files if they
+  // show up.  So, make an exception for common files that we have seen.
+  //
+  // TODO(mec): the longer this gets, the more attractive it is to
+  // check for the ELF header and just accept all non-ELF files.
+  if (inode != 0) {
+    if (filename && strcmp(filename, "/dev/zero") == 0)
+      return true;
+  }
+
  // Grab some ELF types.
 #ifdef _LP64
  typedef Elf64_Ehdr ElfFileHeader;
@ -692,8 +704,15 @@ HeapLeakChecker::UseProcMaps(ProcMapsTask proc_maps_task) {
                          "Looking at /proc/self/maps line:\n  %s\n",
                          proc_map_line);

-    if (start_address >= end_address)
-      abort();
+    if (start_address >= end_address) {
+      // Crash if a line we can be interested in is ill-formed:
+      if (inode != 0)  abort();
+      // Skip other ill-formed lines: some are possible
+      // probably due to the interplay of how /proc/self/maps is updated
+      // while we read it in chunks in ProcMapsIterator and
+      // do things in this loop.
+      continue;
+    }

    // Determine if any shared libraries are present.
    if (inode != 0 && strstr(filename, "lib") && strstr(filename, ".so")) {
@ -738,6 +757,14 @@ static int64 live_bytes_total = 0;
 // (protected by our lock; IgnoreAllLiveObjectsLocked sets it)
 static pid_t self_thread_pid = 0;

+// Status of our thread listing callback execution
+// (protected by our lock; used from within IgnoreAllLiveObjectsLocked)
+static enum {
+  CALLBACK_NOT_STARTED,
+  CALLBACK_STARTED,
+  CALLBACK_COMPLETED,
+} thread_listing_status = CALLBACK_NOT_STARTED;
+
 // Ideally to avoid deadlocks this function should not result in any libc
 // or other function calls that might need to lock a mutex:
 // It is called when all threads of a process are stopped
@ -774,6 +801,7 @@ int HeapLeakChecker::IgnoreLiveThreads(void* parameter,
                                       int num_threads,
                                       pid_t* thread_pids,
                                       va_list ap) {
+  thread_listing_status = CALLBACK_STARTED;
  if (HeapProfiler::kMaxLogging) {
    HeapProfiler::MESSAGE(2, "HeapChecker: Found %d threads (from pid %d)\n",
                          num_threads, getpid());
@ -838,6 +866,7 @@ int HeapLeakChecker::IgnoreLiveThreads(void* parameter,
  IgnoreNonThreadLiveObjectsLocked();
  // Can now resume the threads:
  ResumeAllProcessThreads(num_threads, thread_pids);
+  thread_listing_status = CALLBACK_COMPLETED;
  return failures;
 }

@ -928,7 +957,8 @@ IgnoreAllLiveObjectsLocked(const StackExtent& self_stack) {
    UseProcMaps(RECORD_GLOBAL_DATA_LOCKED);
  }
  // Ignore all thread stacks:
-  bool executed_with_threads_stopped = false;
+  thread_listing_status = CALLBACK_NOT_STARTED;
+  bool need_to_ignore_non_thread_objects = true;
  self_thread_pid = getpid();
  self_thread_stack = self_stack;
  if (FLAGS_heap_check_ignore_thread_live) {
@ -939,10 +969,22 @@ IgnoreAllLiveObjectsLocked(const StackExtent& self_stack) {
    //  if not suspended they could still mess with the pointer
    //  graph while we walk it).
    int r = ListAllProcessThreads(NULL, IgnoreLiveThreads);
-    executed_with_threads_stopped = (r >= 0);
-    if (r == -1) {
+    need_to_ignore_non_thread_objects = r < 0;
+    if (r < 0) {
+      HeapProfiler::MESSAGE(0, "HeapChecker: thread finding failed "
+                               "with %d errno=%d\n", r, errno);
+      if (thread_listing_status == CALLBACK_COMPLETED) {
+        HeapProfiler::MESSAGE(0, "HeapChecker: thread finding callback "
+                                 "finished ok; hopefully everything is fine\n");
+        need_to_ignore_non_thread_objects = false;
+      } else if (thread_listing_status == CALLBACK_STARTED) {
+        HeapProfiler::MESSAGE(0, "HeapChecker: thread finding callback was "
+                                 "interrupted or crashed; can't fix this\n");
+        abort();
+      } else {  // CALLBACK_NOT_STARTED
        HeapProfiler::MESSAGE(0, "HeapChecker: Could not find thread stacks; "
                                 "may get false leak reports\n");
+      }
    } else if (r != 0) {
      HeapProfiler::MESSAGE(0, "HeapChecker: Thread stacks not found "
                               "for %d threads; may get false leak reports\n",
@ -960,7 +1002,7 @@ IgnoreAllLiveObjectsLocked(const StackExtent& self_stack) {
  }
  // Do all other live data ignoring here if we did not do it
  // within thread listing callback with all threads stopped.
-  if (!executed_with_threads_stopped)  IgnoreNonThreadLiveObjectsLocked();
+  if (need_to_ignore_non_thread_objects)  IgnoreNonThreadLiveObjectsLocked();
  if (live_objects_total) {
    HeapProfiler::MESSAGE(0, "HeapChecker: "
                          "Ignoring "LLD" reachable "
@ -1349,10 +1391,13 @@ bool HeapLeakChecker::DoNoLeaks(bool same_heap,
      (same_heap ? (inuse_bytes_increase_ != 0 || inuse_allocs_increase_ != 0)
                 : (inuse_bytes_increase_ > 0 || inuse_allocs_increase_ > 0));
    if (see_leaks || do_full) {
+      bool pprof_can_ignore = false;
+      const char* command_tail = " --text 2>/dev/null";  // normal command
      const char* gv_command_tail
        = " --edgefraction=1e-10 --nodefraction=1e-10 --gv 2>/dev/null";
      string ignore_re;
      if (disabled_regexp) {
+        pprof_can_ignore = true;
        ignore_re += " --ignore='^";
        ignore_re += *disabled_regexp;
        ignore_re += "$'";
@ -1361,22 +1406,29 @@ bool HeapLeakChecker::DoNoLeaks(bool same_heap,
      // some STLs can give us spurious leak alerts (since the STL tries to
      // do its own memory pooling), so we avoid it by using STL as little
      // as possible for "big" objects that might require "lots" of memory.
-      char command[6 * PATH_MAX + 200];
+      char base_command[6 * PATH_MAX + 200];
+      char beg_profile[PATH_MAX+1], end_profile[PATH_MAX+1];
      if (use_initial_profile) {
+        snprintf(beg_profile, sizeof(beg_profile), "%s.%s-beg.heap",
+                 profile_prefix->c_str(), name_);
        // compare against initial profile only if need to
        const char* drop_negative = same_heap ? "" : " --drop_negative";
-        snprintf(command, sizeof(command), "%s --base=\"%s.%s-beg.heap\" %s ",
-                 pprof_path(), profile_prefix->c_str(), name_,
-                 drop_negative);
+        snprintf(base_command, sizeof(base_command),
+                 "%s --base=\"%s\" %s ",
+                 pprof_path(), beg_profile, drop_negative);
      } else {
-        snprintf(command, sizeof(command), "%s",
+        beg_profile[0] = '\0';
+        snprintf(base_command, sizeof(base_command), "%s",
                 pprof_path());
      }
-      snprintf(command + strlen(command), sizeof(command) - strlen(command),
-               " %s \"%s.%s-end.heap\" %s --inuse_objects --lines",
-               invocation_path(), profile_prefix->c_str(),
-               name_, ignore_re.c_str());
+      snprintf(end_profile, sizeof(end_profile), "%s.%s-end.heap",
+               profile_prefix->c_str(), name_);
+      snprintf(base_command + strlen(base_command),
+               sizeof(base_command) - strlen(base_command),
+               " %s \"%s\" %s --inuse_objects --lines",
+               invocation_path(), end_profile, ignore_re.c_str());
                   // --lines is important here to catch leaks when !see_leaks
+
      char cwd[PATH_MAX+1];
      if (getcwd(cwd, sizeof(cwd)) != cwd)  abort();
      if (see_leaks) {
@ -1390,7 +1442,7 @@ bool HeapLeakChecker::DoNoLeaks(bool same_heap,
                              "To investigate leaks manually use e.g.\n"
                              "cd %s; "  // for proper symbol resolution
                              "%s%s\n\n",
-                              cwd, command, gv_command_tail);
+                              cwd, base_command, gv_command_tail);
      }
      string output;
      int checked_leaks = 0;
@ -1403,14 +1455,18 @@ bool HeapLeakChecker::DoNoLeaks(bool same_heap,
        } else {
          // We don't care about pprof's stderr as long as it
          // succeeds with empty report:
-          checked_leaks = GetStatusOutput(command, &output);
+          char full_command[6 * PATH_MAX + 200];   // needed to concatenate
+          snprintf(full_command, sizeof(full_command), "%s%s",
+                   base_command, command_tail);
+          checked_leaks = GetStatusOutput(full_command, &output);
          if (checked_leaks != 0) {
            HeapProfiler::MESSAGE(-1, "ERROR: Could not run pprof at %s\n",
                                  pprof_path());
            abort();
          }
        }
-        if (see_leaks && output.empty() && checked_leaks == 0) {
+        if (see_leaks && pprof_can_ignore &&
+            output.empty() && checked_leaks == 0) {
          HeapProfiler::MESSAGE(-1, "HeapChecker: "
                                "These must be leaks that we disabled"
                                " (pprof succeeded)! This check WILL FAIL"
@ -1420,7 +1476,24 @@ bool HeapLeakChecker::DoNoLeaks(bool same_heap,
        // do not fail the check just due to us being a stripped binary
        if (!see_leaks  &&  strstr(output.c_str(), "nm: ") != NULL  &&
            strstr(output.c_str(), ": no symbols") != NULL)  output.resize(0);
-        if (!(see_leaks || checked_leaks == 0))  abort();
+      }
+      // Make sure the profiles we created are still there.
+      // They can get deleted e.g. if the program forks/executes itself
+      // and FLAGS_cleanup_old_heap_profiles was kept as true.
+      if (access(end_profile, R_OK) != 0  ||
+          (beg_profile[0]  &&  access(beg_profile, R_OK) != 0)) {
+        HeapProfiler::MESSAGE(-1, "HeapChecker: "
+                              "One of the heap profiles is gone: %s %s\n",
+                              beg_profile, end_profile);
+        abort();
+      }
+      if (!(see_leaks || checked_leaks == 0)) {
+        // Crash if something went wrong with executing pprof
+        // and we rely on pprof to do its work:
+        HeapProfiler::MESSAGE(-1, "HeapChecker: "
+                              "pprof command failed: %s%s\n",
+                              base_command, command_tail);
+        abort();
      }
      if (see_leaks  &&  use_initial_profile) {
        HeapProfiler::MESSAGE(-1, "HeapChecker: "
@ -1438,7 +1511,7 @@ bool HeapLeakChecker::DoNoLeaks(bool same_heap,
                              "To investigate leaks manually uge e.g.\n"
                              "cd %s; "  // for proper symbol resolution
                              "%s%s\n\n",
-                              name_, cwd, command, gv_command_tail);
+                              name_, cwd, base_command, gv_command_tail);
        if (use_initial_profile) {
          HeapProfiler::MESSAGE(-1, "HeapChecker: "
                                "CAVEAT: Some of the reported leaks might have "
@ -1491,6 +1564,10 @@ HeapLeakChecker::~HeapLeakChecker() {
 // HeapLeakChecker overall heap check components
 //----------------------------------------------------------------------

+bool HeapLeakChecker::IsActive() {
+  return heap_checker_on;
+}
+
 vector<HeapCleaner::void_function>* HeapCleaner::heap_cleanups_ = NULL;

 // When a HeapCleaner object is intialized, add its function to the static list
@ -1653,7 +1730,7 @@ void HeapLeakChecker::DoMainHeapCheck() {
    HeapProfiler::MESSAGE(0, "HeapChecker: "
                             "Checking for whole-program memory leaks\n");
    if (!main_heap_checker->DoNoLeaks(same_heap, do_full, do_report)) {
-      HeapProfiler::MESSAGE(-1, "ERROR: Leaks found in main heap check, aborting\n");
+      HeapProfiler::MESSAGE(-1, "HeapChecker: crashing because of leaks\n");
      abort();
    }
    delete main_heap_checker;
--- a/src/malloc_extension.cc
+++ b/src/malloc_extension.cc
@ -166,6 +166,21 @@ struct StackTraceHash {
    }
    return h;
  }
+  // Less operator for MSVC's hash containers.
+  bool operator()(void** entry1, void** entry2) const {
+    if (Depth(entry1) != Depth(entry2))
+      return Depth(entry1) < Depth(entry2);
+    for (int i = 0; i < Depth(entry1); i++) {
+      if (PC(entry1, i) != PC(entry2, i)) {
+        return PC(entry1, i) < PC(entry2, i);
+      }
+    }
+    return false;  // entries are equal
+  }
+  // These two public members are required by msvc.  4 and 8 are the
+  // default values.
+  static const size_t bucket_size = 4;
+  static const size_t min_buckets = 8;
 };

 struct StackTraceEqual {
--- a/src/malloc_hook.cc
+++ b/src/malloc_hook.cc
@ -115,7 +115,7 @@ extern "C" void* mmap64(void *start, size_t length,
                        int fd, __off64_t offset) __THROW {

  void *result;
-  result = syscall(SYS_mmap, start, length, prot, flags, fd, offset);
+  result = (void *)syscall(SYS_mmap, start, length, prot, flags, fd, offset);
  MallocHook::InvokeMmapHook(result, start, length, prot, flags, fd, offset);
  return result;
 }
--- a/src/pprof
+++ b/src/pprof
@ -41,6 +41,9 @@
 # Examples:
 #
 # % tools/pprof "program" "profile"
+#   Enters "interactive" mode
+#
+# % tools/pprof --text "program" "profile"
 #   Generates one line per procedure
 #
 # % tools/pprof --gv "program" "profile"
@ -68,6 +71,8 @@
 use strict;
 use Getopt::Long;

+my $PPROF_VERSION = "0.8";
+
 # These are the object tools we use, which come from various sources.
 # We want to invoke them directly, rather than via users' aliases and/or
 # search paths, because some people have colorizing versions of them that
@ -79,9 +84,22 @@ my %obj_tool_map = (
  "objdump" => "objdump",
  "nm" => "nm",
  "addr2line" => "addr2line",
+  "c++filt" => "c++filt",
 );
 my $DOT = "dot";          # leave non-absolute, since it may be in /usr/local
 my $GV = "gv";
+# These are used for dynamic profiles
+my $WGET = "wget";
+my $CURL = "curl";
+
+# These are the web pages that servers need to support for dynamic profiles
+my $HEAP_PAGE = "/pprof/heap";
+my $PROFILE_PAGE = "/pprof/profile";   # must support cgi-param "?seconds=#"
+my $GROWTH_PAGE = "/pprof/growth";
+my $CONTENTION_PAGE = "/pprof/contention";
+my $SYMBOL_PAGE = "/pprof/symbol";     # must support symbol lookup via POST
+my $PROGRAM_NAME_PAGE = "/pprof/cmdline";
+

 # There is a pervasive dependency on the length (in hex characters, i.e.,
 # nibbles) of an address, distinguishing between 32-bit and 64-bit profiles:
@ -90,14 +108,31 @@ my $address_length = 8;   # Hope for 32-bit, reset if 64-bit detected.
 ##### Argument parsing #####

 sub usage_string {
-  return <<'EOF';
-Usage: pprof [options] <program> <profile> ...
-   Prints specified cpu- or heap-profile
+  return <<EOF;
+Usage:
+pprof [options] <program> <profiles>
+   <profiles> is a space separated list of profile names.
+pprof [options] <profile>
+   <profile> is a remote form.  Symbols are obtained from host:port$SYMBOL_PAGE
+
+   Each profile name can be:
+   /path/to/profile        - a path to a profile file
+   host:port[/<service>]   - a location of a service to get profile from
+
+   The /<service> can be $HEAP_PAGE, $PROFILE_PAGE, $GROWTH_PAGE, or $CONTENTION_PAGE.
+   For instance: "pprof http://myserver.com:80$HEAP_PAGE".
+   If /<service> is omitted, the service defaults to $PROFILE_PAGE (cpu profiling).
+
+   For more help with querying remote servers, including how to add the
+   necessary server-side support code, see this filename (or one like it):
+
+   /usr/doc/google-perftools-$PPROF_VERSION/pprof_remote_servers.html

 Options:
   --cum               Sort by cumulative data
   --base=<base>       Subtract <base> from <profile> before display
-   --interactive       Run in interactive mode (interactive "help" gives help)
+   --interactive       Run in interactive mode (interactive "help" gives help) [default]
+   --seconds=<n>       Length of time for dynamic profiles [default=30 secs]

 Reporting Granularity:
   --addresses         Report at address level
@ -106,7 +141,7 @@ Reporting Granularity:
   --files             Report at source file level

 Output type:
-   --text              Generate text report [default]
+   --text              Generate text report
   --gv                Generate Postscript and display
   --list=<regexp>     Generate source listing of matching routines
   --disasm=<regexp>   Generate disassembly of matching routines
@ -123,6 +158,11 @@ Heap-Profile Options:
   --show_bytes        Display space in bytes
   --drop_negative     Ignore negative differences

+Contention-profile options:
+   --total_delay      Display total delay at each region [default]
+   --contentions      Display number of delays at each region
+   --mean_delay       Display mean delay at each region
+
 Call-graph Options:
   --nodecount=<n>     Show at most so many nodes [default=80]
   --nodefraction=<f>  Hide nodes below <f>*total [default=.005]
@ -151,12 +191,14 @@ pprof --list=getdir /bin/ls ls.prof
                       (Per-line) annotated source listing for getdir()
 pprof --disasm=getdir /bin/ls ls.prof
                       (Per-PC) annotated disassembly for getdir()
+pprof localhost:1234
+                       Outputs one line per procedure for localhost:1234
 EOF
 }

 sub version_string {
-  return <<'EOF'
-pprof (part of google-perftools 0.7)
+  return <<EOF
+pprof (part of google-perftools $PPROF_VERSION)

 Copyright 1998-2006 Google Inc.

@ -175,6 +217,15 @@ sub usage {
  exit(1);
 }

+sub Init() {
+  # Setup tmp-file name and handler to clean it up.
+  # We do this in the very beginning so that we can use
+  # error() and cleanup() function anytime here after.
+  $main::tmpfile_sym = "/tmp/pprof$$.sym";
+  $main::tmpfile_ps = "/tmp/pprof$$";
+  $main::next_tmpfile = 0;
+  $SIG{'INT'} = \&sighandler;
+

  $main::opt_help = 0;
  $main::opt_version = 0;
@ -201,6 +252,7 @@ $main::opt_edgefraction = 0.001;
  $main::opt_focus = '';
  $main::opt_ignore = '';
  $main::opt_scale = 0;
+  $main::opt_seconds = 30;

  $main::opt_inuse_space   = 0;
  $main::opt_inuse_objects = 0;
@ -210,10 +262,17 @@ $main::opt_show_bytes    = 0;
  $main::opt_drop_negative = 0;
  $main::opt_interactive   = 0;

+  $main::opt_total_delay = 0;
+  $main::opt_contentions = 0;
+  $main::opt_mean_delay = 0;
+
  $main::opt_tools   = "";
  $main::opt_debug   = 0;
  $main::opt_test    = 0;

+  # Are we using $SYMBOL_PAGE?
+  $main::use_symbol_page = 0;
+
  # Are we printing a heap profile?
  $main::heap_profile = 0;

@ -224,6 +283,7 @@ GetOptions("help!"          => \$main::opt_help,
             "version!"       => \$main::opt_version,
             "cum!"           => \$main::opt_cum,
             "base=s"         => \$main::opt_base,
+             "seconds=i"      => \$main::opt_seconds,
             "functions!"     => \$main::opt_functions,
             "lines!"         => \$main::opt_lines,
             "addresses!"     => \$main::opt_addresses,
@ -249,6 +309,9 @@ GetOptions("help!"          => \$main::opt_help,
             "alloc_objects!" => \$main::opt_alloc_objects,
             "show_bytes!"    => \$main::opt_show_bytes,
             "drop_negative!" => \$main::opt_drop_negative,
+             "total_delay!"   => \$main::opt_total_delay,
+             "contentions!"   => \$main::opt_contentions,
+             "mean_delay!"    => \$main::opt_mean_delay,
             "tools=s"        => \$main::opt_tools,
             "test!"          => \$main::opt_test,
             "debug!"         => \$main::opt_debug,
@ -303,13 +366,18 @@ my $modes =
      $main::opt_ps +
      $main::opt_pdf +
      $main::opt_gif +
+      $main::opt_interactive +
      0;
  if ($modes > 1) {
    usage("Only specify one output mode");
  }
  if ($modes == 0) {
+    if (-t STDOUT) {  # If STDOUT is a tty, activate interactive mode
+      $main::opt_interactive = 1;
+    } else {
      $main::opt_text = 1;
    }
+  }

  if ($main::opt_test) {
    RunUnitTests();
@ -321,25 +389,53 @@ if ($main::opt_test) {
  $main::prog = "";
  @main::pfile_args = ();

-$main::prog = shift || usage("Did not specify program");
+  # Remote profiling without a binary (using $SYMBOL_PAGE instead)
+  if (IsProfileURL($ARGV[0])) {
+    $main::use_symbol_page = 1;
+  }
+
+  if ($main::use_symbol_page) {  # We don't need a binary!
+    my %disabled = ('--lines' => $main::opt_lines,
+                    '--disasm' => $main::opt_disasm);
+    for my $option (keys %disabled) {
+      usage("$option cannot be used without a binary") if $disabled{$option};
+    }
+    # Set $main::prog later...
    scalar(@ARGV) || usage("Did not specify profile file");
+  } else {
+    $main::prog = shift(@ARGV) || usage("Did not specify program");
+    scalar(@ARGV) || usage("Did not specify profile file");
+  }

  # Parse profile file/location arguments
  foreach my $farg (@ARGV) {
+    if ($farg =~ m/(.*)\@([0-9]+)/ ) {
+      my $machine = $1;
+      my $num_machines = $2;
+      for (my $i = 0; $i < $num_machines; $i++) {
+        unshift(@main::pfile_args, "$i.$machine");
+      }
+    } else {
      unshift(@main::pfile_args, $farg);
    }
-ConfigureObjTools($main::prog);
+  }

-##### Main section #####
+  if ($main::use_symbol_page) {
+    unless (IsProfileURL($main::pfile_args[0])) {
+      error("The first profile should be a remote form to use $SYMBOL_PAGE\n");
+    }
+    CheckSymbolPage();
+    $main::prog = FetchProgramName();
+  } else {
+    ConfigureObjTools($main::prog)
+  }
+}

-# Setup tmp-file name and handler to clean it up
-$main::tmpfile_sym = "/tmp/pprof$$.sym";
-$main::tmpfile_ps = "/tmp/pprof$$";
-$main::next_tmpfile = 0;
+sub Main() {
+  Init();
  $main::collected_profile = undef;
  @main::profile_files = ();
-#$main::op_time = time();
-$SIG{'INT'} = \&sighandler;
+  $main::op_time = time();

  # Fetch all profile data
  FetchDynamicProfiles();
@ -363,6 +459,8 @@ if ($main::heap_profile) {
                      'realloc',
                      'do_malloc',
                      'DoSampledAllocation',
+		      'simple_alloc::allocate',
+		      '__malloc_alloc_template::allocate',
                      '__builtin_delete',
                      '__builtin_new',
                      '__builtin_vec_delete',
@ -394,8 +492,13 @@ if ($main::opt_base ne '') {
  # Get total data in profile
  my $total = TotalProfile($profile);

-# Extract symbols
-my $symbols = ExtractSymbols($libs, $profile, $data->{pcs});
+  # Collect symbols
+  my $symbols = undef;
+  if ($main::use_symbol_page) {
+    $symbols = FetchSymbols($data->{pcs});
+  } else {
+    $symbols = ExtractSymbols($libs, $profile, $data->{pcs});
+  }

  # Focus?
  if ($main::opt_focus ne '') {
@ -441,25 +544,43 @@ if (!$main::opt_interactive) {
      }
    }
  } else {
-  InteractiveMode();
+    InteractiveMode($profile, $symbols, $libs, $total);
  }

  cleanup();
  exit(0);
+}

+##### Entry Point #####
+
+Main();
+
+# Temporary workaround for Goobuntu systems.  Those hosts lack the pieces
+# the special Readline libraries need, so for now we fall back to the
+# plain stdio prompt there instead of the fancier readline-based one.
+# Returns 1 when readline might fail, 0 when it should be okay.
+sub ReadlineMightFail {
+  # libtermcap present => readline should be okay
+  return (-e '/lib/libtermcap.so.2') ? 0 : 1;
+}

 ##### Interactive helper routines #####

 sub InteractiveMode {
  $| = 1;  # Make output unbuffered for interactive mode
-  my $orig_profile = $profile;
+  my ($orig_profile, $symbols, $libs, $total) = @_;

  # Use ReadLine if it's installed.
-  if ( defined(eval {require Term::ReadLine}) ) {
+  if ( !ReadlineMightFail() &&
+       defined(eval {require Term::ReadLine}) ) {
    my $term = new Term::ReadLine 'pprof';
    while ( defined ($_ = $term->readline('(pprof) '))) {
      $term->addhistory($_) if /\S/;
-      if (!InteractiveCommand($orig_profile, $_)) {
+      if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) {
        last;    # exit when we get an interactive command to quit
      }
    }
@ -467,9 +588,16 @@ sub InteractiveMode {
    while (1) {
      print "(pprof) ";
      $_ = <STDIN>;
-      if (!InteractiveCommand($orig_profile, $_)) {
+
+      # Save some flags that might be reset by InteractiveCommand()
+      my $save_opt_lines = $main::opt_lines;
+
+      if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) {
        last;    # exit when we get an interactive command to quit
      }
+
+      # Restore flags
+      $main::opt_lines = $save_opt_lines;
    }
  }
 }
@ -477,7 +605,7 @@ sub InteractiveMode {
 # Takes two args: orig profile, and command to run.
 # Returns 1 if we should keep going, or 0 if we were asked to quit
 sub InteractiveCommand {
-  my($orig_profile, $command) = @_;
+  my($orig_profile, $symbols, $libs, $total, $command) = @_;
  $_ = $command;                # just to make future m//'s easier
  if (!defined($_)) {
    print "\n";
@ -490,8 +618,7 @@ sub InteractiveCommand {
    InteractiveHelpMessage();
    return 1;
  }
-  # Clear all the options
-  $main::opt_lines = 0;
+  # Clear all the mode options -- mode is controlled by "$command"
  $main::opt_text = 0;
  $main::opt_disasm = 0;
  $main::opt_list = 0;
@ -507,7 +634,7 @@ sub InteractiveCommand {
    my $ignore;
    ($routine, $ignore) = ParseInteractiveArgs($3);

-    my $profile = ProcessProfile($orig_profile, "", $ignore);
+    my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore);
    my $reduced = ReduceProfile($symbols, $profile);

    # Get derived profiles
@ -524,7 +651,7 @@ sub InteractiveCommand {
    my $ignore;
    ($routine, $ignore) = ParseInteractiveArgs($1);

-    my $profile = ProcessProfile($orig_profile, "", $ignore);
+    my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore);
    my $reduced = ReduceProfile($symbols, $profile);

    # Get derived profiles
@ -542,7 +669,7 @@ sub InteractiveCommand {
    ($routine, $ignore) = ParseInteractiveArgs($1);

    # Process current profile to account for various settings
-    my $profile = ProcessProfile($orig_profile, "", $ignore);
+    my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore);
    my $reduced = ReduceProfile($symbols, $profile);

    # Get derived profiles
@ -560,7 +687,7 @@ sub InteractiveCommand {
    ($focus, $ignore) = ParseInteractiveArgs($1);

    # Process current profile to account for various settings
-    my $profile = ProcessProfile($orig_profile, $focus, $ignore);
+    my $profile = ProcessProfile($orig_profile, $symbols, $focus, $ignore);
    my $reduced = ReduceProfile($symbols, $profile);

    # Get derived profiles
@ -587,6 +714,7 @@ sub InteractiveCommand {

 sub ProcessProfile {
  my $orig_profile = shift;
+  my $symbols = shift;
  my $focus = shift;
  my $ignore = shift;

@ -649,6 +777,10 @@ For commands that accept optional -ignore tags, samples where any routine in
 the stack trace matches the regular expression in any of the -ignore
 parameters will be ignored.

+Further pprof details are available at this location (or one similar):
+
+ /usr/doc/google-perftools-$PPROF_VERSION/cpu_profiler.html
+
 ENDOFHELP
 }
 sub ParseInteractiveArgs {
@ -1160,8 +1292,6 @@ sub OutputKey {
  # Skip large addresses since they sometimes show up as fake entries on RH9
  if (length($a) > 8) {
    if ($a gt "7fffffffffffffff") { return ''; }
-  } else {
-    if (hex($a) > 0x7fffffff) { return ''; }
  }

  # Extract symbolic info for address
@ -1220,7 +1350,7 @@ sub Unparse {
        return sprintf("%.1f", $num / 1048576.0);
      }
    }
-  } elsif ($main::lock_profile) {
+  } elsif ($main::lock_profile && !$main::opt_contentions) {
    return sprintf("%.3f", $num / 1e9);	# Convert nanoseconds to seconds
  } else {
    return sprintf("%d", $num);
@ -1249,7 +1379,7 @@ sub Units {
        return "MB";
      }
    }
-  } elsif ($main::lock_profile) {
+  } elsif ($main::lock_profile && !$main::opt_contentions) {
    return "seconds";
  } else {
    return "samples";
@ -1267,8 +1397,10 @@ sub FlatProfile {
  foreach my $k (keys(%{$profile})) {
    my $count = $profile->{$k};
    my @addrs = split(/\n/, $k);
+    if ($#addrs >= 0) {
      AddEntry($result, $addrs[0], $count);
    }
+  }
  return $result;
 }

@ -1458,14 +1590,191 @@ sub AddEntries {

 ##### Code to profile a server dynamically #####

+# Checks that the symbol page for the first profile argument can be
+# fetched and that it reports a non-zero number of symbols.  Every
+# failure is reported through error().
+sub CheckSymbolPage {
+  my $url = SymbolPageURL();
+  my $command_line = "$WGET -qO- '$url'";
+  # Check the piped open like the other fetch helpers do, so a failure to
+  # start the command is not misreported as a missing page.
+  open(SYMBOL, "$command_line |") or error($command_line);
+  my $line = <SYMBOL>;
+  close(SYMBOL);
+  unless (defined($line)) {
+    error("$url doesn't exist\n");
+  }
+
+  # The first line of the page is expected to look like "num_symbols: <n>".
+  if ($line =~ /^num_symbols:\s+(\d+)$/) {
+    if ($1 == 0) {
+      error("Stripped binary. No symbols available.\n");
+    }
+  } else {
+    error("Failed to get the number of symbols from $url\n");
+  }
+}
+
+# Returns true when $profile_name parses as a remote profile location
+# (host, port and page type all recognized by ParseProfileURL).
+sub IsProfileURL {
+  my $profile_name = shift;
+  my ($host, $port, $type) = ParseProfileURL($profile_name);
+  # Use && rather than "and": "return A and B" parses as "(return A) and B",
+  # so with "and" only the first test would ever decide the result.
+  return defined($host) && defined($port) && defined($type);
+}
+
+# Splits a remote profile location of the form
+#   [http://]host:port[/ | <one of the known profile pages>]
+# into (host, port, type).  The type is "" or "/" when no page was given.
+# Returns the empty list when $profile_name does not have that form.
+sub ParseProfileURL {
+  my $profile_name = shift;
+  # The start-up code probes $ARGV[0] before checking that an argument was
+  # supplied, so a missing name must be treated as "not a URL".
+  return () unless defined($profile_name);
+  if ($profile_name =~ m,^(http://|)([^/:]+):(\d+)(|/|$PROFILE_PAGE|$HEAP_PAGE|$GROWTH_PAGE|$CONTENTION_PAGE)$,o) {
+    return ($2, $3, $4);
+  }
+  return ();
+}
+
+# Builds the URL of the symbol page.  Symbols are always fetched from the
+# server named by the first profile argument.
+sub SymbolPageURL {
+  my $first_profile = $main::pfile_args[0];
+  my ($host, $port, $type) = ParseProfileURL($first_profile);
+  return "http://$host:$port$SYMBOL_PAGE";
+}
+
+# Asks the server named by the first profile argument for its command line
+# and returns the first line of the reply truncated at the first NUL,
+# i.e. argv[0] of the profiled program.  Failures go through error().
+sub FetchProgramName() {
+  my ($host, $port, $type) = ParseProfileURL($main::pfile_args[0]);
+  my $url = "http://$host:$port$PROGRAM_NAME_PAGE";
+  my $command_line = "$WGET -qO- '$url'";
+  open(CMDLINE, "$command_line |") or error($command_line);
+  my $cmdline = <CMDLINE>;
+  close(CMDLINE);
+  error("Failed to get program name from $url\n") unless defined($cmdline);
+  # Remove argv[1] and later arguments: drop everything from the first NUL.
+  # ".*" (rather than ".+") also strips a NUL that has nothing after it,
+  # e.g. a NUL-terminated argv[0] with no further arguments.
+  $cmdline =~ s/\x00.*//s;
+  $cmdline =~ s!\n!!g;  # Remove LFs.
+  return $cmdline;
+}
+
+# Gee, curl's -L (--location) option isn't reliable at least
+# with its 7.12.3 version.  Curl will forget to post data if
+# there is a redirection.  This function is a workaround for
+# curl.  Redirection happens on borg hosts.
+#
+# Issues a HEAD request for $url and returns the target of the last
+# "Location:" header seen, or $url itself when there is none.
+sub ResolveRedirectionForCurl {
+  my $url = shift;
+  my $command_line = "$CURL -s --head '$url'";
+  open(CMDLINE, "$command_line |") or error($command_line);
+  while (<CMDLINE>) {
+    # HTTP header lines end in CRLF; strip the CR so that it does not
+    # become part of the captured URL.
+    s/\r//g;
+    if (/^Location: (.*)/) {
+      $url = $1;
+    }
+  }
+  close(CMDLINE);
+  return $url;
+}
+
+# Fetch symbols from $SYMBOL_PAGE for all PC values found in profile.
+#
+# $pcset is a hash ref whose keys are the PC values as hex strings without
+# the "0x" prefix.  The PCs are POSTed to the symbol page, and the reply
+# lines of the form "0x<pc> <name>" are turned into a hash ref mapping each
+# PC to [short name, "?", full name].  A PC the server did not resolve is
+# named by its own address.
+sub FetchSymbols {
+  my $pcset = shift;
+
+  my %seen = ();
+  my @pcs = grep { !$seen{$_}++ } keys(%$pcset);  # uniq
+  my $post_data = join("+", sort((map {"0x" . "$_"} @pcs)));
+  # Fail loudly if the temp file cannot be written; otherwise curl would
+  # be pointed at a missing or stale POST body.
+  open(POSTFILE, ">$main::tmpfile_sym")
+      or error("$main::tmpfile_sym: $!\n");
+  print POSTFILE $post_data;
+  close(POSTFILE);
+
+  my $url = SymbolPageURL();
+  # Here we use curl for sending data via POST since old
+  # wgets don't have --post-file option.
+  $url = ResolveRedirectionForCurl($url);
+  my $command_line = "$CURL -sd '\@$main::tmpfile_sym' '$url'";
+  # We use c++filt in case $SYMBOL_PAGE gives us mangled symbols.
+  my $cppfilt = $obj_tool_map{"c++filt"};
+  open(SYMBOL, "$command_line | $cppfilt |") or error($command_line);
+
+  # Address (hex, no "0x") => symbol name, as reported by the server.
+  my %map;
+  while (<SYMBOL>) {
+    if (m/^0x([0-9a-f]+)\s+(.+)/) {
+      $map{$1} = $2;
+    }
+  }
+  close(SYMBOL);
+
+  my $symbols = {};
+  for my $pc (@pcs) {
+    my $fullname;
+    if (defined($map{$pc})) {
+      $fullname = $map{$pc};
+    } else {
+      $fullname = "0x" . $pc;  # Just use addresses
+    }
+    my $name = ShortFunctionName($fullname);
+    $symbols->{$pc} = [$name, "?", $fullname];
+  }
+  return $symbols;
+}
+
+# Returns $file_name with any leading directory part (everything up to
+# and including the last "/") removed.
+sub BaseName {
+  my ($file_name) = @_;
+  $file_name =~ s!^.*/!!;  # Remove directory name
+  return $file_name;
+}
+
+# Builds the base file name under which a fetched profile is stored:
+#   <binary basename>.<$main::op_time>.<host>-port<port>
+# where host and port come from the remote profile location.
+sub MakeProfileBaseName {
+  my $binary_name = shift;
+  my $profile_name = shift;
+  my ($host, $port) = ParseProfileURL($profile_name);
+  return sprintf("%s.%s.%s-port%s",
+                 BaseName($binary_name), $main::op_time, $host, $port);
+}
+
 sub FetchDynamicProfile {
   my $binary_name = shift;
   my $profile_name = shift;
   my $fetch_name_only = shift;
   my $encourage_patience = shift;

-  # TODO: Add support for fetching profiles dynamically from a server
+  # Returns the name of a local file holding the profile $profile_name.
+  # A name that is not a remote profile location is returned unchanged.
+  # A remote profile is downloaded with $WGET into $HOME/pprof and the
+  # path of the downloaded file is returned.  When $fetch_name_only is
+  # positive, only that path is computed and nothing is downloaded.
+  # $encourage_patience adds a "Be patient..." note for CPU profiles.
+  if (!IsProfileURL($profile_name)) {
     return $profile_name;
+  } else {
+    # Only remote profiles are stored locally, so set up the download
+    # directory here instead of doing it (and possibly dying) for plain
+    # local profile files too.  It is still created before the
+    # $fetch_name_only return below.
+    my $user_dir = $ENV{HOME};
+    my $profile_dir = $user_dir . "/pprof";
+    if (!(-d $profile_dir)) {
+      mkdir($profile_dir) || die("Unable to create profile directory $profile_dir\n");
+    }
+
+    my ($host, $port, $type) = ParseProfileURL($profile_name);
+    if ($type eq "" || $type eq "/") {
+      # Missing type specifier defaults to cpu-profile
+      $type = $PROFILE_PAGE;
+    }
+
+    my $profile_file = MakeProfileBaseName($binary_name, $profile_name);
+
+    my $url;
+    my $wget_timeout;
+    if ($type eq $PROFILE_PAGE) {
+      $url = sprintf("http://$host:$port$PROFILE_PAGE?seconds=%d",
+                     $main::opt_seconds);
+      # The server only answers once the profiling period is over, so
+      # give wget the period plus some slack before it times out.
+      $wget_timeout = sprintf("--timeout=%d",
+                              int($main::opt_seconds * 1.01 + 60));
+    } else {
+      # For non-CPU profiles, we add a type-extension to
+      # the target profile file name.
+      my $suffix = $type;
+      $suffix =~ s,/,.,g;
+      $profile_file .= "$suffix";
+      $url = "http://$host:$port$type";
+      $wget_timeout = "";
+    }
+    # Download to a hidden temporary name first and rename afterwards.
+    my $tmp_profile = "$profile_dir/.tmp.$profile_file";
+    my $real_profile = "$profile_dir/$profile_file";
+
+    if ($fetch_name_only > 0) {
+      return $real_profile;
+    }
+
+    my $cmd = "$WGET $wget_timeout -q -O $tmp_profile '$url'";
+    if ($type eq $PROFILE_PAGE) {
+      print STDERR "Gathering CPU profile from $host:$port for $main::opt_seconds seconds to\n  ${real_profile}\n";
+      if ($encourage_patience) {
+        print STDERR "Be patient...\n";
+      }
+    } else {
+      print STDERR "Fetching $type profile from $host:$port to\n  ${real_profile}\n";
+    }
+
+    (system($cmd) == 0) || error("Failed to get profile: $cmd: $!\n");
+    (system("mv $tmp_profile $real_profile") == 0) || error("Unable to rename profile\n");
+    print STDERR "Wrote profile to $real_profile\n";
+    $main::collected_profile = $real_profile;
+    return $main::collected_profile;
+  }
 }

 # Collect profiles in parallel
@ -1543,10 +1852,11 @@ sub ReadProfile {
  open(PROFILE, "<$fname") || error("$fname: $!\n");
  binmode PROFILE;      # New perls do UTF-8 processing
  my $header = <PROFILE>;
+  my $contention_marker = substr($CONTENTION_PAGE, 1);   # remove leading /
  if ($header =~ m/^heap profile:/) {
    $main::heap_profile = 1;
    return ReadHeapProfile($prog, $fname, $header);
-  } elsif ($header =~ m/^--- *contentionz/ ) {
+  } elsif ($header =~ m/^--- *$contention_marker/o ) {
    $main::lock_profile = 1;
    return ReadSynchProfile($prog, $fname);
  } elsif ($header =~ m/^--- *Stacks:/ ) {
@ -1581,17 +1891,11 @@ sub ReadCPUProfile {
  my $pcs = {};

  # Parse string into array of slots.
-  # L! is needed for 64-bit # platforms, but not supported on 5.005
-  # (despite the manpage claims)
+  # L! cannot be used because with a native 64-bit build, it will cause
+  # 1) a valid 64-bit profile to use the 32-bit codepath, and
+  # 2) a valid 32-bit profile to be unrecognized.

-  my $format;
-  if ($] >= 5.008) {
-      $format = "L!*";
-  } else {
-      $format = "L*";
-  }
-
-  my @slots = unpack($format, $str);
+  my @slots = unpack("L*", $str);

  # Read header.  The current header version is a 5-element structure
  # containing:
@ -1713,14 +2017,54 @@ sub ReadHeapProfile {
    $index = 2;
  }

-  # Find the type of this profile
+  # Find the type of this profile.  The header line looks like:
+  #    heap profile:   1246:  8800744 [  1246:  8800744] @ <heap-url>/266053
+  # There are two pairs <count: size>, the first inuse objects/space, and the
+  # second allocated objects/space.  This is followed optionally by a profile
+  # type, and if that is present, optionally by a sampling frequency.  The
+  # interpretation of the sampling frequency is that the profiler, for each
+  # sample, calculates a uniformly distributed random integer less than the
+  # given value, and records the next sample after that many bytes have been
+  # allocated.  Therefore, the expected sample interval is half of the given
+  # frequency.  By default, if not specified, the expected sample interval is
+  # 128KB.  Only remote-heap-page profiles are adjusted for sample size.
+  my $should_adjust_sample = 0;
+  my $sample_adjustment = 0;
  chomp($header);
  my $type = "unknown";
-  if ($header =~ m/^heap profile:\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\](\s*@\s*(.*))?/) {
+  if ($header =~ m"^heap profile:\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\](\s*@\s*([^/]*)(/(\d+))?)?") {
    if (defined($6) && ($6 ne '')) {
      $type = $6;
+      # The regex test here is to see if type is a substring of HEAP_PAGE
+      if (($HEAP_PAGE =~ /$type/)) {
+	$should_adjust_sample = 1;
+	if (defined($8) && ($8 ne '')) {
+	  $sample_adjustment = int($8)/2;
+	  printf STDERR ("Adjusting heap profiles for 1-in-%d sampling rate\n",
+			 $sample_adjustment);
 	}
      }
+    } else {
+      # We detect whether or not this is a remote-heap profile by checking
+      # that the total-allocated stats ($n2,$s2) are exactly the
+      # same as the in-use stats ($n1,$s1).  It is remotely conceivable
+      # that a non-remote-heap profile may pass this check, but it is hard
+      # to imagine how that could happen.
+      my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4);
+      if (($n1 == $n2) && ($s1 == $s2)) {
+        # This is likely to be a remote-heap based sample profile
+	$should_adjust_sample = 1;
+      }
+    }
+  }
+
+  # For remote-heap generated profiles, adjust the counts and sizes to
+  # account for the sample rate (we sample once every 128KB by default).
+  if ($should_adjust_sample && ($sample_adjustment == 0)) {
+    # Turn on profile adjustment.
+    $sample_adjustment = 128*1024;
+    print STDERR "Adjusting heap profiles for 1-in-128KB sampling rate\n";
+  }

  my $profile = {};
  my $pcs = {};
@ -1760,6 +2104,20 @@ sub ReadHeapProfile {
      my $stack = $5;
      my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4);

+      if ($sample_adjustment) {
+        my $ratio;
+        $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
+        if ($ratio < 1) {
+          $n1 /= $ratio;
+          $s1 /= $ratio;
+        }
+        $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
+        if ($ratio < 1) {
+          $n2 /= $ratio;
+          $s2 /= $ratio;
+        }
+      }
+
      my @counts = ($n1, $s1, $n2, $s2);
      AddEntries($profile, $pcs, $stack, $counts[$index]);
    }
@ -1785,17 +2143,35 @@ sub ReadSynchProfile {
  my $seen_clockrate = 0;
  my $line;

+  my $index = 0;
+  if ($main::opt_total_delay) {
+    $index = 0;
+  } elsif ($main::opt_contentions) {
+    $index = 1;
+  } elsif ($main::opt_mean_delay) {
+    $index = 2;
+  }
+
  while ( $line = <PROFILE> ) {
-    if ( $line =~ /^(slow release).*thread \d+  \@\s*(.*?)\s*$/ ||
+    if ( $line =~ /^\s*(\d+)\s+(\d+) \@\s*(.*?)\s*$/ ) {
+      my ($cycles, $count, $stack) = ($1, $2, $3);
+
+      # Convert cycles to nanoseconds
+      $cycles /= $cyclespernanosec;
+
+      my @values = ($cycles, $count, $cycles / $count);
+      AddEntries($profile, $pcs, $stack, $values[$index]);
+
+    } elsif ( $line =~ /^(slow release).*thread \d+  \@\s*(.*?)\s*$/ ||
              $line =~ /^\s*(\d+) \@\s*(.*?)\s*$/ ) {
-      my ($count, $stack) = ($1, $2);
-      if ($count !~ /^\d+$/) {
+      my ($cycles, $stack) = ($1, $2);
+      if ($cycles !~ /^\d+$/) {
        next;
      }

      # Convert cycles to nanoseconds
-      $count /= $cyclespernanosec;
-      AddEntries($profile, $pcs, $stack, $count);
+      $cycles /= $cyclespernanosec;
+      AddEntries($profile, $pcs, $stack, $cycles);

    } elsif ( $line =~ m|cycles/second = (\d+)|) {
      $cyclespernanosec = $1 / 1e9;
@ -1838,6 +2214,7 @@ sub HexExtend {

 # Split /proc/pid/maps dump into a list of libraries
 sub ParseLibraries {
+  return if $main::use_symbol_page;  # We don't need libraries info.
  my $prog = shift;
  my $map = shift;
  my $pcs = shift;
--- a/src/profiler.cc
+++ b/src/profiler.cc
@ -460,12 +460,12 @@ void ProfileData::SetHandler(void (*handler)(int)) {
 }

 void ProfileData::FlushTable() {
+  LOCK(&state_lock_); {
    if (out_ < 0) {
      // Profiling is not enabled
+      UNLOCK(&state_lock_);
      return;
    }
-
-  LOCK(&state_lock_); {
    SetHandler(SIG_IGN);       // Disable timer interrupts while we're flushing
    LOCK(&table_lock_); {
      // Move data from hash table to eviction buffer
--- a/src/stacktrace.cc
+++ b/src/stacktrace.cc
@ -45,17 +45,14 @@
 #include "stacktrace_x86-inl.h"
 #endif

-#if !defined(IMPLEMENTED_STACK_TRACE) && defined(USE_LIBUNWIND) && HAVE_LIBUNWIND_H
+#if !defined(IMPLEMENTED_STACK_TRACE) && defined(__x86_64__) && HAVE_LIBUNWIND_H
 #define IMPLEMENTED_STACK_TRACE
-// This is turned off by default. Possible reasons for turning on in the
-// future:
-// 1. Compiler independence
-// 2. Architecture independence
-// 3. A more liberal MIT license, which allows use with multiple compilers
+#define UNW_LOCAL_ONLY
 #include "stacktrace_libunwind-inl.h"
 #endif

 #if !defined(IMPLEMENTED_STACK_TRACE) && defined(__x86_64__) && HAVE_UNWIND_H
+// This implementation suffers from deadlocks. Don't enable it.
 #define IMPLEMENTED_STACK_TRACE
 #include "stacktrace_x86_64-inl.h"
 #endif
--- a/src/stacktrace_libunwind-inl.h
+++ b/src/stacktrace_libunwind-inl.h
@ -51,14 +51,14 @@ int GetStackTrace(void** result, int max_depth, int skip_count) {

  do {
    ret = unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip);
-    assert(ret == 0);
+    if (ret < 0)
+      break;
    if (skip_count > 0) {
      skip_count--;
    } else {
      result[n++] = ip;
    }
    ret = unw_step(&cursor);
-    assert(ret >= 0);
  } while ((n < max_depth) && (ret > 0));

  return n;
--- a/src/tcmalloc.cc
+++ b/src/tcmalloc.cc
@ -79,6 +79,7 @@
 #include <unistd.h>
 #include <errno.h>
 #include <stdarg.h>
+#include "base/commandlineflags.h"
 #include "google/malloc_hook.h"
 #include "google/malloc_extension.h"
 #include "google/stacktrace.h"
@ -147,12 +148,27 @@ static const size_t kDefaultOverallThreadCacheSize = 16 << 20;
 // REQUIRED: kMaxPages >= kMinSystemAlloc;
 static const size_t kMaxPages = kMinSystemAlloc;

+/* The smallest prime > 2^n */
+static unsigned int primes_list[] = {
+	// Small values might cause high rates of sampling
+	// and hence commented out.
+	// 2, 5, 11, 17, 37, 67, 131, 257,
+	// 521, 1031, 2053, 4099, 8209, 16411,
+	32771, 65537, 131101, 262147, 524309, 1048583,
+	2097169, 4194319, 8388617, 16777259, 33554467 };
+
 // Twice the approximate gap between sampling actions.
 // I.e., we take one sample approximately once every
-//      kSampleParameter/2
+//      tcmalloc_sample_parameter/2
 // bytes of allocation, i.e., ~ once every 128KB.
 // Must be a prime number.
-static const size_t kSampleParameter = 266053;
+DEFINE_int64(tcmalloc_sample_parameter, 262147,
+	     "Twice the approximate gap between sampling actions."
+	     " Must be a prime number. Otherwise will be rounded up to a "
+	     " larger prime number");
+static size_t sample_period = 262147;
+// Protects sample_period above
+static SpinLock sample_period_lock = SPINLOCK_INITIALIZER;

 //-------------------------------------------------------------------
 // Mapping from size to size_class and vice versa
@ -303,6 +319,17 @@ static int NumMoveSize(size_t size) {
  // and thread caches.
  if (num > static_cast<int>(0.8 * kMaxFreeListLength))
    num = static_cast<int>(0.8 * kMaxFreeListLength);
+
+  // Also, avoid bringing in too many objects into small object free
+  // lists.  There are lots of such lists, and if we allow each one to
+  // fetch too many at a time, we end up having to scavenge too often
+  // (especially when there are lots of threads and each thread gets a
+  // small allowance for its thread cache).
+  //
+  // TODO: Make thread cache free list sizes dynamic so that we do not
+  // have to equally divide a fixed resource amongst lots of threads.
+  if (num > 32) num = 32;
+
  return num;
 }

@ -918,7 +945,7 @@ void TCMalloc_PageHeap::Dump(TCMalloc_Printer* out) {
  uint64_t large_pages = 0;
  int large_spans = 0;
  for (Span* s = large_.next; s != &large_; s = s->next) {
-    out->printf("   [ %6" PRIuS " spans ]\n", s->length);
+    out->printf("   [ %6" PRIuS " pages ]\n", s->length);
    large_pages += s->length;
    large_spans++;
  }
@ -1057,6 +1084,7 @@ class TCMalloc_ThreadCache_FreeList {
    SLL_PopRange(&list_, N, start, end);
    ASSERT(length_ >= N);
    length_ -= N;
+    if (length_ < lowater_) lowater_ = length_;
  }
 };

@ -1669,9 +1697,23 @@ void TCMalloc_ThreadCache::PickNextSample() {
  uint32_t r = rnd_;
  rnd_ = (r << 1) ^ ((static_cast<int32_t>(r) >> 31) & kPoly);

-  // Next point is "rnd_ % (2*sample_period)".  I.e., average
-  // increment is "sample_period".
-  bytes_until_sample_ = rnd_ % kSampleParameter;
+  // Next point is "rnd_ % (sample_period)".  I.e., average
+  // increment is "sample_period/2".
+  const int flag_value = FLAGS_tcmalloc_sample_parameter;
+  static int last_flag_value = -1;
+
+  if (flag_value != last_flag_value) {
+    SpinLockHolder h(&sample_period_lock);
+    int i;
+    for (i = 0; i < (sizeof(primes_list)/sizeof(primes_list[0]) - 1); i++) {
+      if (primes_list[i] >= flag_value) {
+        break;
+      }
+    }
+    sample_period = primes_list[i];
+    last_flag_value = flag_value;
+  }
+  bytes_until_sample_ = rnd_ % sample_period;
 }

 void TCMalloc_ThreadCache::InitModule() {
@ -2118,7 +2160,7 @@ static inline void* do_malloc(size_t size) {
  }
  // The following call forces module initialization
  TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache();
-  if (heap->SampleAllocation(size)) {
+  if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
    Span* span = DoSampledAllocation(size);
    if (span != NULL) {
      ret = reinterpret_cast<void*>(span->start << kPageShift);
--- a/src/tests/heap-checker_unittest.cc
+++ b/src/tests/heap-checker_unittest.cc
@ -273,6 +273,7 @@ static void DoRunHidden(Closure* c, int n) {
  if (n) {
    run_hidden_ptr(c, n-1);
    wipe_stack_ptr(n);
+    sleep(0);  // undo -foptimize-sibling-calls
  } else {
    c->Run();
  }
@ -284,6 +285,7 @@ static void DoWipeStack(int n) {
    volatile int arr[sz];
    for (int i = 0; i < sz; ++i)  arr[i] = 0;
    wipe_stack_ptr(n-1);
+    sleep(0);  // undo -foptimize-sibling-calls
  }
 }

@ -463,14 +465,14 @@ static void TestHeapLeakCheckerPProf() {
 // trick heap change: same total # of bytes and objects, but
 // different individual object sizes
 static void TestHeapLeakCheckerTrick() {
-  void* bar1 = AllocHidden(60 * sizeof(int));
+  void* bar1 = AllocHidden(240 * sizeof(int));
  Use(&bar1);
-  void* bar2 = AllocHidden(40 * sizeof(int));
+  void* bar2 = AllocHidden(160 * sizeof(int));
  Use(&bar2);
  HeapLeakChecker check("trick");
-  void* foo1 = AllocHidden(70 * sizeof(int));
+  void* foo1 = AllocHidden(280 * sizeof(int));
  Use(&foo1);
-  void* foo2 = AllocHidden(30 * sizeof(int));
+  void* foo2 = AllocHidden(120 * sizeof(int));
  Use(&foo2);
  DeAllocHidden(&bar1);
  DeAllocHidden(&bar2);
@ -482,16 +484,16 @@ static void TestHeapLeakCheckerTrick() {

 // no false negatives from pprof
 static void TestHeapLeakCheckerDeathTrick() {
-  void* bar1 = AllocHidden(60 * sizeof(int));
+  void* bar1 = AllocHidden(240 * sizeof(int));
  Use(&bar1);
-  void* bar2 = AllocHidden(40 * sizeof(int));
+  void* bar2 = AllocHidden(160 * sizeof(int));
  Use(&bar2);
  HeapLeakChecker check("death_trick");
  DeAllocHidden(&bar1);
  DeAllocHidden(&bar2);
-  void* foo1 = AllocHidden(70 * sizeof(int));
+  void* foo1 = AllocHidden(280 * sizeof(int));
  Use(&foo1);
-  void* foo2 = AllocHidden(30 * sizeof(int));
+  void* foo2 = AllocHidden(120 * sizeof(int));
  Use(&foo2);
  // TODO(maxim): use the above if we make pprof work in automated test runs
  if (!FLAGS_maybe_stripped) {
@ -733,13 +735,19 @@ static void* HeapBusyThreadBody(void* a) {
      }
    }
    if (FLAGS_test_register_leak) {
-      // Hide the register pointer value with an xor mask.
+      // Hide the register "ptr" value with an xor mask.
      // If one provides --test_register_leak flag, the test should
      // (with very high probability) crash on some leak check
      // with a leak report (of some x * sizeof(int) + y * sizeof(int*) bytes)
      // pointing at the two lines above in this function
      // with "new (initialized) int" in them as the allocators
      // of the leaked objects.
+      // CAVEAT: We can't really prevent a compiler from saving some
+      // temporary values of "ptr" on the stack and thus letting us find
+      // the heap objects not via the register.
+      // Hence it's normal if for certain compilers or optimization modes
+      // --test_register_leak does not cause a leak crash of the above form
+      // (this happens e.g. for gcc 4.0.1 in opt mode).
      ptr = reinterpret_cast<int **>(
          reinterpret_cast<uintptr_t>(ptr) ^ kHideMask);
      // busy loop to get the thread interrupted at:
--- a/src/tests/tcmalloc_unittest.cc
+++ b/src/tests/tcmalloc_unittest.cc
@ -399,11 +399,14 @@ static void TestHugeAllocations() {
  for (size_t i = 0; i < 10000; i++) {
    TryHugeAllocation(kMaxSize - i);
  }
-
-  // Check that asking for stuff near signed/unsigned boundary returns NULL
+  // Asking for memory sizes near signed/unsigned boundary (kMaxSignedSize)
+  // might work or not, depending on the amount of virtual memory.
  for (size_t i = 0; i < 100; i++) {
-    TryHugeAllocation(kMaxSignedSize - i);
-    TryHugeAllocation(kMaxSignedSize + i);
+    void* p = NULL;
+    p = malloc(kMaxSignedSize + i);
+    if (p) free(p);    // if: free(NULL) is not necessarily defined
+    p = malloc(kMaxSignedSize - i);
+    if (p) free(p);
  }
 }

@ -560,18 +563,6 @@ int main(int argc, char** argv) {
    free(p);
  }

-  // Check that large allocations fail with NULL instead of crashing
-  fprintf(LOGSTREAM, "==== Testing out of memory\n");
-  for (int s = 0; ; s += (10<<20)) {
-    void* large_object = malloc(s);
-    if (large_object == NULL) break;
-    free(large_object);
-  }
-
-  // Check that huge allocations fail with NULL instead of crashing
-  fprintf(LOGSTREAM, "==== Testing huge allocations\n");
-  TestHugeAllocations();
-
  // Check calloc() with various arguments
  fprintf(LOGSTREAM, "==== Testing calloc\n");
  TestCalloc(0, 0, true);
@ -611,10 +602,16 @@ int main(int argc, char** argv) {
    threads[i] = new TesterThread(i);
  }

-  // Start
+  // Start the threads.
+  // Set the stack size to a small value to avoid inheriting 120MB+
+  // limit when running under the google make system.
+  pthread_attr_t attr;
+  pthread_attr_init(&attr);
+  pthread_attr_setstacksize(&attr, 1 << 20);
  for (int i = 0; i < FLAGS_numthreads; ++i) {
-    CHECK_EQ(pthread_create(&thread_ids[i], NULL, RunThread, threads[i]), 0);
+    CHECK_EQ(pthread_create(&thread_ids[i], &attr, RunThread, threads[i]), 0);
  }
+  pthread_attr_destroy(&attr);

  // Wait
  for (int i = 0; i < FLAGS_numthreads; ++i) {
@ -624,6 +621,21 @@ int main(int argc, char** argv) {

  for (int i = 0; i < FLAGS_numthreads; ++i) delete threads[i];    // Cleanup

+  // Do the memory intensive tests after threads are done, since exhausting
+  // the available address space can make pthread_create fail.
+
+  // Check that huge allocations fail with NULL instead of crashing
+  fprintf(LOGSTREAM, "==== Testing huge allocations\n");
+  TestHugeAllocations();
+
+  // Check that large allocations fail with NULL instead of crashing
+  fprintf(LOGSTREAM, "==== Testing out of memory\n");
+  for (int s = 0; ; s += (10<<20)) {
+    void* large_object = malloc(s);
+    if (large_object == NULL) break;
+    free(large_object);
+  }
+
  fprintf(LOGSTREAM, "PASS\n");
  return 0;
 }