diff --git a/Makefile.am b/Makefile.am index 25618b7..d163435 100755 --- a/Makefile.am +++ b/Makefile.am @@ -203,6 +203,10 @@ libsysinfo_la_SOURCES = src/base/sysinfo.cc \ $(SYSINFO_INCLUDES) libsysinfo_la_LIBADD = $(NANOSLEEP_LIBS) +noinst_LTLIBRARIES += libmaybe_threads.la +# .cc is conditionally added below +libmaybe_threads_la_SOURCES = src/maybe_threads.h + # For MinGW, we use also have to use libwindows Luckily, we need the # windows.a library in exactly the same place we need spinlock.a # (pretty much everywhere), so we can use the same variable name for @@ -271,9 +275,7 @@ libsysinfo_la_LIBADD += -lshlwapi TCMALLOC_CC = # windows has its own system for threads and system memory allocation. if HAVE_PTHREAD_DESPITE_ASKING_FOR -MAYBE_THREADS_CC = src/maybe_threads.cc -else -MAYBE_THREADS_CC = +libmaybe_threads_la_SOURCES += src/maybe_threads.cc endif SYSTEM_ALLOC_CC = else !MINGW @@ -296,7 +298,7 @@ libspinlock_la_LIBADD = $(NANOSLEEP_LIBS) LIBSPINLOCK = libspinlock.la libsysinfo.la liblogging.la TCMALLOC_CC = src/tcmalloc.cc -MAYBE_THREADS_CC = src/maybe_threads.cc +libmaybe_threads_la_SOURCES += src/maybe_threads.cc SYSTEM_ALLOC_CC = src/system-alloc.cc endif !MINGW @@ -322,13 +324,12 @@ LOW_LEVEL_ALLOC_UNITTEST_INCLUDES = src/base/low_level_alloc.h \ $(LOGGING_INCLUDES) low_level_alloc_unittest_SOURCES = src/base/low_level_alloc.cc \ src/malloc_hook.cc \ - $(MAYBE_THREADS_CC) \ src/tests/low_level_alloc_unittest.cc \ $(LOW_LEVEL_ALLOC_UNITTEST_INCLUDES) # By default, MallocHook takes stack traces for use by the heap-checker. # We don't need that functionality here, so we turn it off to reduce deps. low_level_alloc_unittest_CXXFLAGS = -DNO_TCMALLOC_SAMPLES -low_level_alloc_unittest_LDADD = $(LIBSPINLOCK) +low_level_alloc_unittest_LDADD = $(LIBSPINLOCK) libmaybe_threads.la TESTS += atomicops_unittest ATOMICOPS_UNITTEST_INCLUDES = src/base/atomicops.h \ @@ -447,8 +448,7 @@ S_TCMALLOC_MINIMAL_INCLUDES = src/common.h \ src/base/thread_annotations.h \ src/malloc_hook-inl.h \ src/malloc_hook_mmap_linux.h \ - src/malloc_hook_mmap_freebsd.h \ - src/maybe_threads.h + src/malloc_hook_mmap_freebsd.h SG_TCMALLOC_MINIMAL_INCLUDES = src/gperftools/malloc_hook.h \ src/gperftools/malloc_hook_c.h \ src/gperftools/malloc_extension.h \ @@ -477,7 +477,6 @@ libtcmalloc_minimal_internal_la_SOURCES = src/common.cc \ src/thread_cache.cc \ src/malloc_hook.cc \ src/malloc_extension.cc \ - $(MAYBE_THREADS_CC) \ $(TCMALLOC_MINIMAL_INCLUDES) # We #define NO_TCMALLOC_SAMPLES, since sampling is turned off for _minimal. libtcmalloc_minimal_internal_la_CXXFLAGS = -DNO_TCMALLOC_SAMPLES \ @@ -485,7 +484,7 @@ libtcmalloc_minimal_internal_la_CXXFLAGS = -DNO_TCMALLOC_SAMPLES \ $(PTHREAD_CFLAGS) -DNDEBUG \ $(AM_CXXFLAGS) $(NO_EXCEPTIONS) libtcmalloc_minimal_internal_la_LDFLAGS = $(PTHREAD_CFLAGS) $(AM_LDFLAGS) -libtcmalloc_minimal_internal_la_LIBADD = $(PTHREAD_LIBS) $(LIBSPINLOCK) +libtcmalloc_minimal_internal_la_LIBADD = $(PTHREAD_LIBS) $(LIBSPINLOCK) libmaybe_threads.la lib_LTLIBRARIES += libtcmalloc_minimal.la WINDOWS_PROJECTS += vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj @@ -898,7 +897,7 @@ lib_LTLIBRARIES += libtcmalloc.la libtcmalloc_la_SOURCES = $(TCMALLOC_CC) $(TCMALLOC_INCLUDES) libtcmalloc_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS) libtcmalloc_la_LDFLAGS = $(PTHREAD_CFLAGS) -version-info @TCMALLOC_SO_VERSION@ -libtcmalloc_la_LIBADD = libtcmalloc_internal.la $(PTHREAD_LIBS) +libtcmalloc_la_LIBADD = libtcmalloc_internal.la libmaybe_threads.la $(PTHREAD_LIBS) if WITH_HEAP_CHECKER # heap-checker-bcad is last, in hopes its global ctor will run first. @@ -1238,7 +1237,7 @@ libprofiler_la_SOURCES = src/profiler.cc \ src/profile-handler.cc \ src/profiledata.cc \ $(CPU_PROFILER_INCLUDES) -libprofiler_la_LIBADD = libstacktrace.la +libprofiler_la_LIBADD = libstacktrace.la libmaybe_threads.la # We have to include ProfileData for profiledata_unittest CPU_PROFILER_SYMBOLS = '(ProfilerStart|ProfilerStartWithOptions|ProfilerStop|ProfilerFlush|ProfilerEnable|ProfilerDisable|ProfilingIsEnabledForAllThreads|ProfilerRegisterThread|ProfilerGetCurrentState|ProfilerState|ProfileData|ProfileHandler)' libprofiler_la_LDFLAGS = -export-symbols-regex $(CPU_PROFILER_SYMBOLS) \ diff --git a/configure.ac b/configure.ac index 65c2a11..8892d55 100644 --- a/configure.ac +++ b/configure.ac @@ -465,6 +465,18 @@ AM_CONDITIONAL(WITH_STACK_TRACE, test "$enable_cpu_profiler" = yes -o \ "$enable_heap_profiler" = yes -o \ "$enable_heap_checker" = yes) +have_linux_sigev_thread_id=no +AC_MSG_CHECKING([for Linux SIGEV_THREAD_ID]) +AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([[#include + #include ]], + [[return SIGEV_THREAD_ID || CLOCK_THREAD_CPUTIME_ID || __linux;]])], + [AC_DEFINE(HAVE_LINUX_SIGEV_THREAD_ID, 1, + [Define if this is Linux that has SIGEV_THREAD_ID]) + have_linux_sigev_thread_id=yes + AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no])]) + # Write generated configuration file AC_CONFIG_FILES([Makefile src/gperftools/tcmalloc.h src/windows/gperftools/tcmalloc.h]) diff --git a/src/maybe_threads.cc b/src/maybe_threads.cc index 834eabb..6dd0d8d 100644 --- a/src/maybe_threads.cc +++ b/src/maybe_threads.cc @@ -60,6 +60,8 @@ extern "C" { int pthread_key_create (pthread_key_t*, void (*)(void*)) __THROW ATTRIBUTE_WEAK; + int pthread_key_delete (pthread_key_t) + __THROW ATTRIBUTE_WEAK; void *pthread_getspecific(pthread_key_t) __THROW ATTRIBUTE_WEAK; int pthread_setspecific(pthread_key_t, const void*) @@ -96,6 +98,14 @@ int perftools_pthread_key_create(pthread_key_t *key, } } +int perftools_pthread_key_delete(pthread_key_t key) { + if (pthread_key_delete) { + return pthread_key_delete(key); + } else { + return 0; + } +} + void *perftools_pthread_getspecific(pthread_key_t key) { if (pthread_getspecific) { return pthread_getspecific(key); diff --git a/src/maybe_threads.h b/src/maybe_threads.h index d25f4b3..b60f4ef 100644 --- a/src/maybe_threads.h +++ b/src/maybe_threads.h @@ -45,6 +45,7 @@ int perftools_pthread_key_create(pthread_key_t *key, void (*destr_function) (void *)); +int perftools_pthread_key_delete(pthread_key_t key); void *perftools_pthread_getspecific(pthread_key_t key); int perftools_pthread_setspecific(pthread_key_t key, void *val); int perftools_pthread_once(pthread_once_t *ctl, diff --git a/src/profile-handler.cc b/src/profile-handler.cc index 1477e6b..1518ed4 100644 --- a/src/profile-handler.cc +++ b/src/profile-handler.cc @@ -46,6 +46,15 @@ #include #include +#if HAVE_LINUX_SIGEV_THREAD_ID +// for timer_{create,settime} and associated typedefs & constants +#include +// for sys_gettid +#include "base/linux_syscall_support.h" +// for perftools_pthread_key_create +#include "maybe_threads.h" +#endif + #include "base/dynamic_annotations.h" #include "base/googleinit.h" #include "base/logging.h" @@ -144,6 +153,14 @@ class ProfileHandler { // Is profiling allowed at all? bool allowed_; + bool per_thread_timer_enabled_; + +#ifdef HAVE_LINUX_SIGEV_THREAD_ID + // this is used to destroy per-thread profiling timers on thread + // termination + pthread_key_t thread_timer_key; +#endif + // Whether or not the threading system provides interval timers that are // shared by all threads in a process. enum { @@ -227,6 +244,83 @@ const int32 ProfileHandler::kDefaultFrequency; extern "C" int pthread_once(pthread_once_t *, void (*)(void)) ATTRIBUTE_WEAK; +#if HAVE_LINUX_SIGEV_THREAD_ID + +// We use weak alias to timer_create to avoid runtime dependency on +// -lrt and in turn -lpthread. +// +// At runtime we detect if timer_create is available and if so we +// can enable linux-sigev-thread mode of profiling +extern "C" { + int timer_create(clockid_t clockid, struct sigevent *evp, + timer_t *timerid) + ATTRIBUTE_WEAK; + int timer_delete(timer_t timerid) + ATTRIBUTE_WEAK; + int timer_settime(timer_t timerid, int flags, + const struct itimerspec *value, + struct itimerspec *ovalue) + ATTRIBUTE_WEAK; +} + +struct timer_id_holder { + timer_t timerid; + timer_id_holder(timer_t _timerid) : timerid(_timerid) {} +}; + +extern "C" { + static void ThreadTimerDestructor(void *arg) { + if (!arg) { + return; + } + timer_id_holder *holder = static_cast(arg); + timer_delete(holder->timerid); + delete holder; + } +} + +static void CreateThreadTimerKey(pthread_key_t *pkey) { + int rv = perftools_pthread_key_create(pkey, ThreadTimerDestructor); + if (rv) { + RAW_LOG(FATAL, "aborting due to pthread_key_create error: %s", strerror(rv)); + } +} + +static void StartLinuxThreadTimer(int timer_type, int32 frequency, pthread_key_t timer_key) { + int rv; + struct sigevent sevp; + timer_t timerid; + struct itimerspec its; + memset(&sevp, 0, sizeof(sevp)); + sevp.sigev_notify = SIGEV_THREAD_ID; + sevp._sigev_un._tid = sys_gettid(); + const int signal_number = (timer_type == ITIMER_PROF ? SIGPROF : SIGALRM); + sevp.sigev_signo = signal_number; + clockid_t clock = CLOCK_THREAD_CPUTIME_ID; + if (timer_type == ITIMER_REAL) { + clock = CLOCK_MONOTONIC; + } + rv = timer_create(clock, &sevp, &timerid); + if (rv) { + RAW_LOG(FATAL, "aborting due to timer_create error: %s", strerror(errno)); + } + + timer_id_holder *holder = new timer_id_holder(timerid); + rv = perftools_pthread_setspecific(timer_key, holder); + if (rv) { + RAW_LOG(FATAL, "aborting due to pthread_setspecific error: %s", strerror(rv)); + } + + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 1000000000 / frequency; + its.it_value = its.it_interval; + rv = timer_settime(timerid, 0, &its, 0); + if (rv) { + RAW_LOG(FATAL, "aborting due to timer_settime error: %s", strerror(errno)); + } +} +#endif + void ProfileHandler::Init() { instance_ = new ProfileHandler(); } @@ -249,6 +343,7 @@ ProfileHandler::ProfileHandler() : interrupts_(0), callback_count_(0), allowed_(true), + per_thread_timer_enabled_(false), timer_sharing_(TIMERS_UNTOUCHED) { SpinLockHolder cl(&control_lock_); @@ -281,10 +376,29 @@ ProfileHandler::ProfileHandler() // Ignore signals until we decide to turn profiling on. (Paranoia; // should already be ignored.) DisableHandler(); + +#if HAVE_LINUX_SIGEV_THREAD_ID + if (getenv("CPUPROFILE_PER_THREAD_TIMERS")) { + if (timer_create && pthread_once) { + timer_sharing_ = TIMERS_SEPARATE; + CreateThreadTimerKey(&thread_timer_key); + per_thread_timer_enabled_ = true; + } else { + RAW_LOG(INFO, + "Not enabling linux-per-thread-timers mode due to lack of timer_create." + " Preload or link to librt.so for this to work"); + } + } +#endif } ProfileHandler::~ProfileHandler() { Reset(); +#ifdef HAVE_LINUX_SIGEV_THREAD_ID + if (per_thread_timer_enabled_) { + perftools_pthread_key_delete(thread_timer_key); + } +#endif } void ProfileHandler::RegisterThread() { @@ -421,6 +535,14 @@ void ProfileHandler::StartTimer() { if (!allowed_) { return; } + +#if HAVE_LINUX_SIGEV_THREAD_ID + if (per_thread_timer_enabled_) { + StartLinuxThreadTimer(timer_type_, frequency_, thread_timer_key); + return; + } +#endif + struct itimerval timer; timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 1000000 / frequency_; @@ -432,6 +554,10 @@ void ProfileHandler::StopTimer() { if (!allowed_) { return; } + if (per_thread_timer_enabled_) { + RAW_LOG(FATAL, "StopTimer cannot be called in linux-per-thread-timers mode"); + } + struct itimerval timer; memset(&timer, 0, sizeof timer); setitimer(timer_type_, &timer, 0); @@ -441,6 +567,9 @@ bool ProfileHandler::IsTimerRunning() { if (!allowed_) { return false; } + if (per_thread_timer_enabled_) { + return false; + } struct itimerval current_timer; RAW_CHECK(0 == getitimer(timer_type_, ¤t_timer), "getitimer"); return (current_timer.it_value.tv_sec != 0 || diff --git a/src/tests/profile-handler_unittest.cc b/src/tests/profile-handler_unittest.cc index 9f6b0af..e49d23e 100644 --- a/src/tests/profile-handler_unittest.cc +++ b/src/tests/profile-handler_unittest.cc @@ -81,6 +81,7 @@ int kSleepInterval = 200000000; int kTimerResetInterval = 5000000; // Whether each thread has separate timers. +static bool linux_per_thread_timers_mode_ = false; static bool timer_separate_ = false; static int timer_type_ = ITIMER_PROF; static int signal_number_ = SIGPROF; @@ -180,7 +181,7 @@ class BusyThread : public Thread { while (!stop_work()) { } // If timers are separate, check that timer is enabled for this thread. - EXPECT_TRUE(!timer_separate_ || IsTimerEnabled()); + EXPECT_TRUE(linux_per_thread_timers_mode_ || !timer_separate_ || IsTimerEnabled()); } }; @@ -188,7 +189,7 @@ class NullThread : public Thread { private: void Run() { // If timers are separate, check that timer is enabled for this thread. - EXPECT_TRUE(!timer_separate_ || IsTimerEnabled()); + EXPECT_TRUE(linux_per_thread_timers_mode_ || !timer_separate_ || IsTimerEnabled()); } }; @@ -209,6 +210,9 @@ class ProfileHandlerTest { signal_number_ = (getenv("CPUPROFILE_REALTIME") ? SIGALRM : SIGPROF); timer_separate_ = threads_have_separate_timers(); +#if HAVE_LINUX_SIGEV_THREAD_ID + linux_per_thread_timers_mode_ = (getenv("CPUPROFILE_PER_THREAD_TIMERS") != NULL); +#endif Delay(kTimerResetInterval); } @@ -300,7 +304,7 @@ class ProfileHandlerTest { // Check the callback count. EXPECT_GT(GetCallbackCount(), 0); // Check that the profile timer is enabled. - EXPECT_EQ(FLAGS_test_profiler_enabled, IsTimerEnabled()); + EXPECT_EQ(FLAGS_test_profiler_enabled, linux_per_thread_timers_mode_ || IsTimerEnabled()); // Check that the signal handler is enabled. if (FLAGS_test_profiler_signal_handler) { EXPECT_EQ(FLAGS_test_profiler_enabled, IsSignalEnabled()); @@ -332,10 +336,12 @@ class ProfileHandlerTest { if (FLAGS_test_profiler_signal_handler) { EXPECT_FALSE(IsSignalEnabled()); } - if (timer_separate_) { - EXPECT_TRUE(IsTimerEnabled()); - } else { - EXPECT_FALSE(IsTimerEnabled()); + if (!linux_per_thread_timers_mode_) { + if (timer_separate_) { + EXPECT_TRUE(IsTimerEnabled()); + } else { + EXPECT_FALSE(IsTimerEnabled()); + } } } } @@ -350,10 +356,12 @@ class ProfileHandlerTest { // Check that the callback count is 0. EXPECT_EQ(0, GetCallbackCount()); // Check that the timer is disabled if shared, enabled otherwise. - if (timer_separate_) { - EXPECT_TRUE(IsTimerEnabled()); - } else { - EXPECT_FALSE(IsTimerEnabled()); + if (!linux_per_thread_timers_mode_) { + if (timer_separate_) { + EXPECT_TRUE(IsTimerEnabled()); + } else { + EXPECT_FALSE(IsTimerEnabled()); + } } // Verify that the ProfileHandler is not accumulating profile ticks. uint64 interrupts_before = GetInterruptCount(); @@ -498,7 +506,7 @@ TEST_F(ProfileHandlerTest, RegisterCallbackBeforeThread) { // correctly enabled. RegisterThread(); EXPECT_EQ(1, GetCallbackCount()); - EXPECT_EQ(FLAGS_test_profiler_enabled, IsTimerEnabled()); + EXPECT_EQ(FLAGS_test_profiler_enabled, linux_per_thread_timers_mode_ || IsTimerEnabled()); if (FLAGS_test_profiler_signal_handler) { EXPECT_EQ(FLAGS_test_profiler_enabled, IsSignalEnabled()); }