implemented cpu-profiling mode that profiles threads separately

Default mode of operation of cpu profiler uses itimer and
SIGPROF. This timer is by definition per-process and no spec defines
which thread is going to receive SIGPROF. And it provides correct
profiles only if we assume that probability of picking threads will be
proportional to cpu time spent by threads.

It is easy to see, that recent Linux (at least on common SMP hardware)
doesn't satisfy that assumption. Quite big skews of SIGPROF ticks
between threads is visible. I.e. I could see as big as 70%/20%
division instead of 50%/50% for pair of cpu-hog threads. (And I do see
it become 50/50 with new mode)

Fortunately POSIX provides mechanism to track per-thread cpu time via
posix timers facility. And even more fortunately, Linux also provides
mechanism to deliver timer ticks to specific threads.

Interestingly, it looks like FreeBSD also has very similar facility
and seems to suffer from same skew.  But due to difference in a way
how threads are identified, I haven't bothered to try to support this
mode on FreeBSD.

This commit implements new profiling mode where every thread creates
posix timer which tracks thread's cpu time. Threads also also set up
signal delivery to itself on overflows of that timer.

This new mode requires every thread to be registered in cpu
profiler. Existing ProfilerRegisterThread function is used for that.

Because registering threads requires application support (or suitable
LD_PRELOAD-able wrapper for thread creation API), new mode is off by
default. And it has to be manually activated by setting environment
variable CPUPROFILE_PER_THREAD_TIMERS.

New mode also requires librt symbols to be available. Which we do not
link to due to librt's dependency on libpthread.  Which we avoid due
to perf impact of bringing in libpthread to otherwise single-threaded
programs. So it has to be either already loaded by profiling program
or LD_PRELOAD-ed.
This commit is contained in:
Aliaksey Kandratsenka 2014-09-07 13:09:14 -07:00
parent 714bd93e42
commit 1108d83cf4
6 changed files with 183 additions and 24 deletions

View File

@ -203,6 +203,10 @@ libsysinfo_la_SOURCES = src/base/sysinfo.cc \
$(SYSINFO_INCLUDES)
libsysinfo_la_LIBADD = $(NANOSLEEP_LIBS)
noinst_LTLIBRARIES += libmaybe_threads.la
# .cc is conditionally added below
libmaybe_threads_la_SOURCES = src/maybe_threads.h
# For MinGW, we use also have to use libwindows Luckily, we need the
# windows.a library in exactly the same place we need spinlock.a
# (pretty much everywhere), so we can use the same variable name for
@ -271,9 +275,7 @@ libsysinfo_la_LIBADD += -lshlwapi
TCMALLOC_CC =
# windows has its own system for threads and system memory allocation.
if HAVE_PTHREAD_DESPITE_ASKING_FOR
MAYBE_THREADS_CC = src/maybe_threads.cc
else
MAYBE_THREADS_CC =
libmaybe_threads_la_SOURCES += src/maybe_threads.cc
endif
SYSTEM_ALLOC_CC =
else !MINGW
@ -296,7 +298,7 @@ libspinlock_la_LIBADD = $(NANOSLEEP_LIBS)
LIBSPINLOCK = libspinlock.la libsysinfo.la liblogging.la
TCMALLOC_CC = src/tcmalloc.cc
MAYBE_THREADS_CC = src/maybe_threads.cc
libmaybe_threads_la_SOURCES += src/maybe_threads.cc
SYSTEM_ALLOC_CC = src/system-alloc.cc
endif !MINGW
@ -322,13 +324,12 @@ LOW_LEVEL_ALLOC_UNITTEST_INCLUDES = src/base/low_level_alloc.h \
$(LOGGING_INCLUDES)
low_level_alloc_unittest_SOURCES = src/base/low_level_alloc.cc \
src/malloc_hook.cc \
$(MAYBE_THREADS_CC) \
src/tests/low_level_alloc_unittest.cc \
$(LOW_LEVEL_ALLOC_UNITTEST_INCLUDES)
# By default, MallocHook takes stack traces for use by the heap-checker.
# We don't need that functionality here, so we turn it off to reduce deps.
low_level_alloc_unittest_CXXFLAGS = -DNO_TCMALLOC_SAMPLES
low_level_alloc_unittest_LDADD = $(LIBSPINLOCK)
low_level_alloc_unittest_LDADD = $(LIBSPINLOCK) libmaybe_threads.la
TESTS += atomicops_unittest
ATOMICOPS_UNITTEST_INCLUDES = src/base/atomicops.h \
@ -447,8 +448,7 @@ S_TCMALLOC_MINIMAL_INCLUDES = src/common.h \
src/base/thread_annotations.h \
src/malloc_hook-inl.h \
src/malloc_hook_mmap_linux.h \
src/malloc_hook_mmap_freebsd.h \
src/maybe_threads.h
src/malloc_hook_mmap_freebsd.h
SG_TCMALLOC_MINIMAL_INCLUDES = src/gperftools/malloc_hook.h \
src/gperftools/malloc_hook_c.h \
src/gperftools/malloc_extension.h \
@ -477,7 +477,6 @@ libtcmalloc_minimal_internal_la_SOURCES = src/common.cc \
src/thread_cache.cc \
src/malloc_hook.cc \
src/malloc_extension.cc \
$(MAYBE_THREADS_CC) \
$(TCMALLOC_MINIMAL_INCLUDES)
# We #define NO_TCMALLOC_SAMPLES, since sampling is turned off for _minimal.
libtcmalloc_minimal_internal_la_CXXFLAGS = -DNO_TCMALLOC_SAMPLES \
@ -485,7 +484,7 @@ libtcmalloc_minimal_internal_la_CXXFLAGS = -DNO_TCMALLOC_SAMPLES \
$(PTHREAD_CFLAGS) -DNDEBUG \
$(AM_CXXFLAGS) $(NO_EXCEPTIONS)
libtcmalloc_minimal_internal_la_LDFLAGS = $(PTHREAD_CFLAGS) $(AM_LDFLAGS)
libtcmalloc_minimal_internal_la_LIBADD = $(PTHREAD_LIBS) $(LIBSPINLOCK)
libtcmalloc_minimal_internal_la_LIBADD = $(PTHREAD_LIBS) $(LIBSPINLOCK) libmaybe_threads.la
lib_LTLIBRARIES += libtcmalloc_minimal.la
WINDOWS_PROJECTS += vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj
@ -898,7 +897,7 @@ lib_LTLIBRARIES += libtcmalloc.la
libtcmalloc_la_SOURCES = $(TCMALLOC_CC) $(TCMALLOC_INCLUDES)
libtcmalloc_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS)
libtcmalloc_la_LDFLAGS = $(PTHREAD_CFLAGS) -version-info @TCMALLOC_SO_VERSION@
libtcmalloc_la_LIBADD = libtcmalloc_internal.la $(PTHREAD_LIBS)
libtcmalloc_la_LIBADD = libtcmalloc_internal.la libmaybe_threads.la $(PTHREAD_LIBS)
if WITH_HEAP_CHECKER
# heap-checker-bcad is last, in hopes its global ctor will run first.
@ -1238,7 +1237,7 @@ libprofiler_la_SOURCES = src/profiler.cc \
src/profile-handler.cc \
src/profiledata.cc \
$(CPU_PROFILER_INCLUDES)
libprofiler_la_LIBADD = libstacktrace.la
libprofiler_la_LIBADD = libstacktrace.la libmaybe_threads.la
# We have to include ProfileData for profiledata_unittest
CPU_PROFILER_SYMBOLS = '(ProfilerStart|ProfilerStartWithOptions|ProfilerStop|ProfilerFlush|ProfilerEnable|ProfilerDisable|ProfilingIsEnabledForAllThreads|ProfilerRegisterThread|ProfilerGetCurrentState|ProfilerState|ProfileData|ProfileHandler)'
libprofiler_la_LDFLAGS = -export-symbols-regex $(CPU_PROFILER_SYMBOLS) \

View File

@ -465,6 +465,18 @@ AM_CONDITIONAL(WITH_STACK_TRACE, test "$enable_cpu_profiler" = yes -o \
"$enable_heap_profiler" = yes -o \
"$enable_heap_checker" = yes)
have_linux_sigev_thread_id=no
AC_MSG_CHECKING([for Linux SIGEV_THREAD_ID])
AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM([[#include <signal.h>
#include <time.h>]],
[[return SIGEV_THREAD_ID || CLOCK_THREAD_CPUTIME_ID || __linux;]])],
[AC_DEFINE(HAVE_LINUX_SIGEV_THREAD_ID, 1,
[Define if this is Linux that has SIGEV_THREAD_ID])
have_linux_sigev_thread_id=yes
AC_MSG_RESULT([yes])],
[AC_MSG_RESULT([no])])
# Write generated configuration file
AC_CONFIG_FILES([Makefile
src/gperftools/tcmalloc.h src/windows/gperftools/tcmalloc.h])

View File

@ -60,6 +60,8 @@
extern "C" {
int pthread_key_create (pthread_key_t*, void (*)(void*))
__THROW ATTRIBUTE_WEAK;
int pthread_key_delete (pthread_key_t)
__THROW ATTRIBUTE_WEAK;
void *pthread_getspecific(pthread_key_t)
__THROW ATTRIBUTE_WEAK;
int pthread_setspecific(pthread_key_t, const void*)
@ -96,6 +98,14 @@ int perftools_pthread_key_create(pthread_key_t *key,
}
}
int perftools_pthread_key_delete(pthread_key_t key) {
if (pthread_key_delete) {
return pthread_key_delete(key);
} else {
return 0;
}
}
void *perftools_pthread_getspecific(pthread_key_t key) {
if (pthread_getspecific) {
return pthread_getspecific(key);

View File

@ -45,6 +45,7 @@
int perftools_pthread_key_create(pthread_key_t *key,
void (*destr_function) (void *));
int perftools_pthread_key_delete(pthread_key_t key);
void *perftools_pthread_getspecific(pthread_key_t key);
int perftools_pthread_setspecific(pthread_key_t key, void *val);
int perftools_pthread_once(pthread_once_t *ctl,

View File

@ -46,6 +46,15 @@
#include <list>
#include <string>
#if HAVE_LINUX_SIGEV_THREAD_ID
// for timer_{create,settime} and associated typedefs & constants
#include <time.h>
// for sys_gettid
#include "base/linux_syscall_support.h"
// for perftools_pthread_key_create
#include "maybe_threads.h"
#endif
#include "base/dynamic_annotations.h"
#include "base/googleinit.h"
#include "base/logging.h"
@ -144,6 +153,14 @@ class ProfileHandler {
// Is profiling allowed at all?
bool allowed_;
bool per_thread_timer_enabled_;
#ifdef HAVE_LINUX_SIGEV_THREAD_ID
// this is used to destroy per-thread profiling timers on thread
// termination
pthread_key_t thread_timer_key;
#endif
// Whether or not the threading system provides interval timers that are
// shared by all threads in a process.
enum {
@ -227,6 +244,83 @@ const int32 ProfileHandler::kDefaultFrequency;
extern "C" int pthread_once(pthread_once_t *, void (*)(void))
ATTRIBUTE_WEAK;
#if HAVE_LINUX_SIGEV_THREAD_ID
// We use weak alias to timer_create to avoid runtime dependency on
// -lrt and in turn -lpthread.
//
// At runtime we detect if timer_create is available and if so we
// can enable linux-sigev-thread mode of profiling
extern "C" {
int timer_create(clockid_t clockid, struct sigevent *evp,
timer_t *timerid)
ATTRIBUTE_WEAK;
int timer_delete(timer_t timerid)
ATTRIBUTE_WEAK;
int timer_settime(timer_t timerid, int flags,
const struct itimerspec *value,
struct itimerspec *ovalue)
ATTRIBUTE_WEAK;
}
struct timer_id_holder {
timer_t timerid;
timer_id_holder(timer_t _timerid) : timerid(_timerid) {}
};
extern "C" {
static void ThreadTimerDestructor(void *arg) {
if (!arg) {
return;
}
timer_id_holder *holder = static_cast<timer_id_holder *>(arg);
timer_delete(holder->timerid);
delete holder;
}
}
static void CreateThreadTimerKey(pthread_key_t *pkey) {
int rv = perftools_pthread_key_create(pkey, ThreadTimerDestructor);
if (rv) {
RAW_LOG(FATAL, "aborting due to pthread_key_create error: %s", strerror(rv));
}
}
static void StartLinuxThreadTimer(int timer_type, int32 frequency, pthread_key_t timer_key) {
int rv;
struct sigevent sevp;
timer_t timerid;
struct itimerspec its;
memset(&sevp, 0, sizeof(sevp));
sevp.sigev_notify = SIGEV_THREAD_ID;
sevp._sigev_un._tid = sys_gettid();
const int signal_number = (timer_type == ITIMER_PROF ? SIGPROF : SIGALRM);
sevp.sigev_signo = signal_number;
clockid_t clock = CLOCK_THREAD_CPUTIME_ID;
if (timer_type == ITIMER_REAL) {
clock = CLOCK_MONOTONIC;
}
rv = timer_create(clock, &sevp, &timerid);
if (rv) {
RAW_LOG(FATAL, "aborting due to timer_create error: %s", strerror(errno));
}
timer_id_holder *holder = new timer_id_holder(timerid);
rv = perftools_pthread_setspecific(timer_key, holder);
if (rv) {
RAW_LOG(FATAL, "aborting due to pthread_setspecific error: %s", strerror(rv));
}
its.it_interval.tv_sec = 0;
its.it_interval.tv_nsec = 1000000000 / frequency;
its.it_value = its.it_interval;
rv = timer_settime(timerid, 0, &its, 0);
if (rv) {
RAW_LOG(FATAL, "aborting due to timer_settime error: %s", strerror(errno));
}
}
#endif
void ProfileHandler::Init() {
instance_ = new ProfileHandler();
}
@ -249,6 +343,7 @@ ProfileHandler::ProfileHandler()
: interrupts_(0),
callback_count_(0),
allowed_(true),
per_thread_timer_enabled_(false),
timer_sharing_(TIMERS_UNTOUCHED) {
SpinLockHolder cl(&control_lock_);
@ -281,10 +376,29 @@ ProfileHandler::ProfileHandler()
// Ignore signals until we decide to turn profiling on. (Paranoia;
// should already be ignored.)
DisableHandler();
#if HAVE_LINUX_SIGEV_THREAD_ID
if (getenv("CPUPROFILE_PER_THREAD_TIMERS")) {
if (timer_create && pthread_once) {
timer_sharing_ = TIMERS_SEPARATE;
CreateThreadTimerKey(&thread_timer_key);
per_thread_timer_enabled_ = true;
} else {
RAW_LOG(INFO,
"Not enabling linux-per-thread-timers mode due to lack of timer_create."
" Preload or link to librt.so for this to work");
}
}
#endif
}
ProfileHandler::~ProfileHandler() {
Reset();
#ifdef HAVE_LINUX_SIGEV_THREAD_ID
if (per_thread_timer_enabled_) {
perftools_pthread_key_delete(thread_timer_key);
}
#endif
}
void ProfileHandler::RegisterThread() {
@ -421,6 +535,14 @@ void ProfileHandler::StartTimer() {
if (!allowed_) {
return;
}
#if HAVE_LINUX_SIGEV_THREAD_ID
if (per_thread_timer_enabled_) {
StartLinuxThreadTimer(timer_type_, frequency_, thread_timer_key);
return;
}
#endif
struct itimerval timer;
timer.it_interval.tv_sec = 0;
timer.it_interval.tv_usec = 1000000 / frequency_;
@ -432,6 +554,10 @@ void ProfileHandler::StopTimer() {
if (!allowed_) {
return;
}
if (per_thread_timer_enabled_) {
RAW_LOG(FATAL, "StopTimer cannot be called in linux-per-thread-timers mode");
}
struct itimerval timer;
memset(&timer, 0, sizeof timer);
setitimer(timer_type_, &timer, 0);
@ -441,6 +567,9 @@ bool ProfileHandler::IsTimerRunning() {
if (!allowed_) {
return false;
}
if (per_thread_timer_enabled_) {
return false;
}
struct itimerval current_timer;
RAW_CHECK(0 == getitimer(timer_type_, &current_timer), "getitimer");
return (current_timer.it_value.tv_sec != 0 ||

View File

@ -81,6 +81,7 @@ int kSleepInterval = 200000000;
int kTimerResetInterval = 5000000;
// Whether each thread has separate timers.
static bool linux_per_thread_timers_mode_ = false;
static bool timer_separate_ = false;
static int timer_type_ = ITIMER_PROF;
static int signal_number_ = SIGPROF;
@ -180,7 +181,7 @@ class BusyThread : public Thread {
while (!stop_work()) {
}
// If timers are separate, check that timer is enabled for this thread.
EXPECT_TRUE(!timer_separate_ || IsTimerEnabled());
EXPECT_TRUE(linux_per_thread_timers_mode_ || !timer_separate_ || IsTimerEnabled());
}
};
@ -188,7 +189,7 @@ class NullThread : public Thread {
private:
void Run() {
// If timers are separate, check that timer is enabled for this thread.
EXPECT_TRUE(!timer_separate_ || IsTimerEnabled());
EXPECT_TRUE(linux_per_thread_timers_mode_ || !timer_separate_ || IsTimerEnabled());
}
};
@ -209,6 +210,9 @@ class ProfileHandlerTest {
signal_number_ = (getenv("CPUPROFILE_REALTIME") ? SIGALRM : SIGPROF);
timer_separate_ = threads_have_separate_timers();
#if HAVE_LINUX_SIGEV_THREAD_ID
linux_per_thread_timers_mode_ = (getenv("CPUPROFILE_PER_THREAD_TIMERS") != NULL);
#endif
Delay(kTimerResetInterval);
}
@ -300,7 +304,7 @@ class ProfileHandlerTest {
// Check the callback count.
EXPECT_GT(GetCallbackCount(), 0);
// Check that the profile timer is enabled.
EXPECT_EQ(FLAGS_test_profiler_enabled, IsTimerEnabled());
EXPECT_EQ(FLAGS_test_profiler_enabled, linux_per_thread_timers_mode_ || IsTimerEnabled());
// Check that the signal handler is enabled.
if (FLAGS_test_profiler_signal_handler) {
EXPECT_EQ(FLAGS_test_profiler_enabled, IsSignalEnabled());
@ -332,10 +336,12 @@ class ProfileHandlerTest {
if (FLAGS_test_profiler_signal_handler) {
EXPECT_FALSE(IsSignalEnabled());
}
if (timer_separate_) {
EXPECT_TRUE(IsTimerEnabled());
} else {
EXPECT_FALSE(IsTimerEnabled());
if (!linux_per_thread_timers_mode_) {
if (timer_separate_) {
EXPECT_TRUE(IsTimerEnabled());
} else {
EXPECT_FALSE(IsTimerEnabled());
}
}
}
}
@ -350,10 +356,12 @@ class ProfileHandlerTest {
// Check that the callback count is 0.
EXPECT_EQ(0, GetCallbackCount());
// Check that the timer is disabled if shared, enabled otherwise.
if (timer_separate_) {
EXPECT_TRUE(IsTimerEnabled());
} else {
EXPECT_FALSE(IsTimerEnabled());
if (!linux_per_thread_timers_mode_) {
if (timer_separate_) {
EXPECT_TRUE(IsTimerEnabled());
} else {
EXPECT_FALSE(IsTimerEnabled());
}
}
// Verify that the ProfileHandler is not accumulating profile ticks.
uint64 interrupts_before = GetInterruptCount();
@ -498,7 +506,7 @@ TEST_F(ProfileHandlerTest, RegisterCallbackBeforeThread) {
// correctly enabled.
RegisterThread();
EXPECT_EQ(1, GetCallbackCount());
EXPECT_EQ(FLAGS_test_profiler_enabled, IsTimerEnabled());
EXPECT_EQ(FLAGS_test_profiler_enabled, linux_per_thread_timers_mode_ || IsTimerEnabled());
if (FLAGS_test_profiler_signal_handler) {
EXPECT_EQ(FLAGS_test_profiler_enabled, IsSignalEnabled());
}