refactor stacktrace.cc and drop x86 backtracer

We had plenty of old and mostly no-longer-correct i386 cruft. Now that
the generic_fp backtracer covers i386 just fine, we can drop the
explicit x86 backtracer.

With that, stacktrace.cc has been refactored and simplified, mostly
around picking the default implementation, along with a few more minor
cleanups.
Author: Aliaksey Kandratsenka, 2023-07-02 19:21:21 -04:00
parent d9b178695f
commit 972c12f77d
7 changed files with 161 additions and 606 deletions
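
For context: the generic_fp backtracer simply walks the frame-pointer chain that the i386 ABI (with frame pointers enabled) maintains, which is why the hand-written x86 walker deleted below became redundant. A minimal sketch of the idea (illustrative only, not the actual stacktrace_generic_fp-inl.h code):

  // Conventional frame layout: fp[0] holds the caller's saved frame
  // pointer, fp[1] the return address.
  static int CaptureByFramePointer(void** result, int max_depth) {
    void** fp = (void**)__builtin_frame_address(0);
    int n = 0;
    while (fp && n < max_depth) {
      void* ret = fp[1];
      if (!ret) break;             // a zero return address ends the chain
      result[n++] = ret;
      void** next = (void**)fp[0]; // hop to the caller's frame
      if (next <= fp) break;       // stack grows down; reject bogus links
      fp = next;
    }
    return n;
  }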

CMakeLists.txt

@@ -634,7 +634,6 @@ if(WITH_STACK_TRACE)
src/stacktrace_powerpc-inl.h
src/stacktrace_powerpc-darwin-inl.h
src/stacktrace_powerpc-linux-inl.h
src/stacktrace_x86-inl.h
src/stacktrace_win32-inl.h
src/stacktrace_instrument-inl.h
src/base/elf_mem_image.h

Makefile.am

@@ -302,7 +302,6 @@ S_STACKTRACE_INCLUDES = src/stacktrace_impl_setup-inl.h \
src/stacktrace_powerpc-inl.h \
src/stacktrace_powerpc-darwin-inl.h \
src/stacktrace_powerpc-linux-inl.h \
src/stacktrace_x86-inl.h \
src/stacktrace_win32-inl.h \
src/stacktrace_instrument-inl.h \
src/base/elf_mem_image.h \

src/getpc.h

@@ -50,12 +50,7 @@
// the defined constants that define the register we want to see (eg
// REG_EIP). Note this #define must come first!
#define _GNU_SOURCE 1
// If #define _GNU_SOURCE causes problems, this might work instead.
// It will cause problems for FreeBSD though!, because it turns off
// the needed __BSD_VISIBLE.
//#define _XOPEN_SOURCE 500
#include <string.h> // for memcmp
#ifdef HAVE_ASM_PTRACE_H
#include <asm/ptrace.h>
#endif
@@ -68,119 +63,10 @@
typedef ucontext ucontext_t;
#endif
// Take the example where function Foo() calls function Bar(). For
// many architectures, Bar() is responsible for setting up and tearing
// down its own stack frame. In that case, it's possible for the
// interrupt to happen when execution is in Bar(), but the stack frame
// is not properly set up (either before it's done being set up, or
// after it's been torn down but before Bar() returns). In those
// cases, the stack trace cannot see the caller function anymore.
//
// GetPC can try to identify this situation, on architectures where it
// might occur, and unwind the current function call in that case to
// avoid false edges in the profile graph (that is, edges that appear
// to show a call skipping over a function). To do this, we hard-code
// in the asm instructions we might see when setting up or tearing
// down a stack frame.
//
// This is difficult to get right: the instructions depend on the
// processor, the compiler ABI, and even the optimization level. This
// is a best effort patch -- if we fail to detect such a situation, or
// mess up the PC, nothing happens; the returned PC is not used for
// any further processing.
struct CallUnrollInfo {
// Offset from (e)ip register where this instruction sequence
// should be matched. Interpreted as bytes. Offset 0 is the next
// instruction to execute. Be extra careful with negative offsets in
// architectures of variable instruction length (like x86) - it is
// not that easy as taking an offset to step one instruction back!
int pc_offset;
// The actual instruction bytes. Feel free to make it larger if you
// need a longer sequence.
unsigned char ins[16];
// How many bytes to match from ins array?
int ins_size;
// The offset from the stack pointer (e)sp where to look for the
// call return address. Interpreted as bytes.
int return_sp_offset;
};
// The dereferences needed to get the PC from a struct ucontext were
// determined at configure time, and stored in the macro
// PC_FROM_UCONTEXT in config.h. The only thing we need to do here,
// then, is to do the magic call-unrolling for systems that support it.
// -- Special case 1: linux x86, for which we have CallUnrollInfo
#if defined(__linux) && defined(__i386) && defined(__GNUC__)
static const CallUnrollInfo callunrollinfo[] = {
// Entry to a function: push %ebp; mov %esp,%ebp
// Top-of-stack contains the caller IP.
{ 0,
{0x55, 0x89, 0xe5}, 3,
0
},
// Entry to a function, second instruction: push %ebp; mov %esp,%ebp
// Top-of-stack contains the old frame, caller IP is +4.
{ -1,
{0x55, 0x89, 0xe5}, 3,
4
},
// Return from a function: RET.
// Top-of-stack contains the caller IP.
{ 0,
{0xc3}, 1,
0
}
};
inline void* GetPC(const ucontext_t& signal_ucontext) {
// See comment above struct CallUnrollInfo. Only try instruction
// flow matching if both eip and esp looks reasonable.
const int eip = signal_ucontext.uc_mcontext.gregs[REG_EIP];
const int esp = signal_ucontext.uc_mcontext.gregs[REG_ESP];
if ((eip & 0xffff0000) != 0 && (~eip & 0xffff0000) != 0 &&
(esp & 0xffff0000) != 0) {
char* eip_char = reinterpret_cast<char*>(eip);
for (int i = 0; i < sizeof(callunrollinfo)/sizeof(*callunrollinfo); ++i) {
if (!memcmp(eip_char + callunrollinfo[i].pc_offset,
callunrollinfo[i].ins, callunrollinfo[i].ins_size)) {
// We have a match.
void **retaddr = (void**)(esp + callunrollinfo[i].return_sp_offset);
return *retaddr;
}
}
}
return (void*)eip;
}
// Special case #2: Windows, which has to do something totally different.
#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__MINGW32__)
// If this is ever implemented, probably the way to do it is to have
// profiler.cc use a high-precision timer via timeSetEvent:
// http://msdn2.microsoft.com/en-us/library/ms712713.aspx
// We'd use it in mode TIME_CALLBACK_FUNCTION/TIME_PERIODIC.
// The callback function would be something like prof_handler, but
// alas the arguments are different: no ucontext_t! I don't know
// how we'd get the PC (using StackWalk64?)
// http://msdn2.microsoft.com/en-us/library/ms680650.aspx
#include "base/logging.h" // for RAW_LOG
#ifndef HAVE_CYGWIN_SIGNAL_H
typedef int ucontext_t;
#endif
inline void* GetPC(const struct ucontext_t& signal_ucontext) {
RAW_LOG(ERROR, "GetPC is not yet implemented on Windows\n");
return NULL;
}
// Normal cases. If this doesn't compile, it's probably because
// If this doesn't compile, it's probably because
// PC_FROM_UCONTEXT is the empty string. You need to figure out
// the right value for your system, and add it to the list in
// configure.ac (or set it manually in your config.h).
#else
inline void* GetPC(const ucontext_t& signal_ucontext) {
#if defined(__s390__) && !defined(__s390x__)
// Mask out the AMODE31 bit from the PC recorded in the context.
@@ -190,6 +76,4 @@ inline void* GetPC(const ucontext_t& signal_ucontext) {
#endif
}
#endif
#endif // BASE_GETPC_H_
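
With the special cases gone, the default branch of this header boils down to dereferencing the configure-detected accessor. A sketch of what it effectively compiles to on x86-64 Linux, assuming PC_FROM_UCONTEXT was detected as uc_mcontext.gregs[REG_RIP]:

  inline void* GetPC(const ucontext_t& signal_ucontext) {
    // PC_FROM_UCONTEXT is substituted by config.h at build time.
    return (void*)(signal_ucontext.uc_mcontext.gregs[REG_RIP]);
  }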

src/stacktrace.cc

@@ -1,5 +1,6 @@
// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
// Copyright (c) 2005, Google Inc.
// Copyright (c) 2023, gperftools Contributors.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
@@ -29,25 +30,37 @@
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// ---
// Author: Sanjay Ghemawat
// Original Author: Sanjay Ghemawat.
//
// Most recent significant rework and extensions: Aliaksei
// Kandratsenka (all bugs are mine).
//
// Produce stack trace.
//
// There are three different ways we can try to get the stack trace:
// There are a few different ways we can try to get the stack trace:
//
// 1) Our hand-coded stack-unwinder. This depends on a certain stack
// layout, which is used by gcc (and those systems using a
// gcc-compatible ABI) on x86 systems, at least since gcc 2.95.
// It uses the frame pointer to do its work.
// layout, which is used by various ABIs. It uses the frame
// pointer to do its work.
//
// 2) The libunwind library. This is still in development, and as a
// separate library adds a new dependency, abut doesn't need a frame
// pointer. It also doesn't call malloc.
// 2) The libunwind library. It also doesn't call malloc (in most
// configurations). Note, there are at least 3 libunwind
// implementations currently available: "original" libunwind,
// LLVM's, and Android's. Only the original library has been tested
// so far.
//
// 3) The gdb unwinder -- also the one used by the c++ exception code.
// It's obviously well-tested, but has a fatal flaw: it can call
// malloc() from the unwinder. This is a problem because we're
// trying to use the unwinder to instrument malloc().
// 3) The "libgcc" unwinder -- also the one used by the c++ exception
// code. It uses _Unwind_Backtrace facility of modern ABIs. Some
// implementations occasionally call into malloc (which we're able
// to handle). Some implementations also use some internal locks,
// so it is not entirely compatible with backtracing from signal
// handlers.
//
// 4) backtrace() unwinder (available in glibc and execinfo on some
// BSDs). It is typically, but not always, implemented on top of the
// "libgcc" unwinder, so we have it as well. We use this one on OSX.
//
// 5) On windows we use RtlCaptureStackBackTrace.
//
// Note: if you add a new implementation here, make sure it works
// correctly when GetStackTrace() is called with max_depth == 0.
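
As a concrete illustration (hypothetical snippet, not part of this file), the degenerate call must report zero frames without touching the output array:

  #include <assert.h>
  #include "gperftools/stacktrace.h"

  void* buf[1];
  int n = GetStackTrace(buf, /* max_depth = */ 0, /* skip_count = */ 0);
  assert(n == 0);  // no frames may be recorded when max_depth == 0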
@@ -111,15 +124,6 @@ struct GetStackImplementation {
#define HAVE_GST_libunwind
#endif // USE_LIBUNWIND
#if defined(__i386__) || defined(__x86_64__)
#define STACKTRACE_INL_HEADER "stacktrace_x86-inl.h"
#define GST_SUFFIX x86
#include "stacktrace_impl_setup-inl.h"
#undef GST_SUFFIX
#undef STACKTRACE_INL_HEADER
#define HAVE_GST_x86
#endif // i386 || x86_64
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || defined(__riscv) || defined(__arm__))
// NOTE: legacy 32-bit arm works fine with recent clangs, but is broken in gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92172
#define STACKTRACE_INL_HEADER "stacktrace_generic_fp-inl.h"
@@ -182,88 +186,98 @@ struct GetStackImplementation {
#define HAVE_GST_win32
#endif
#if __cplusplus >= 202302L
# ifndef HAS_SOME_STACKTRACE_IMPL
# warning "Warning: no stacktrace capturing implementation for your OS"
# endif
#endif
#if (__x86_64__ || __i386__) && FORCED_FRAME_POINTERS
// x86-es (even i386 these days) default to no frame pointers. But
// historically we defaulted to the frame pointer unwinder whenever
// --enable-frame-pointers is given. So we keep this behavior.
#define PREFER_FP_UNWINDER 1
#elif TCMALLOC_DONT_PREFER_LIBUNWIND
#define PREFER_FP_UNWINDER 1
#else
#define PREFER_FP_UNWINDER 0
#endif
#if defined(PREFER_LIBGCC_UNWINDER) && !defined(HAVE_GST_libgcc)
#error user asked for libgcc unwinder to be default but it is not available
#endif
static GetStackImplementation impl__null = {
// GetStackFrames
[] (void **result, int *sizes, int max_depth, int skip_count) {
return 0;
},
// GetStackFramesWithContext
[] (void **result, int *sizes, int max_depth, int skip_count, const void* uc) {
return 0;
},
// GetStackTrace
[] (void **result, int max_depth, int skip_count) {
return 0;
},
// GetStackTraceWithContext
[] (void **result, int max_depth, int skip_count, const void* uc) {
return 0;
},
"null" // name
};
static GetStackImplementation *all_impls[] = {
#ifdef HAVE_GST_libgcc
&impl__libgcc,
#endif
#ifdef HAVE_GST_generic
&impl__generic,
#endif
#ifdef HAVE_GST_generic_fp
&impl__generic_fp,
#endif
#ifdef HAVE_GST_generic_fp
&impl__generic_fp_unsafe,
#endif
#ifdef HAVE_GST_libunwind
&impl__libunwind,
#endif
#ifdef HAVE_GST_x86
&impl__x86,
#endif
#ifdef HAVE_GST_arm
&impl__arm,
#endif
#ifdef HAVE_GST_ppc
&impl__ppc,
#endif
#ifdef HAVE_GST_instrument
&impl__instrument,
#endif
#ifdef HAVE_GST_win32
&impl__win32,
#endif
NULL
#ifdef HAVE_GST_ppc
&impl__ppc,
#endif
#if defined(HAVE_GST_generic_fp) && PREFER_FP_UNWINDER
&impl__generic_fp,
&impl__generic_fp_unsafe,
#endif
#if defined(HAVE_GST_libgcc) && defined(PREFER_LIBGCC_UNWINDER)
&impl__libgcc,
#endif
#ifdef HAVE_GST_libunwind
&impl__libunwind,
#endif
#if defined(HAVE_GST_libgcc) && !defined(PREFER_LIBGCC_UNWINDER)
&impl__libgcc,
#endif
#ifdef HAVE_GST_generic
&impl__generic,
#endif
#if defined(HAVE_GST_generic_fp) && !PREFER_FP_UNWINDER
&impl__generic_fp,
&impl__generic_fp_unsafe,
#endif
#ifdef HAVE_GST_arm
&impl__arm,
#endif
&impl__null
};
// ppc and i386 implementations prefer arch-specific asm implementations.
// arm's asm implementation is broken
#if defined(__i386__) || defined(__ppc__) || defined(__PPC__) || defined(__loongarch64)
#if !defined(NO_FRAME_POINTER)
#define TCMALLOC_DONT_PREFER_LIBUNWIND
#endif
#endif
static bool get_stack_impl_inited;
static GetStackImplementation *get_stack_impl;
#if defined(HAVE_GST_instrument)
static GetStackImplementation *get_stack_impl = &impl__instrument;
#elif defined(HAVE_GST_win32)
static GetStackImplementation *get_stack_impl = &impl__win32;
#elif defined(HAVE_GST_generic_fp) && !defined(NO_FRAME_POINTER) \
&& !defined(__riscv) \
&& (!defined(HAVE_GST_libunwind) || defined(TCMALLOC_DONT_PREFER_LIBUNWIND))
static GetStackImplementation *get_stack_impl = &impl__generic_fp;
#elif defined(HAVE_GST_x86) && defined(TCMALLOC_DONT_PREFER_LIBUNWIND)
static GetStackImplementation *get_stack_impl = &impl__x86;
#elif defined(HAVE_GST_ppc) && defined(TCMALLOC_DONT_PREFER_LIBUNWIND)
static GetStackImplementation *get_stack_impl = &impl__ppc;
#elif defined(HAVE_GST_libunwind)
static GetStackImplementation *get_stack_impl = &impl__libunwind;
#elif defined(HAVE_GST_libgcc)
static GetStackImplementation *get_stack_impl = &impl__libgcc;
#elif defined(HAVE_GST_generic)
static GetStackImplementation *get_stack_impl = &impl__generic;
#elif defined(HAVE_GST_arm)
static GetStackImplementation *get_stack_impl = &impl__arm;
#elif 0
#if 0
// This is for the benefit of code analysis tools that may have
// trouble with the computed #include above.
# include "stacktrace_x86-inl.h"
# include "stacktrace_libunwind-inl.h"
# include "stacktrace_generic-inl.h"
# include "stacktrace_powerpc-inl.h"
# include "stacktrace_generic_fp-inl.h"
# include "stacktrace_powerpc-linux-inl.h"
# include "stacktrace_win32-inl.h"
# include "stacktrace_arm-inl.h"
# include "stacktrace_instrument-inl.h"
#else
#error Cannot calculate stack trace: will need to write for your environment
#endif
static int ATTRIBUTE_NOINLINE frame_forcer(int rv) {
return rv;
}
static void init_default_stack_impl_inner(void);
namespace tcmalloc {
@@ -272,26 +286,30 @@ namespace tcmalloc {
}
namespace {
using tcmalloc::EnterStacktraceScope;
using tcmalloc::LeaveStacktraceScope;
using tcmalloc::EnterStacktraceScope;
using tcmalloc::LeaveStacktraceScope;
class StacktraceScope {
bool stacktrace_allowed;
public:
StacktraceScope() {
stacktrace_allowed = true;
stacktrace_allowed = EnterStacktraceScope();
class StacktraceScope {
bool stacktrace_allowed;
public:
StacktraceScope() {
stacktrace_allowed = true;
stacktrace_allowed = EnterStacktraceScope();
}
bool IsStacktraceAllowed() {
return stacktrace_allowed;
}
// NOTE: noinline here ensures that we don't tail-call the GetStackXXX
// calls below, which is crucial because we have to pay attention
// to the skip_count argument.
ATTRIBUTE_NOINLINE ~StacktraceScope() {
if (stacktrace_allowed) {
LeaveStacktraceScope();
}
bool IsStacktraceAllowed() {
return stacktrace_allowed;
}
~StacktraceScope() {
if (stacktrace_allowed) {
LeaveStacktraceScope();
}
}
};
}
}
};
} // namespace
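
Each public entry point then opens with this guard; the shape, reconstructed from the truncated hunks below (so treat it as a sketch rather than verbatim code), is:

  ATTRIBUTE_NOINLINE
  PERFTOOLS_DLL_DECL int GetStackTrace(void** result, int max_depth,
                                       int skip_count) {
    StacktraceScope scope;
    if (!scope.IsStacktraceAllowed()) {
      return 0;
    }
    init_default_stack_impl_inner();
    return get_stack_impl->GetStackTracePtr(result, max_depth, skip_count);
  }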
ATTRIBUTE_NOINLINE
PERFTOOLS_DLL_DECL int GetStackFrames(void** result, int* sizes, int max_depth,
@@ -301,7 +319,8 @@ PERFTOOLS_DLL_DECL int GetStackFrames(void** result, int* sizes, int max_depth,
return 0;
}
init_default_stack_impl_inner();
return frame_forcer(get_stack_impl->GetStackFramesPtr(result, sizes, max_depth, skip_count));
return get_stack_impl->GetStackFramesPtr(result, sizes,
max_depth, skip_count);
}
ATTRIBUTE_NOINLINE
@@ -312,9 +331,8 @@ PERFTOOLS_DLL_DECL int GetStackFramesWithContext(void** result, int* sizes, int
return 0;
}
init_default_stack_impl_inner();
return frame_forcer(get_stack_impl->GetStackFramesWithContextPtr(
result, sizes, max_depth,
skip_count, uc));
return get_stack_impl->GetStackFramesWithContextPtr(result, sizes, max_depth,
skip_count, uc);
}
ATTRIBUTE_NOINLINE
@@ -325,7 +343,7 @@ PERFTOOLS_DLL_DECL int GetStackTrace(void** result, int max_depth,
return 0;
}
init_default_stack_impl_inner();
return frame_forcer(get_stack_impl->GetStackTracePtr(result, max_depth, skip_count));
return get_stack_impl->GetStackTracePtr(result, max_depth, skip_count);
}
ATTRIBUTE_NOINLINE
@@ -336,13 +354,10 @@ PERFTOOLS_DLL_DECL int GetStackTraceWithContext(void** result, int max_depth,
return 0;
}
init_default_stack_impl_inner();
return frame_forcer(get_stack_impl->GetStackTraceWithContextPtr(
result, max_depth, skip_count, uc));
return get_stack_impl->GetStackTraceWithContextPtr(result, max_depth,
skip_count, uc);
}
// As of this writing, aarch64 has completely borked libunwind, so
// lets test this case and fall back to frame pointers (which is
// nearly but not quite perfect).
ATTRIBUTE_NOINLINE
static void maybe_convert_libunwind_to_generic_fp() {
#if defined(HAVE_GST_libunwind) && defined(HAVE_GST_generic_fp)
@@ -350,31 +365,36 @@ static void maybe_convert_libunwind_to_generic_fp() {
return;
}
// Okay we're on libunwind and we have generic_fp, check if
// libunwind returns bogus results.
bool want_to_replace = false;
// Sometime recently, aarch64 had a completely borked libunwind, so
// let's test for this case and fall back to frame pointers (which
// is nearly but not quite perfect).
void* stack[4];
int rv = get_stack_impl->GetStackTracePtr(stack, 4, 0);
if (rv > 2) {
// Seems fine
return;
want_to_replace = (rv <= 2);
if (want_to_replace) {
get_stack_impl = &impl__generic_fp;
}
// bogus. So replacing with generic_fp
get_stack_impl = &impl__generic_fp;
#endif
#endif // have libunwind and generic_fp
}
static void init_default_stack_impl_inner(void) {
if (get_stack_impl_inited) {
return;
}
get_stack_impl = all_impls[0];
get_stack_impl_inited = true;
const char *val = TCMallocGetenvSafe("TCMALLOC_STACKTRACE_METHOD");
if (!val || !*val) {
// If no explicit implementation is requested, consider changing
// libunwind->generic_fp in some cases.
maybe_convert_libunwind_to_generic_fp();
return;
}
for (GetStackImplementation **p = all_impls; *p; p++) {
GetStackImplementation *c = *p;
for (int i = 0; i < sizeof(all_impls) / sizeof(all_impls[0]); i++) {
GetStackImplementation *c = all_impls[i];
if (strcmp(c->name, val) == 0) {
get_stack_impl = c;
return;
@@ -388,11 +408,11 @@ static void init_default_stack_impl(void) {
init_default_stack_impl_inner();
if (EnvToBool("TCMALLOC_STACKTRACE_METHOD_VERBOSE", false)) {
fprintf(stderr, "Chosen stacktrace method is %s\nSupported methods:\n", get_stack_impl->name);
for (GetStackImplementation **p = all_impls; *p; p++) {
GetStackImplementation *c = *p;
for (int i = 0; i < sizeof(all_impls) / sizeof(all_impls[0]); i++) {
GetStackImplementation *c = all_impls[i];
fprintf(stderr, "* %s\n", c->name);
}
fputs("\n", stderr);
fputs("\nUse TCMALLOC_STACKTRACE_METHOD environment variable to override\n", stderr);
}
}
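
Putting the two environment variables together, a run might look roughly like this. The output format follows the fprintf calls above; the method names are guesses based on the GST_SUFFIX values in this file, and the exact list depends on what was compiled in:

  $ TCMALLOC_STACKTRACE_METHOD=generic_fp \
    TCMALLOC_STACKTRACE_METHOD_VERBOSE=1 ./my_program
  Chosen stacktrace method is generic_fp
  Supported methods:
  * generic_fp
  * generic_fp_unsafe
  * libunwind
  * null

  Use TCMALLOC_STACKTRACE_METHOD environment variable to override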

src/stacktrace_impl_setup-inl.h

@@ -47,7 +47,7 @@
#define IS_STACK_FRAMES 0
#define IS_WITH_CONTEXT 0
#define GET_STACK_TRACE_OR_FRAMES \
SIS_CONCAT(GetStackTrace_, GST_SUFFIX)(void **result, int max_depth, int skip_count)
ATTRIBUTE_NOINLINE SIS_CONCAT(GetStackTrace_, GST_SUFFIX)(void **result, int max_depth, int skip_count)
#include STACKTRACE_INL_HEADER
#undef IS_STACK_FRAMES
#undef IS_WITH_CONTEXT
@@ -56,7 +56,7 @@
#define IS_STACK_FRAMES 1
#define IS_WITH_CONTEXT 0
#define GET_STACK_TRACE_OR_FRAMES \
SIS_CONCAT(GetStackFrames_, GST_SUFFIX)(void **result, int *sizes, int max_depth, int skip_count)
ATTRIBUTE_NOINLINE SIS_CONCAT(GetStackFrames_, GST_SUFFIX)(void **result, int *sizes, int max_depth, int skip_count)
#include STACKTRACE_INL_HEADER
#undef IS_STACK_FRAMES
#undef IS_WITH_CONTEXT
@@ -65,8 +65,8 @@
#define IS_STACK_FRAMES 0
#define IS_WITH_CONTEXT 1
#define GET_STACK_TRACE_OR_FRAMES \
SIS_CONCAT(GetStackTraceWithContext_, GST_SUFFIX)(void **result, int max_depth, \
int skip_count, const void *ucp)
ATTRIBUTE_NOINLINE SIS_CONCAT(GetStackTraceWithContext_, GST_SUFFIX)(void **result, int max_depth, \
int skip_count, const void *ucp)
#include STACKTRACE_INL_HEADER
#undef IS_STACK_FRAMES
#undef IS_WITH_CONTEXT
@@ -75,8 +75,8 @@
#define IS_STACK_FRAMES 1
#define IS_WITH_CONTEXT 1
#define GET_STACK_TRACE_OR_FRAMES \
SIS_CONCAT(GetStackFramesWithContext_, GST_SUFFIX)(void **result, int *sizes, int max_depth, \
int skip_count, const void *ucp)
ATTRIBUTE_NOINLINE SIS_CONCAT(GetStackFramesWithContext_, GST_SUFFIX)(void **result, int *sizes, int max_depth, \
int skip_count, const void *ucp)
#include STACKTRACE_INL_HEADER
#undef IS_STACK_FRAMES
#undef IS_WITH_CONTEXT
@@ -92,3 +92,7 @@ static GetStackImplementation SIS_CONCAT(impl__,GST_SUFFIX) = {
#undef SIS_CONCAT2
#undef SIS_CONCAT
#ifndef HAS_SOME_STACKTRACE_IMPL
#define HAS_SOME_STACKTRACE_IMPL
#endif
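
To make the macro plumbing concrete: defining GST_SUFFIX to, say, generic_fp and including this header stamps out four noinline functions plus a descriptor, roughly like the following (an illustrative expansion, not verbatim preprocessor output):

  static int ATTRIBUTE_NOINLINE GetStackTrace_generic_fp(
      void** result, int max_depth, int skip_count) {
    /* body comes from STACKTRACE_INL_HEADER */
  }
  // ...likewise GetStackFrames_generic_fp,
  // GetStackTraceWithContext_generic_fp and
  // GetStackFramesWithContext_generic_fp...

  static GetStackImplementation impl__generic_fp = {
    GetStackFrames_generic_fp,
    GetStackFramesWithContext_generic_fp,
    GetStackTrace_generic_fp,
    GetStackTraceWithContext_generic_fp,
    "generic_fp"  // name; field order assumed to follow impl__null above
  };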

src/stacktrace_x86-inl.h (deleted)

@@ -1,354 +0,0 @@
// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// ---
// Author: Sanjay Ghemawat
//
// Produce stack trace
#ifndef BASE_STACKTRACE_X86_INL_H_
#define BASE_STACKTRACE_X86_INL_H_
// Note: this file is included into stacktrace.cc more than once.
// Anything that should only be defined once should be here:
#include "config.h"
#include <stdlib.h> // for NULL
#include <assert.h>
#if defined(HAVE_SYS_UCONTEXT_H)
#include <sys/ucontext.h>
#elif defined(HAVE_UCONTEXT_H)
#include <ucontext.h> // for ucontext_t
#elif defined(HAVE_CYGWIN_SIGNAL_H)
// cygwin/signal.h has a buglet where it uses pthread_attr_t without
// #including <pthread.h> itself. So we have to do it.
# ifdef HAVE_PTHREAD
# include <pthread.h>
# endif
#include <cygwin/signal.h>
typedef ucontext ucontext_t;
#endif
#ifdef HAVE_STDINT_H
#include <stdint.h> // for uintptr_t
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_MMAP
#include <sys/mman.h> // for msync
#include "base/vdso_support.h"
#endif
#include "gperftools/stacktrace.h"
#if defined(__linux__) && defined(__i386__) && defined(__ELF__) && defined(HAVE_MMAP)
// Count "push %reg" instructions in VDSO __kernel_vsyscall(),
// preceding "syscall" or "sysenter".
// If __kernel_vsyscall uses frame pointer, answer 0.
//
// kMaxBytes tells how many instruction bytes of __kernel_vsyscall
// to analyze before giving up. Up to kMaxBytes+1 bytes of
// instructions could be accessed.
//
// Here are known __kernel_vsyscall instruction sequences:
//
// SYSENTER (linux-2.6.26/arch/x86/vdso/vdso32/sysenter.S).
// Used on Intel.
// 0xffffe400 <__kernel_vsyscall+0>: push %ecx
// 0xffffe401 <__kernel_vsyscall+1>: push %edx
// 0xffffe402 <__kernel_vsyscall+2>: push %ebp
// 0xffffe403 <__kernel_vsyscall+3>: mov %esp,%ebp
// 0xffffe405 <__kernel_vsyscall+5>: sysenter
//
// SYSCALL (see linux-2.6.26/arch/x86/vdso/vdso32/syscall.S).
// Used on AMD.
// 0xffffe400 <__kernel_vsyscall+0>: push %ebp
// 0xffffe401 <__kernel_vsyscall+1>: mov %ecx,%ebp
// 0xffffe403 <__kernel_vsyscall+3>: syscall
//
// i386 (see linux-2.6.26/arch/x86/vdso/vdso32/int80.S)
// 0xffffe400 <__kernel_vsyscall+0>: int $0x80
// 0xffffe401 <__kernel_vsyscall+1>: ret
//
static const int kMaxBytes = 10;
// We use assert()s instead of DCHECK()s -- this is too low level
// for DCHECK().
static int CountPushInstructions(const unsigned char *const addr) {
int result = 0;
for (int i = 0; i < kMaxBytes; ++i) {
if (addr[i] == 0x89) {
// "mov reg,reg"
if (addr[i + 1] == 0xE5) {
// Found "mov %esp,%ebp".
return 0;
}
++i; // Skip register encoding byte.
} else if (addr[i] == 0x0F &&
(addr[i + 1] == 0x34 || addr[i + 1] == 0x05)) {
// Found "sysenter" or "syscall".
return result;
} else if ((addr[i] & 0xF0) == 0x50) {
// Found "push %reg".
++result;
} else if (addr[i] == 0xCD && addr[i + 1] == 0x80) {
// Found "int $0x80"
assert(result == 0);
return 0;
} else {
// Unexpected instruction.
assert(0 == "unexpected instruction in __kernel_vsyscall");
return 0;
}
}
// Unexpected: didn't find SYSENTER or SYSCALL in
// [__kernel_vsyscall, __kernel_vsyscall + kMaxBytes) interval.
assert(0 == "did not find SYSENTER or SYSCALL in __kernel_vsyscall");
return 0;
}
#endif
// Given a pointer to a stack frame, locate and return the calling
// stackframe, or return NULL if no stackframe can be found. Perform sanity
// checks (the strictness of which is controlled by the boolean parameter
// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned.
template<bool STRICT_UNWINDING, bool WITH_CONTEXT>
static void **NextStackFrame(void **old_sp, const void *uc) {
void **new_sp = (void **) *old_sp;
#if defined(__linux__) && defined(__i386__) && defined(HAVE_VDSO_SUPPORT)
if (WITH_CONTEXT && uc != NULL) {
// How many "push %reg" instructions are there at __kernel_vsyscall?
// This is constant for a given kernel and processor, so compute
// it only once.
static int num_push_instructions = -1; // Sentinel: not computed yet.
// Initialize with sentinel value: __kernel_rt_sigreturn can not possibly
// be there.
static const unsigned char *kernel_rt_sigreturn_address = NULL;
static const unsigned char *kernel_vsyscall_address = NULL;
if (num_push_instructions == -1) {
base::VDSOSupport vdso;
if (vdso.IsPresent()) {
base::VDSOSupport::SymbolInfo rt_sigreturn_symbol_info;
base::VDSOSupport::SymbolInfo vsyscall_symbol_info;
if (!vdso.LookupSymbol("__kernel_rt_sigreturn", "LINUX_2.5",
STT_FUNC, &rt_sigreturn_symbol_info) ||
!vdso.LookupSymbol("__kernel_vsyscall", "LINUX_2.5",
STT_FUNC, &vsyscall_symbol_info) ||
rt_sigreturn_symbol_info.address == NULL ||
vsyscall_symbol_info.address == NULL) {
// Unexpected: 32-bit VDSO is present, yet one of the expected
// symbols is missing or NULL.
assert(0 == "VDSO is present, but doesn't have expected symbols");
num_push_instructions = 0;
} else {
kernel_rt_sigreturn_address =
reinterpret_cast<const unsigned char *>(
rt_sigreturn_symbol_info.address);
kernel_vsyscall_address =
reinterpret_cast<const unsigned char *>(
vsyscall_symbol_info.address);
num_push_instructions =
CountPushInstructions(kernel_vsyscall_address);
}
} else {
num_push_instructions = 0;
}
}
if (num_push_instructions != 0 && kernel_rt_sigreturn_address != NULL &&
old_sp[1] == kernel_rt_sigreturn_address) {
const ucontext_t *ucv = static_cast<const ucontext_t *>(uc);
// This kernel does not use frame pointer in its VDSO code,
// and so %ebp is not suitable for unwinding.
void **const reg_ebp =
reinterpret_cast<void **>(ucv->uc_mcontext.gregs[REG_EBP]);
const unsigned char *const reg_eip =
reinterpret_cast<unsigned char *>(ucv->uc_mcontext.gregs[REG_EIP]);
if (new_sp == reg_ebp &&
kernel_vsyscall_address <= reg_eip &&
reg_eip - kernel_vsyscall_address < kMaxBytes) {
// We "stepped up" to __kernel_vsyscall, but %ebp is not usable.
// Restore from 'ucv' instead.
void **const reg_esp =
reinterpret_cast<void **>(ucv->uc_mcontext.gregs[REG_ESP]);
// Check that alleged %esp is not NULL and is reasonably aligned.
if (reg_esp &&
((uintptr_t)reg_esp & (sizeof(reg_esp) - 1)) == 0) {
// Check that alleged %esp is actually readable. This is to prevent
// "double fault" in case we hit the first fault due to e.g. stack
// corruption.
//
// page_size is linker-initalized to avoid async-unsafe locking
// that GCC would otherwise insert (__cxa_guard_acquire etc).
static int page_size;
if (page_size == 0) {
// First time through.
page_size = getpagesize();
}
void *const reg_esp_aligned =
reinterpret_cast<void *>(
(uintptr_t)(reg_esp + num_push_instructions - 1) &
~(page_size - 1));
if (msync(reg_esp_aligned, page_size, MS_ASYNC) == 0) {
// Alleged %esp is readable, use it for further unwinding.
new_sp = reinterpret_cast<void **>(
reg_esp[num_push_instructions - 1]);
}
}
}
}
}
#endif
// Check that the transition from frame pointer old_sp to frame
// pointer new_sp isn't clearly bogus
if (STRICT_UNWINDING) {
// With the stack growing downwards, older stack frame must be
// at a greater address that the current one.
if (new_sp <= old_sp) return NULL;
// Assume stack frames larger than 100,000 bytes are bogus.
if ((uintptr_t)new_sp - (uintptr_t)old_sp > 100000) return NULL;
} else {
// In the non-strict mode, allow discontiguous stack frames.
// (alternate-signal-stacks for example).
if (new_sp == old_sp) return NULL;
if (new_sp > old_sp) {
// And allow frames upto about 1MB.
const uintptr_t delta = (uintptr_t)new_sp - (uintptr_t)old_sp;
const uintptr_t acceptable_delta = 1000000;
if (delta > acceptable_delta) {
return NULL;
}
}
}
if ((uintptr_t)new_sp & (sizeof(void *) - 1)) return NULL;
#ifdef __i386__
// On 64-bit machines, the stack pointer can be very close to
// 0xffffffff, so we explicitly check for a pointer into the
// last two pages in the address space
if ((uintptr_t)new_sp >= 0xffffe000) return NULL;
#endif
#ifdef HAVE_MMAP
if (!STRICT_UNWINDING) {
// Lax sanity checks cause a crash on AMD-based machines with
// VDSO-enabled kernels.
// Make an extra sanity check to insure new_sp is readable.
// Note: NextStackFrame<false>() is only called while the program
// is already on its last leg, so it's ok to be slow here.
static int page_size = getpagesize();
void *new_sp_aligned = (void *)((uintptr_t)new_sp & ~(page_size - 1));
if (msync(new_sp_aligned, page_size, MS_ASYNC) == -1)
return NULL;
}
#endif
return new_sp;
}
#endif // BASE_STACKTRACE_X86_INL_H_
// Note: this part of the file is included several times.
// Do not put globals below.
// The following 4 functions are generated from the code below:
// GetStack{Trace,Frames}()
// GetStack{Trace,Frames}WithContext()
//
// These functions take the following args:
// void** result: the stack-trace, as an array
// int* sizes: the size of each stack frame, as an array
// (GetStackFrames* only)
// int max_depth: the size of the result (and sizes) array(s)
// int skip_count: how many stack pointers to skip before storing in result
// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only)
static int GET_STACK_TRACE_OR_FRAMES {
void **sp;
#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__
// __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8.
// It's always correct on llvm, and the techniques below aren't (in
// particular, llvm-gcc will make a copy of pcs, so it's not in sp[2]),
// so we also prefer __builtin_frame_address when running under llvm.
sp = reinterpret_cast<void**>(__builtin_frame_address(0));
#elif defined(__i386__)
// Stack frame format:
// sp[0] pointer to previous frame
// sp[1] caller address
// sp[2] first argument
// ...
// NOTE: This will break under llvm, since result is a copy and not in sp[2]
sp = (void **)&result - 2;
#elif defined(__x86_64__)
unsigned long rbp;
// Move the value of the register %rbp into the local variable rbp.
// We need 'volatile' to prevent this instruction from getting moved
// around during optimization to before function prologue is done.
// An alternative way to achieve this
// would be (before this __asm__ instruction) to call Noop() defined as
// static void Noop() __attribute__ ((noinline)); // prevent inlining
// static void Noop() { asm(""); } // prevent optimizing-away
__asm__ volatile ("mov %%rbp, %0" : "=r" (rbp));
// Arguments are passed in registers on x86-64, so we can't just
// offset from &result
sp = (void **) rbp;
#else
# error Using stacktrace_x86-inl.h on a non x86 architecture!
#endif
skip_count++; // skip parent's frame due to indirection in stacktrace.cc
int n = 0;
while (sp && n < max_depth) {
if (*(sp+1) == reinterpret_cast<void *>(0)) {
// In 64-bit code, we often see a frame that
// points to itself and has a return address of 0.
break;
}
#if !IS_WITH_CONTEXT
const void *const ucp = NULL;
#endif
void **next_sp = NextStackFrame<!IS_STACK_FRAMES, IS_WITH_CONTEXT>(sp, ucp);
if (skip_count > 0) {
skip_count--;
} else {
result[n] = *(sp+1);
#if IS_STACK_FRAMES
if (next_sp > sp) {
sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp;
} else {
// A frame-size of 0 is used to indicate unknown frame size.
sizes[n] = 0;
}
#endif
n++;
}
sp = next_sp;
}
return n;
}

src/tests/stacktrace_unittest.cc

@@ -35,9 +35,12 @@
#include <stdio.h>
#include <stdlib.h>
// We only test this on Linux because frame skip count works there and
// doesn't on FreeBSD.
#if __linux__
// Correctly capturing a backtrace from a signal handler is the most
// brittle case. A number of configurations on Linux work, but not
// all. The same applies to BSDs. But let's somewhat broadly ask those
// setups to be tested. In general, if correct backtraces are needed
// for the CPU profiler, this test should pass as well.
#if __linux__ || (__FreeBSD__ && (__x86_64__ || __i386__)) || __NetBSD__
#include <signal.h>
#include <sys/time.h>
#define TEST_UCONTEXT_BITS 1
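
For orientation, a compressed sketch of the scenario this guards, assuming only the public gperftools/stacktrace.h API (the real test is considerably more thorough):

  #include <signal.h>
  #include <sys/time.h>
  #include "gperftools/stacktrace.h"

  static void* frames[32];
  static volatile int depth;

  // SIGPROF arrives asynchronously; unwinding through the signal
  // frame via the ucontext argument is exactly the brittle case.
  static void prof_handler(int, siginfo_t*, void* uc) {
    depth = GetStackTraceWithContext(frames, 32, 0, uc);
  }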