mirror of
https://github.com/gperftools/gperftools
synced 2025-02-07 23:21:28 +00:00
support more OSes in generic-fp
We're still x86+arm+riscv only, but netbsd and freebsd work too. OSX as well.
This commit is contained in:
parent
4b78ffd03c
commit
d9b178695f
@ -120,9 +120,8 @@ struct GetStackImplementation {
|
||||
#define HAVE_GST_x86
|
||||
#endif // i386 || x86_64
|
||||
|
||||
// Sadly, different OSes have very different mcontexts even for
|
||||
// identical hardware arch. So keep it linux-only for now.
|
||||
#if defined(__GNUC__) && __linux__ && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || defined(__riscv))
|
||||
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || defined(__riscv) || defined(__arm__))
|
||||
// NOTE: legacy 32-bit arm works fine with recent clangs, but is broken in gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92172
|
||||
#define STACKTRACE_INL_HEADER "stacktrace_generic_fp-inl.h"
|
||||
#define GST_SUFFIX generic_fp
|
||||
#include "stacktrace_impl_setup-inl.h"
|
||||
|
@ -44,10 +44,29 @@
|
||||
// This is only used on OS-es with mmap support.
|
||||
#include <sys/mman.h>
|
||||
|
||||
// Set this to true to disable "probing" of addresses that are read to
|
||||
// make backtracing less-safe, but faster.
|
||||
#ifndef TCMALLOC_UNSAFE_GENERIC_FP_STACKTRACE
|
||||
#define TCMALLOC_UNSAFE_GENERIC_FP_STACKTRACE 0
|
||||
#if HAVE_SYS_SYSCALL_H
|
||||
#include <sys/syscall.h>
|
||||
#endif
|
||||
|
||||
#if defined(PC_FROM_UCONTEXT) && (HAVE_SYS_UCONTEXT_H || HAVE_UCONTEXT_H)
|
||||
#include "getpc.h"
|
||||
#define HAVE_GETPC 1
|
||||
#endif
|
||||
|
||||
// Our Autoconf setup enables -fno-omit-frame-pointer, but let's still
|
||||
// ask for it just in case.
|
||||
//
|
||||
// Note: clang doesn't know about optimize attribute. But clang (and
|
||||
// gcc too, apparently) automagically forces generation of frame
|
||||
// pointer whenever __builtin_frame_address is used.
|
||||
#if defined(__GNUC__) && defined(__has_attribute)
|
||||
#if __has_attribute(optimize)
|
||||
#define ENABLE_FP_ATTRIBUTE __attribute__((optimize("no-omit-frame-pointer")))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef ENABLE_FP_ATTRIBUTE
|
||||
#define ENABLE_FP_ATTRIBUTE
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
@ -93,12 +112,25 @@ static bool CheckPageIsReadable(void* ptr, void* checked_ptr) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return (msync(reinterpret_cast<void*>(addr), pagesize, MS_ASYNC) == 0);
|
||||
int rc;
|
||||
#if __FreeBSD__ && defined(SYS_msync)
|
||||
// FreeBSD needs this. Our first stacktrace capturing happens early
|
||||
// and apparently their threading facility isn't ready. And msync as
|
||||
// well as a few other "trivial" calls crash.
|
||||
rc = syscall(SYS_msync, reinterpret_cast<void*>(addr), pagesize, MS_ASYNC);
|
||||
#else
|
||||
rc = msync(reinterpret_cast<void*>(addr), pagesize, MS_ASYNC);
|
||||
#endif
|
||||
|
||||
return (rc == 0);
|
||||
}
|
||||
|
||||
template <bool UnsafeAccesses, bool WithSizes>
|
||||
ATTRIBUTE_NOINLINE // forces architectures with link register to save it
|
||||
ENABLE_FP_ATTRIBUTE
|
||||
int capture(void **result, int max_depth, int skip_count,
|
||||
void* initial_frame, void* const * initial_pc) {
|
||||
void* initial_frame, void* const * initial_pc,
|
||||
int *sizes) {
|
||||
int i = 0;
|
||||
|
||||
max_depth += skip_count;
|
||||
@ -118,11 +150,21 @@ int capture(void **result, int max_depth, int skip_count,
|
||||
constexpr uintptr_t kTooSmallAddr = 16 << 10;
|
||||
constexpr uintptr_t kFrameSizeThreshold = 128 << 10;
|
||||
|
||||
#ifdef __arm__
|
||||
// note, (32-bit, legacy) arm support is not entirely functional
|
||||
// w.r.t. frame-pointer-based backtracing. Only recent clangs
|
||||
// generate "right" frame pointer setup and only with
|
||||
// --enable-frame-pointers. Current gcc's are hopeless (somewhat
|
||||
// older gcc's (circa gcc 6 or so) did something that looks right,
|
||||
// but not recent ones).
|
||||
constexpr uintptr_t kAlignment = 4;
|
||||
#else
|
||||
// This is simplistic yet. Here we're targeting x86, aarch64 and
|
||||
// riscv. They all have 16 bytes stack alignment (even 32 bit
|
||||
// riscv). This can be made more elaborate as we consider more
|
||||
// architectures.
|
||||
constexpr uintptr_t kAlignment = 16;
|
||||
#endif
|
||||
|
||||
uintptr_t initial_frame_addr = reinterpret_cast<uintptr_t>(initial_frame);
|
||||
if (((initial_frame_addr + sizeof(frame)) & (kAlignment - 1)) != 0) {
|
||||
@ -132,11 +174,14 @@ int capture(void **result, int max_depth, int skip_count,
|
||||
return i;
|
||||
}
|
||||
|
||||
frame* prev_f = nullptr;
|
||||
// Note, we assume here that this function's frame pointer is not
|
||||
// bogus. Which is true if this code is built with
|
||||
// -fno-omit-frame-pointer.
|
||||
frame* prev_f = reinterpret_cast<frame*>(__builtin_frame_address(0));
|
||||
frame *f = adjust_fp(reinterpret_cast<frame*>(initial_frame));
|
||||
|
||||
while (i < max_depth) {
|
||||
if (!TCMALLOC_UNSAFE_GENERIC_FP_STACKTRACE
|
||||
if (!UnsafeAccesses
|
||||
&& !CheckPageIsReadable(&f->parent, prev_f)) {
|
||||
break;
|
||||
}
|
||||
@ -147,6 +192,9 @@ int capture(void **result, int max_depth, int skip_count,
|
||||
}
|
||||
|
||||
if (i >= skip_count) {
|
||||
if (WithSizes) {
|
||||
sizes[i - skip_count] = reinterpret_cast<uintptr_t>(prev_f) - reinterpret_cast<uintptr_t>(f);
|
||||
}
|
||||
result[i - skip_count] = pc;
|
||||
}
|
||||
|
||||
@ -158,6 +206,7 @@ int capture(void **result, int max_depth, int skip_count,
|
||||
if (parent_frame_addr < kTooSmallAddr) {
|
||||
break;
|
||||
}
|
||||
|
||||
// stack grows towards smaller addresses, so if we didn't see
|
||||
// the frame address increase (going from child to parent), it is a bad
|
||||
// frame. We also test if frame is too big since that is another
|
||||
@ -175,6 +224,9 @@ int capture(void **result, int max_depth, int skip_count,
|
||||
|
||||
f = adjust_fp(reinterpret_cast<frame*>(parent_frame_addr));
|
||||
}
|
||||
if (WithSizes && i > 0 && skip_count == 0) {
|
||||
sizes[0] = 0;
|
||||
}
|
||||
return i - skip_count;
|
||||
}
|
||||
|
||||
@ -198,9 +250,24 @@ int capture(void **result, int max_depth, int skip_count,
|
||||
// int skip_count: how many stack pointers to skip before storing in result
|
||||
// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only)
|
||||
|
||||
// Set this to true to disable "probing" of addresses that are read to
|
||||
// make backtracing less-safe, but faster.
|
||||
#ifndef TCMALLOC_UNSAFE_GENERIC_FP_STACKTRACE
|
||||
#define TCMALLOC_UNSAFE_GENERIC_FP_STACKTRACE 0
|
||||
#endif
|
||||
|
||||
ENABLE_FP_ATTRIBUTE
|
||||
static int GET_STACK_TRACE_OR_FRAMES {
|
||||
if (max_depth == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if IS_STACK_FRAMES
|
||||
constexpr bool WithSizes = true;
|
||||
memset(sizes, 0, sizeof(*sizes) * max_depth);
|
||||
#else
|
||||
constexpr bool WithSizes = false;
|
||||
int * const sizes = nullptr;
|
||||
#endif
|
||||
|
||||
// one for this function
|
||||
@ -208,30 +275,79 @@ static int GET_STACK_TRACE_OR_FRAMES {
|
||||
|
||||
void* const * initial_pc = nullptr;
|
||||
void* initial_frame = __builtin_frame_address(0);
|
||||
int n;
|
||||
|
||||
#if IS_WITH_CONTEXT
|
||||
#if IS_WITH_CONTEXT && (HAVE_SYS_UCONTEXT_H || HAVE_UCONTEXT_H)
|
||||
if (ucp) {
|
||||
auto uc = static_cast<const ucontext_t*>(ucp);
|
||||
#ifdef __riscv
|
||||
initial_pc = reinterpret_cast<void* const *>(&uc->uc_mcontext.__gregs[REG_PC]);
|
||||
initial_frame = reinterpret_cast<void*>(uc->uc_mcontext.__gregs[REG_S0]);
|
||||
#elif __aarch64__
|
||||
initial_pc = reinterpret_cast<void* const *>(&uc->uc_mcontext.pc);
|
||||
initial_frame = reinterpret_cast<void*>(uc->uc_mcontext.regs[29]);
|
||||
#elif __i386__
|
||||
initial_pc = reinterpret_cast<void* const *>(&uc->uc_mcontext.gregs[REG_EIP]);
|
||||
initial_frame = reinterpret_cast<void*>(uc->uc_mcontext.gregs[REG_EBP]);
|
||||
|
||||
// We have to resort to macro since different architectures have
|
||||
// different concrete types for those args.
|
||||
#define SETUP_FRAME(pc_ptr, frame_addr) \
|
||||
do { \
|
||||
initial_pc = reinterpret_cast<void* const *>(pc_ptr); \
|
||||
initial_frame = reinterpret_cast<void*>(frame_addr); \
|
||||
} while (false)
|
||||
|
||||
#if __linux__ && __riscv
|
||||
SETUP_FRAME(&uc->uc_mcontext.__gregs[REG_PC], uc->uc_mcontext.__gregs[REG_S0]);
|
||||
#elif __linux__ && __aarch64__
|
||||
SETUP_FRAME(&uc->uc_mcontext.pc, uc->uc_mcontext.regs[29]);
|
||||
#elif __linux__ && __i386__
|
||||
SETUP_FRAME(&uc->uc_mcontext.gregs[REG_EIP], uc->uc_mcontext.gregs[REG_EBP]);
|
||||
#elif __linux__ && __x86_64__
|
||||
SETUP_FRAME(&uc->uc_mcontext.gregs[REG_RIP], uc->uc_mcontext.gregs[REG_RBP]);
|
||||
#elif __FreeBSD__ && __x86_64__
|
||||
SETUP_FRAME(&uc->uc_mcontext.mc_rip, uc->uc_mcontext.mc_rbp);
|
||||
#elif __FreeBSD__ && __i386__
|
||||
SETUP_FRAME(&uc->uc_mcontext.mc_eip, uc->uc_mcontext.mc_ebp);
|
||||
#elif __NetBSD__
|
||||
// NetBSD has those portable defines. Nice!
|
||||
SETUP_FRAME(&_UC_MACHINE_PC(uc), _UC_MACHINE_FP(uc));
|
||||
#elif defined(HAVE_GETPC)
|
||||
// So if we're dealing with an architecture that doesn't belong to
|
||||
// one of the cases above, we still have plenty more cases supported
|
||||
// by pc_from_ucontext facility we have for cpu profiler. We'll
|
||||
// get top-most instruction pointer from context, and rest will be
|
||||
// grabbed by frame pointer unwinding (with skipping active).
|
||||
//
|
||||
// It is a bit of a guess, but it works for x86 (makes
|
||||
// stacktrace_unittest ucontext test pass). Main idea is skip
|
||||
// count we have will skip just past 'sigreturn' trampoline or
|
||||
// whatever OS has. And those tend to be built without frame
|
||||
// pointers, which causes last "skipping" step to skip past the
|
||||
// frame we need. Also, this is how our CPU profiler is built. It
|
||||
// always places "pc from ucontext" first and then if necessary
|
||||
// deduplicates it from backtrace.
|
||||
result[0] = GetPC(*uc);
|
||||
#else
|
||||
initial_pc = reinterpret_cast<void* const *>(&uc->uc_mcontext.gregs[REG_RIP]);
|
||||
initial_frame = reinterpret_cast<void*>(uc->uc_mcontext.gregs[REG_RBP]);
|
||||
ucp = nullptr;
|
||||
#endif
|
||||
|
||||
#undef SETUP_FRAME
|
||||
}
|
||||
#else
|
||||
void * const ucp = nullptr;
|
||||
#endif // IS_WITH_CONTEXT
|
||||
|
||||
int n = stacktrace_generic_fp::capture(result, max_depth, skip_count,
|
||||
initial_frame, initial_pc);
|
||||
constexpr bool UnsafeAccesses = (TCMALLOC_UNSAFE_GENERIC_FP_STACKTRACE != 0);
|
||||
|
||||
if (ucp && !initial_pc) {
|
||||
// we're dealing with an architecture that doesn't have a proper ucontext integration
|
||||
n = stacktrace_generic_fp::capture<UnsafeAccesses, WithSizes>(
|
||||
result + 1, max_depth - 1, skip_count,
|
||||
initial_frame, initial_pc, sizes);
|
||||
n++;
|
||||
} else {
|
||||
n = stacktrace_generic_fp::capture<UnsafeAccesses, WithSizes>(
|
||||
result, max_depth, skip_count,
|
||||
initial_frame, initial_pc, sizes);
|
||||
}
|
||||
|
||||
if (n > 0) {
|
||||
// make sure we don't tail-call capture
|
||||
(void)*(const_cast<void * volatile *>(result));
|
||||
}
|
||||
|
||||
// make sure we don't tail-call capture
|
||||
(void)*(const_cast<void * volatile *>(result));
|
||||
return n;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user