From a9b734e3fa697aba64ec4ef3ef228ee532f23430 Mon Sep 17 00:00:00 2001 From: Aliaksey Kandratsenka Date: Sun, 26 Nov 2023 23:43:24 -0500 Subject: [PATCH] perform ucontext->pc variants testing in compile-time As part of the cpu profiler we're extracting the current PC (program counter) out of the signal's ucontext. Different OS and hardware combinations have different ways of doing that. We had a list of variants that we tested at configure time and used to populate the PC_FROM_UCONTEXT macro in config.h. It caused duplication and occasional mismatches between our autoconf and cmake bits. So this commit changes the testing to be compile-time. We remove complexity from the build system and add some to the C++ source. We use SFINAE to find which of those variants compile (and we silently assume that 'compiles' implies 'works'; this is what config-time testing did too). Occasionally we'll face situations where several variants compile. And we couldn't handle this case in pure C++. So we have a small Ruby program that generates a chain of inheritance among SFINAE-specialized class templates. This handles prioritization among variants. The list of ucontext->pc extraction variants is mostly the same. We dropped the super-obsolete (circa Linux kernel 2.0) arm variant. And the NetBSD case is now improved. We now use their nice architecture-independent macro instead of x86-specific access. 
--- CMakeLists.txt | 14 +- cmake/PCFromUContext.cmake | 67 ------ cmake/config.h.in | 3 - configure.ac | 21 +- m4/pc_from_ucontext.m4 | 100 -------- src/gen_getpc.rb | 197 ++++++++++++++++ src/getpc-inl.h | 396 ++++++++++++++++++++++++++++++++ src/getpc.h | 36 ++- src/stacktrace_generic_fp-inl.h | 21 +- src/tests/getpc_test.cc | 13 +- src/windows/config.h | 3 - 11 files changed, 666 insertions(+), 205 deletions(-) delete mode 100644 cmake/PCFromUContext.cmake delete mode 100644 m4/pc_from_ucontext.m4 create mode 100755 src/gen_getpc.rb create mode 100644 src/getpc-inl.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 500884e..ed6165a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,7 +60,6 @@ include(GNUInstallDirs) list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) include(DefineTargetVariables) -include(PCFromUContext) define_target_variables() @@ -191,6 +190,9 @@ check_include_file("grp.h" HAVE_GRP_H) # for heapchecker_unittest check_include_file("pwd.h" HAVE_PWD_H) # for heapchecker_unittest check_include_file("sys/resource.h" HAVE_SYS_RESOURCE_H) # for memalign_unittest.cc check_include_file("sys/cdefs.h" HAVE_SYS_CDEFS_H) # Where glibc defines __THROW +check_include_file("ucontext.h" HAVE_UCONTEXT_H) +check_include_file("sys/ucontext.h" HAVE_SYS_UCONTEXT_H) +check_include_file("cygwin/signal.h" HAVE_CYGWIN_SIGNAL_H) check_include_file("unistd.h" HAVE_UNISTD_H) # We also need /, but we get those from @@ -220,16 +222,6 @@ if(NOT WIN32) set(HAVE_MMAP 1) endif() -# We want to access the "PC" (Program Counter) register from a struct -# ucontext. Every system has its own way of doing that. We try all the -# possibilities we know about. Note REG_PC should come first (REG_RIP -# is also defined on solaris, but does the wrong thing). But don't -# bother if we're not doing cpu-profiling. 
-# [*] means that we've not actually tested one of these systems -if (GPERFTOOLS_BUILD_CPU_PROFILER) - pc_from_ucontext(PC_FROM_UCONTEXT_DEF) -endif () - # Some tests test the behavior of .so files, and only make sense for dynamic. option(GPERFTOOLS_BUILD_STATIC "Enable Static" ON) diff --git a/cmake/PCFromUContext.cmake b/cmake/PCFromUContext.cmake deleted file mode 100644 index b1077ff..0000000 --- a/cmake/PCFromUContext.cmake +++ /dev/null @@ -1,67 +0,0 @@ -include(CheckCSourceCompiles) -include(CheckIncludeFile) - -macro(pc_from_ucontext variable) - set(HAVE_${variable} OFF) - check_include_file("ucontext.h" HAVE_UCONTEXT_H) - if(EXISTS /etc/redhat-release) - set(redhat7_release_pattern "Red Hat Linux release 7") - file(STRINGS /etc/redhat-release redhat_release_match - LIMIT_COUNT 1 - REGEX ${redhat7_release_pattern}) - if(redhat_release_match MATCHES ${redhat7_release_pattern}) - set(HAVE_SYS_UCONTEXT_H OFF) - else() - check_include_file("sys/ucontext.h" HAVE_SYS_UCONTEXT_H) - endif() - else() - check_include_file("sys/ucontext.h" HAVE_SYS_UCONTEXT_H) - endif() - check_include_file("cygwin/signal.h" HAVE_CYGWIN_SIGNAL_H) - - set(pc_fields - "uc_mcontext.gregs[REG_PC]" # Solaris x86 (32 + 64 bit) - "uc_mcontext.gregs[REG_EIP]" # Linux (i386) - "uc_mcontext.gregs[REG_RIP]" # Linux (x86_64) - "uc_mcontext.sc_ip" # Linux (ia64) - "uc_mcontext.__pc" # Linux (loongarch64) - "uc_mcontext.pc" # Linux (mips) - "uc_mcontext.uc_regs->gregs[PT_NIP]" # Linux (ppc) - "uc_mcontext.__gregs[REG_PC]" # Linux (riscv64) - "uc_mcontext.psw.addr" # Linux (s390) - "uc_mcontext.gregs[R15]" # Linux (arm old [untested]) - "uc_mcontext.arm_pc" # Linux (arm arch 5) - "uc_mcontext.gp_regs[PT_NIP]" # Suse SLES 11 (ppc64) - "uc_mcontext.mc_eip" # FreeBSD (i386) - "uc_mcontext.mc_srr0" # FreeBSD (powerpc, powerpc64) - "uc_mcontext.mc_rip" # FreeBSD (x86_64) - "uc_mcontext.__gregs[_REG_EIP]" # NetBSD (i386) - "uc_mcontext.__gregs[_REG_RIP]" # NetBSD (x86_64) - "uc_mcontext->ss.eip" # OS X 
(i386, <=10.4) - "uc_mcontext->__ss.__eip" # OS X (i386, >=10.5) - "uc_mcontext->ss.rip" # OS X (x86_64, <=10.4) - "uc_mcontext->__ss.__rip" # OS X (x86_64, >=10.5) - "uc_mcontext->ss.srr0" # OS X (ppc, ppc64, <=10.4 [untested]) - "uc_mcontext->__ss.__srr0" # OS X (ppc, ppc64, >=10.5 [untested]) - "uc_mcontext->__ss.__pc") # OS X (arm64, >=11 [untested]) - - set(CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE=1) - if(HAVE_CYGWIN_SIGNAL_H) - set(_inc "cygwin/signal.h") - elseif(HAVE_SYS_UCONTEXT_H) - set(_inc "sys/ucontext.h") - elseif(HAVE_UCONTEXT_H) - set(_inc "ucontext.h") - endif() - foreach(pc_field IN LISTS pc_fields) - string(MAKE_C_IDENTIFIER ${pc_field} pc_field_id) - check_c_source_compiles( - "#include <${_inc}>\nint main() { ucontext_t u; return u.${pc_field} == 0; }" - HAVE_${pc_field_id}) - if(HAVE_${pc_field_id}) - set(HAVE_${variable} ON) - set(${variable} ${pc_field}) - break() - endif() - endforeach() -endmacro() diff --git a/cmake/config.h.in b/cmake/config.h.in index 33e0c26..4bdbbb4 100644 --- a/cmake/config.h.in +++ b/cmake/config.h.in @@ -206,9 +206,6 @@ /* Define to the version of this package. */ #define PACKAGE_VERSION "@PROJECT_VERSION@" -/* How to access the PC from a struct ucontext */ -#define PC_FROM_UCONTEXT @PC_FROM_UCONTEXT_DEF@ - /* Always the empty-string on non-windows systems. On windows, should be "__declspec(dllexport)". This way, when we compile the dll, we export our functions/classes. It's safe to define this here because config.h is only diff --git a/configure.ac b/configure.ac index 8242430..d464757 100644 --- a/configure.ac +++ b/configure.ac @@ -270,8 +270,11 @@ AC_CHECK_HEADERS(grp.h) # for heapchecker_unittest AC_CHECK_HEADERS(pwd.h) # for heapchecker_unittest AC_CHECK_HEADERS(sys/resource.h) # for memalign_unittest.cc AC_CHECK_HEADERS(sys/cdefs.h) # Where glibc defines __THROW -# We also need /, but we get those from -# AC_PC_FROM_UCONTEXT, below. 
+ +AC_CHECK_HEADERS(sys/ucontext.h) +AC_CHECK_HEADERS(ucontext.h) +AC_CHECK_HEADERS(cygwin/signal.h) # ucontext on cywgin +AC_CHECK_HEADERS(asm/ptrace.h) # get ptrace macros, e.g. PT_NIP # check for socketpair, some system, such as QNX, need link in an socket library to use it AC_SEARCH_LIBS([socketpair], [socket]) @@ -305,14 +308,14 @@ case "$host" in esac # We want to access the "PC" (Program Counter) register from a struct -# ucontext. Every system has its own way of doing that. We try all the -# possibilities we know about. Note REG_PC should come first (REG_RIP -# is also defined on solaris, but does the wrong thing). But don't -# bother if we're not doing cpu-profiling. -# [*] means that we've not actually tested one of these systems +# ucontext. Every system has its own way of doing that. But in case +# we're dealing with unknown system, we have to check if GetPC +# actually works. But don't bother if we're not doing cpu-profiling. if test "$enable_cpu_profiler" = yes; then - AC_PC_FROM_UCONTEXT(AC_MSG_WARN(Could not find the PC. Will not try to compile libprofiler...); - enable_cpu_profiler=no) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include "src/getpc.h"]], [GetPC({})])], + [], + [AC_MSG_WARN(Could not find the PC. Will not try to compile libprofiler...) + enable_cpu_profiler=no]) fi # Some tests test the behavior of .so files, and only make sense for dynamic. diff --git a/m4/pc_from_ucontext.m4 b/m4/pc_from_ucontext.m4 deleted file mode 100644 index cc98641..0000000 --- a/m4/pc_from_ucontext.m4 +++ /dev/null @@ -1,100 +0,0 @@ -# We want to access the "PC" (Program Counter) register from a struct -# ucontext. Every system has its own way of doing that. We try all the -# possibilities we know about. Note REG_PC should come first (REG_RIP -# is also defined on solaris, but does the wrong thing). - -# OpenBSD doesn't have ucontext.h, but we can get PC from ucontext_t -# by using signal.h. 
- -# The first argument of AC_PC_FROM_UCONTEXT will be invoked when we -# cannot find a way to obtain PC from ucontext. - -AC_DEFUN([AC_PC_FROM_UCONTEXT], - [AC_CHECK_HEADERS(ucontext.h) - # Redhat 7 has , but it barfs if we #include it directly - # (this was fixed in later redhats). works fine, so use that. - if grep "Red Hat Linux release 7" /etc/redhat-release >/dev/null 2>&1; then - AC_DEFINE(HAVE_SYS_UCONTEXT_H, 0, [ is broken on redhat 7]) - ac_cv_header_sys_ucontext_h=no - else - AC_CHECK_HEADERS(sys/ucontext.h) # ucontext on OS X 10.6 (at least) - fi - AC_CHECK_HEADERS(cygwin/signal.h) # ucontext on cywgin - AC_CHECK_HEADERS(asm/ptrace.h) # get ptrace macros, e.g. PT_NIP - AC_MSG_CHECKING([how to access the program counter from a struct ucontext]) - pc_fields=" uc_mcontext.gregs[[REG_PC]]" # Solaris x86 (32 + 64 bit) - pc_fields="$pc_fields uc_mcontext.gregs[[REG_EIP]]" # Linux (i386) - pc_fields="$pc_fields uc_mcontext.gregs[[REG_RIP]]" # Linux (x86_64) - pc_fields="$pc_fields uc_mcontext.sc_ip" # Linux (ia64) - pc_fields="$pc_fields uc_mcontext.__pc" # Linux (loongarch64) - pc_fields="$pc_fields uc_mcontext.pc" # Linux (mips) - pc_fields="$pc_fields uc_mcontext.uc_regs->gregs[[PT_NIP]]" # Linux (ppc) - pc_fields="$pc_fields uc_mcontext.__gregs[[REG_PC]]" # Linux (riscv64) - pc_fields="$pc_fields uc_mcontext.psw.addr" # Linux (s390) - pc_fields="$pc_fields uc_mcontext.gregs[[R15]]" # Linux (arm old [untested]) - pc_fields="$pc_fields uc_mcontext.arm_pc" # Linux (arm arch 5) - pc_fields="$pc_fields uc_mcontext.gp_regs[[PT_NIP]]" # Suse SLES 11 (ppc64) - pc_fields="$pc_fields uc_mcontext.mc_eip" # FreeBSD (i386) - pc_fields="$pc_fields uc_mcontext.mc_srr0" # FreeBSD (powerpc, powerpc64) - pc_fields="$pc_fields uc_mcontext.mc_rip" # FreeBSD (x86_64) - pc_fields="$pc_fields uc_mcontext.__gregs[[_REG_EIP]]" # NetBSD (i386) - pc_fields="$pc_fields uc_mcontext.__gregs[[_REG_RIP]]" # NetBSD (x86_64) - pc_fields="$pc_fields uc_mcontext->ss.eip" # OS X (i386, 
<=10.4) - pc_fields="$pc_fields uc_mcontext->__ss.__eip" # OS X (i386, >=10.5) - pc_fields="$pc_fields uc_mcontext->ss.rip" # OS X (x86_64) - pc_fields="$pc_fields uc_mcontext->__ss.__rip" # OS X (>=10.5 [untested]) - pc_fields="$pc_fields uc_mcontext->ss.srr0" # OS X (ppc, ppc64 [untested]) - pc_fields="$pc_fields uc_mcontext->__ss.__srr0" # OS X (>=10.5 [untested]) - pc_fields="$pc_fields uc_mcontext->__ss.__pc" # OS X (arm64) - pc_field_found=false - for pc_field in $pc_fields; do - if ! $pc_field_found; then - # Prefer sys/ucontext.h to ucontext.h, for OS X's sake. - if test "x$ac_cv_header_cygwin_signal_h" = xyes; then - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#define _GNU_SOURCE 1 - #include ]], [[ucontext_t u; return u.$pc_field == 0;]])],[AC_DEFINE_UNQUOTED(PC_FROM_UCONTEXT, $pc_field, - How to access the PC from a struct ucontext) - AC_MSG_RESULT($pc_field) - pc_field_found=true],[]) - elif test "x$ac_cv_header_asm_ptrace_h" = xyes -a "x$ac_cv_header_sys_ucontext_h" = xyes; then - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#define _GNU_SOURCE 1 - #include - #include ]], [[ucontext_t u; return u.$pc_field == 0;]])],[AC_DEFINE_UNQUOTED(PC_FROM_UCONTEXT, $pc_field, - How to access the PC from a struct ucontext) - AC_MSG_RESULT($pc_field) - pc_field_found=true],[]) - elif test "x$ac_cv_header_sys_ucontext_h" = xyes; then - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#define _GNU_SOURCE 1 - #include ]], [[ucontext_t u; return u.$pc_field == 0;]])],[AC_DEFINE_UNQUOTED(PC_FROM_UCONTEXT, $pc_field, - How to access the PC from a struct ucontext) - AC_MSG_RESULT($pc_field) - pc_field_found=true],[]) - elif test "x$ac_cv_header_ucontext_h" = xyes; then - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#define _GNU_SOURCE 1 - #include ]], [[ucontext_t u; return u.$pc_field == 0;]])],[AC_DEFINE_UNQUOTED(PC_FROM_UCONTEXT, $pc_field, - How to access the PC from a struct ucontext) - AC_MSG_RESULT($pc_field) - pc_field_found=true],[]) - else # hope some standard header gives it to us - 
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[ucontext_t u; return u.$pc_field == 0;]])],[AC_DEFINE_UNQUOTED(PC_FROM_UCONTEXT, $pc_field, - How to access the PC from a struct ucontext) - AC_MSG_RESULT($pc_field) - pc_field_found=true],[]) - fi - fi - done - if ! $pc_field_found; then - pc_fields=" sc_eip" # OpenBSD (i386) - pc_fields="$pc_fields sc_rip" # OpenBSD (x86_64) - for pc_field in $pc_fields; do - if ! $pc_field_found; then - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include ]], [[ucontext_t u; return u.$pc_field == 0;]])],[AC_DEFINE_UNQUOTED(PC_FROM_UCONTEXT, $pc_field, - How to access the PC from a struct ucontext) - AC_MSG_RESULT($pc_field) - pc_field_found=true],[]) - fi - done - fi - if ! $pc_field_found; then - [$1] - fi]) diff --git a/src/gen_getpc.rb b/src/gen_getpc.rb new file mode 100755 index 0000000..02713a8 --- /dev/null +++ b/src/gen_getpc.rb @@ -0,0 +1,197 @@ +#!/usr/bin/env ruby +# Copyright (c) 2023, gperftools Contributors +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +require 'digest' + +# This is main logic. If you want to add new ucontext-to-pc accessor, add it here +def dwm!(d) + d.template "NetBSD has really nice portable macros", :_UC_MACHINE_PC do |uc| + "_UC_MACHINE_PC(#{uc})" + end + + d.with_prefix "uc_mcontext." do + # first arg is ifdef, second is field (with prefix prepended) and third is comment + d.ifdef :REG_PC, "gregs[REG_PC]", "Solaris/x86" + d.ifdef :REG_EIP, "gregs[REG_EIP]", "Linux/i386" + d.ifdef :REG_RIP, "gregs[REG_RIP]", "Linux/amd64" + d.field "sc_ip", "Linux/ia64" + d.field "__pc", "Linux/loongarch64" + d.field "pc", "Linux/{mips,aarch64}" + d.ifdef :PT_NIP, "uc_regs->gregs[PT_NIP]", "Linux/ppc" + d.ifdef :PT_NIP, "gp_regs[PT_NIP]", "Linux/ppc" + d.ifdef :REG_PC, "__gregs[REG_PC]", "Linux/riscv" + d.field "psw.addr", "Linux/s390" + d.field "arm_pc", "Linux/arm (32-bit; legacy)" + d.field "mc_eip", "FreeBSD/i386" + d.field "mc_srr0", "FreeBSD/ppc" + d.field "mc_rip", "FreeBSD/x86_64" + end + + d.with_prefix "uc_mcontext->" do + d.field "ss.eip", "OS X (i386, <=10.4)" + d.field "__ss.__eip", "OS X (i386, >=10.5)" + d.field "ss.rip", "OS X (x86_64)" + d.field "__ss.__rip", "OS X (>=10.5 [untested])" + d.field "ss.srr0", "OS X (ppc, ppc64 [untested])" + d.field "__ss.__srr0", "OS X (>=10.5 [untested])" + d.field "__ss.__pc", "OS X (arm64)" + end + + d.field "sc_eip", "OpenBSD/i386" + d.field "sc_rip", "OpenBSD/x86_64" +end + +# this is generator logic +class Definer + def 
initialize + @prefix = "" + @accessors = {} + puts(< combos are +// automagically filtered out (via SFINAE). + +// Each known case is represented as a template class. For SFINAE +// reasons we masquerade ucontext_t type behind U template +// parameter. And we also parameterize by parent class. This allows us +// to arrange all template instantiations in a single ordered chain of +// inheritance. See RawUCToPC below. + +// Note, we do anticipate that most times exactly one of those access +// methods works. But we're prepared there could be several. In +// particular, according to previous comments Solaris/x86 also has +// REG_RIP defined, but it is somehow wrong. So we're careful about +// preserving specific order. We couldn't handle this "multiplicity" +// aspect in pure C++, so we use code generation. + +namespace internal { + +struct Empty { +#ifdef DEFINE_TRIVIAL_GET +#define HAVE_TRIVIAL_GET + // special thing for stacktrace_generic_fp-inl which wants no-op case + static void* Get(...) { + return nullptr; + } +#endif +}; +HERE + end + + def with_prefix(prefix) + old_prefix = @prefix + @prefix = @prefix.dup + prefix + yield + ensure + @prefix = old_prefix + end + + def ifdef define, field, comment + field field, comment, define + end + + def template comment, define = nil, &block + field block, comment, define + end + + def field field, comment, define = nil + tmpl = if field.kind_of? Proc + raise unless @prefix.empty? + field + else + proc do |uc| + "#{uc}->#{@prefix + field}" + end + end + fingerprint = Digest::MD5.hexdigest(tmpl["%"] + "@" + comment)[0,8] + + maybe_open_ifdef = "\n#ifdef #{define}" if define + maybe_close_ifdef = "\n#endif // #{define}" if define + + raise "conflict!" if @accessors.include? 
fingerprint + + if define + @accessors[fingerprint] = comment + " (with #ifdef #{define})" + else + @accessors[fingerprint] = comment + end + + puts(< +struct get_#{fingerprint} : public P { +};#{maybe_open_ifdef} +template +struct get_#{fingerprint}> : public P { + static void* Get(const U* uc) { + // #{comment} + return (void*)(#{tmpl[:uc]}); + } +};#{maybe_close_ifdef} +HERE + end + + def finalize! + puts + puts(<;" + prev = "g_#{fingerprint}" + end + puts(<getpc-inl.h if you want to +// update. (And submit both files) + +// What this file does? We have several possible ways of fetching PC +// (program counter) of signal's ucontext. We explicitly choose to +// avoid ifdef-ing specific OSes (or even specific versions), to +// increase our chances that stuff simply works. Comments below refer +// to OS/architecture combos for documentation purposes, but what +// works is what is used. + +// How it does it? It uses lightweight C++ template magic where +// "wrong" ucontext_t{nullptr}-> combos are +// automagically filtered out (via SFINAE). + +// Each known case is represented as a template class. For SFINAE +// reasons we masquerade ucontext_t type behind U template +// parameter. And we also parameterize by parent class. This allows us +// to arrange all template instantiations in a single ordered chain of +// inheritance. See RawUCToPC below. + +// Note, we do anticipate that most times exactly one of those access +// methods works. But we're prepared there could be several. In +// particular, according to previous comments Solaris/x86 also has +// REG_RIP defined, but it is somehow wrong. So we're careful about +// preserving specific order. We couldn't handle this "multiplicity" +// aspect in pure C++, so we use code generation. + +namespace internal { + +struct Empty { +#ifdef DEFINE_TRIVIAL_GET +#define HAVE_TRIVIAL_GET + // special thing for stacktrace_generic_fp-inl which wants no-op case + static void* Get(...) 
{ + return nullptr; + } +#endif +}; + +// NetBSD has really nice portable macros +template +struct get_c47a30af : public P { +}; +#ifdef _UC_MACHINE_PC +template +struct get_c47a30af> : public P { + static void* Get(const U* uc) { + // NetBSD has really nice portable macros + return (void*)(_UC_MACHINE_PC(uc)); + } +}; +#endif // _UC_MACHINE_PC + +// Solaris/x86 +template +struct get_c4719e8d : public P { +}; +#ifdef REG_PC +template +struct get_c4719e8duc_mcontext.gregs[REG_PC])>> : public P { + static void* Get(const U* uc) { + // Solaris/x86 + return (void*)(uc->uc_mcontext.gregs[REG_PC]); + } +}; +#endif // REG_PC + +// Linux/i386 +template +struct get_278cba85 : public P { +}; +#ifdef REG_EIP +template +struct get_278cba85uc_mcontext.gregs[REG_EIP])>> : public P { + static void* Get(const U* uc) { + // Linux/i386 + return (void*)(uc->uc_mcontext.gregs[REG_EIP]); + } +}; +#endif // REG_EIP + +// Linux/amd64 +template +struct get_b49f2593 : public P { +}; +#ifdef REG_RIP +template +struct get_b49f2593uc_mcontext.gregs[REG_RIP])>> : public P { + static void* Get(const U* uc) { + // Linux/amd64 + return (void*)(uc->uc_mcontext.gregs[REG_RIP]); + } +}; +#endif // REG_RIP + +// Linux/ia64 +template +struct get_8fda99d3 : public P { +}; +template +struct get_8fda99d3uc_mcontext.sc_ip)>> : public P { + static void* Get(const U* uc) { + // Linux/ia64 + return (void*)(uc->uc_mcontext.sc_ip); + } +}; + +// Linux/loongarch64 +template +struct get_4e9b682d : public P { +}; +template +struct get_4e9b682duc_mcontext.__pc)>> : public P { + static void* Get(const U* uc) { + // Linux/loongarch64 + return (void*)(uc->uc_mcontext.__pc); + } +}; + +// Linux/{mips,aarch64} +template +struct get_b94b7246 : public P { +}; +template +struct get_b94b7246uc_mcontext.pc)>> : public P { + static void* Get(const U* uc) { + // Linux/{mips,aarch64} + return (void*)(uc->uc_mcontext.pc); + } +}; + +// Linux/ppc +template +struct get_d0eeceae : public P { +}; +#ifdef PT_NIP +template +struct 
get_d0eeceaeuc_mcontext.uc_regs->gregs[PT_NIP])>> : public P { + static void* Get(const U* uc) { + // Linux/ppc + return (void*)(uc->uc_mcontext.uc_regs->gregs[PT_NIP]); + } +}; +#endif // PT_NIP + +// Linux/ppc +template +struct get_a81f6801 : public P { +}; +#ifdef PT_NIP +template +struct get_a81f6801uc_mcontext.gp_regs[PT_NIP])>> : public P { + static void* Get(const U* uc) { + // Linux/ppc + return (void*)(uc->uc_mcontext.gp_regs[PT_NIP]); + } +}; +#endif // PT_NIP + +// Linux/riscv +template +struct get_24e794ef : public P { +}; +#ifdef REG_PC +template +struct get_24e794efuc_mcontext.__gregs[REG_PC])>> : public P { + static void* Get(const U* uc) { + // Linux/riscv + return (void*)(uc->uc_mcontext.__gregs[REG_PC]); + } +}; +#endif // REG_PC + +// Linux/s390 +template +struct get_d9a75ed3 : public P { +}; +template +struct get_d9a75ed3uc_mcontext.psw.addr)>> : public P { + static void* Get(const U* uc) { + // Linux/s390 + return (void*)(uc->uc_mcontext.psw.addr); + } +}; + +// Linux/arm (32-bit; legacy) +template +struct get_07114491 : public P { +}; +template +struct get_07114491uc_mcontext.arm_pc)>> : public P { + static void* Get(const U* uc) { + // Linux/arm (32-bit; legacy) + return (void*)(uc->uc_mcontext.arm_pc); + } +}; + +// FreeBSD/i386 +template +struct get_9be162e6 : public P { +}; +template +struct get_9be162e6uc_mcontext.mc_eip)>> : public P { + static void* Get(const U* uc) { + // FreeBSD/i386 + return (void*)(uc->uc_mcontext.mc_eip); + } +}; + +// FreeBSD/ppc +template +struct get_2812b129 : public P { +}; +template +struct get_2812b129uc_mcontext.mc_srr0)>> : public P { + static void* Get(const U* uc) { + // FreeBSD/ppc + return (void*)(uc->uc_mcontext.mc_srr0); + } +}; + +// FreeBSD/x86_64 +template +struct get_5bb1da03 : public P { +}; +template +struct get_5bb1da03uc_mcontext.mc_rip)>> : public P { + static void* Get(const U* uc) { + // FreeBSD/x86_64 + return (void*)(uc->uc_mcontext.mc_rip); + } +}; + +// OS X (i386, <=10.4) +template 
+struct get_880f83fe : public P { +}; +template +struct get_880f83feuc_mcontext->ss.eip)>> : public P { + static void* Get(const U* uc) { + // OS X (i386, <=10.4) + return (void*)(uc->uc_mcontext->ss.eip); + } +}; + +// OS X (i386, >=10.5) +template +struct get_92fcd89a : public P { +}; +template +struct get_92fcd89auc_mcontext->__ss.__eip)>> : public P { + static void* Get(const U* uc) { + // OS X (i386, >=10.5) + return (void*)(uc->uc_mcontext->__ss.__eip); + } +}; + +// OS X (x86_64) +template +struct get_773e27c8 : public P { +}; +template +struct get_773e27c8uc_mcontext->ss.rip)>> : public P { + static void* Get(const U* uc) { + // OS X (x86_64) + return (void*)(uc->uc_mcontext->ss.rip); + } +}; + +// OS X (>=10.5 [untested]) +template +struct get_6627078a : public P { +}; +template +struct get_6627078auc_mcontext->__ss.__rip)>> : public P { + static void* Get(const U* uc) { + // OS X (>=10.5 [untested]) + return (void*)(uc->uc_mcontext->__ss.__rip); + } +}; + +// OS X (ppc, ppc64 [untested]) +template +struct get_da992aca : public P { +}; +template +struct get_da992acauc_mcontext->ss.srr0)>> : public P { + static void* Get(const U* uc) { + // OS X (ppc, ppc64 [untested]) + return (void*)(uc->uc_mcontext->ss.srr0); + } +}; + +// OS X (>=10.5 [untested]) +template +struct get_cce47a40 : public P { +}; +template +struct get_cce47a40uc_mcontext->__ss.__srr0)>> : public P { + static void* Get(const U* uc) { + // OS X (>=10.5 [untested]) + return (void*)(uc->uc_mcontext->__ss.__srr0); + } +}; + +// OS X (arm64) +template +struct get_0a082e42 : public P { +}; +template +struct get_0a082e42uc_mcontext->__ss.__pc)>> : public P { + static void* Get(const U* uc) { + // OS X (arm64) + return (void*)(uc->uc_mcontext->__ss.__pc); + } +}; + +// OpenBSD/i386 +template +struct get_3baa113a : public P { +}; +template +struct get_3baa113asc_eip)>> : public P { + static void* Get(const U* uc) { + // OpenBSD/i386 + return (void*)(uc->sc_eip); + } +}; + +// OpenBSD/x86_64 
+template +struct get_79f33851 : public P { +}; +template +struct get_79f33851sc_rip)>> : public P { + static void* Get(const U* uc) { + // OpenBSD/x86_64 + return (void*)(uc->sc_rip); + } +}; + +inline void* RawUCToPC(const ucontext_t* uc) { + // OpenBSD/x86_64 + using g_79f33851 = get_79f33851; + // OpenBSD/i386 + using g_3baa113a = get_3baa113a; + // OS X (arm64) + using g_0a082e42 = get_0a082e42; + // OS X (>=10.5 [untested]) + using g_cce47a40 = get_cce47a40; + // OS X (ppc, ppc64 [untested]) + using g_da992aca = get_da992aca; + // OS X (>=10.5 [untested]) + using g_6627078a = get_6627078a; + // OS X (x86_64) + using g_773e27c8 = get_773e27c8; + // OS X (i386, >=10.5) + using g_92fcd89a = get_92fcd89a; + // OS X (i386, <=10.4) + using g_880f83fe = get_880f83fe; + // FreeBSD/x86_64 + using g_5bb1da03 = get_5bb1da03; + // FreeBSD/ppc + using g_2812b129 = get_2812b129; + // FreeBSD/i386 + using g_9be162e6 = get_9be162e6; + // Linux/arm (32-bit; legacy) + using g_07114491 = get_07114491; + // Linux/s390 + using g_d9a75ed3 = get_d9a75ed3; + // Linux/riscv (with #ifdef REG_PC) + using g_24e794ef = get_24e794ef; + // Linux/ppc (with #ifdef PT_NIP) + using g_a81f6801 = get_a81f6801; + // Linux/ppc (with #ifdef PT_NIP) + using g_d0eeceae = get_d0eeceae; + // Linux/{mips,aarch64} + using g_b94b7246 = get_b94b7246; + // Linux/loongarch64 + using g_4e9b682d = get_4e9b682d; + // Linux/ia64 + using g_8fda99d3 = get_8fda99d3; + // Linux/amd64 (with #ifdef REG_RIP) + using g_b49f2593 = get_b49f2593; + // Linux/i386 (with #ifdef REG_EIP) + using g_278cba85 = get_278cba85; + // Solaris/x86 (with #ifdef REG_PC) + using g_c4719e8d = get_c4719e8d; + // NetBSD has really nice portable macros (with #ifdef _UC_MACHINE_PC) + using g_c47a30af = get_c47a30af; + return g_c47a30af::Get(uc); +} + +} // namespace internal diff --git a/src/getpc.h b/src/getpc.h index 6cd7493..87d18b6 100644 --- a/src/getpc.h +++ b/src/getpc.h @@ -44,7 +44,12 @@ #ifndef BASE_GETPC_H_ #define BASE_GETPC_H_ 
-#include "config.h" +// Note: we include this from one of configure script C++ tests as +// part of verifying that we're able to build CPU profiler. I.e. we +// cannot include config.h as we normally do, since it isn't produced +// yet, but those HAVE_XYZ defines are available, so including +// ucontext etc stuff works. Its usage from profiler.cc (and +// stacktrace_generic_fp-inl.h) is after config.h is included. // On many linux systems, we may need _GNU_SOURCE to get access to // the defined constants that define the register we want to see (eg @@ -63,17 +68,32 @@ typedef ucontext ucontext_t; #endif -// If this doesn't compile, it's probably because -// PC_FROM_UCONTEXT is the empty string. You need to figure out -// the right value for your system, and add it to the list in -// configure.ac (or set it manually in your config.h). +namespace tcmalloc { +namespace getpc { + +// std::void_t is C++17. So we steal this from +// https://en.cppreference.com/w/cpp/types/void_t +template +struct make_void { typedef void type; }; +template +using void_t = typename make_void::type; + +#include "getpc-inl.h" + +} // namespace getpc +} // namespace tcmalloc + +// If this doesn't compile, you need to figure out the right value for +// your system, and add it to the list above. inline void* GetPC(const ucontext_t& signal_ucontext) { + void* retval = tcmalloc::getpc::internal::RawUCToPC(&signal_ucontext); + #if defined(__s390__) && !defined(__s390x__) // Mask out the AMODE31 bit from the PC recorded in the context. 
- return (void*)((unsigned long)signal_ucontext.PC_FROM_UCONTEXT & 0x7fffffffUL); -#else - return (void*)signal_ucontext.PC_FROM_UCONTEXT; // defined in config.h + retval = (void*)((unsigned long)retval & 0x7fffffffUL); #endif + + return retval; } #endif // BASE_GETPC_H_ diff --git a/src/stacktrace_generic_fp-inl.h b/src/stacktrace_generic_fp-inl.h index f7f5af3..12f93bb 100644 --- a/src/stacktrace_generic_fp-inl.h +++ b/src/stacktrace_generic_fp-inl.h @@ -44,8 +44,15 @@ // This is only used on OS-es with mmap support. #include -#if defined(PC_FROM_UCONTEXT) && (HAVE_SYS_UCONTEXT_H || HAVE_UCONTEXT_H) +#if HAVE_SYS_UCONTEXT_H || HAVE_UCONTEXT_H + +#define DEFINE_TRIVIAL_GET #include "getpc.h" + +#if !defined(HAVE_TRIVIAL_GET) && !defined(__NetBSD__) +#error sanity +#endif + #define HAVE_GETPC 1 #endif @@ -164,9 +171,9 @@ int capture(void **result, int max_depth, int skip_count, #ifdef __arm__ // note, (32-bit, legacy) arm support is not entirely functional - // w.r.t. frame-pointer-bases backtracing. Only recent clangs + // w.r.t. frame-pointer-based backtracing. Only recent clangs // generate "right" frame pointer setup and only with - // --enable-frame-pointers. Current gcc's are hopeless (somewhat + // --enable-frame-pointers. Current gcc-s are hopeless (somewhat // older gcc's (circa gcc 6 or so) did something that looks right, // but not recent ones). constexpr uintptr_t kAlignment = 4; @@ -309,6 +316,9 @@ static int GET_STACK_TRACE_OR_FRAMES { SETUP_FRAME(&uc->uc_mcontext.__gregs[REG_PC], uc->uc_mcontext.__gregs[REG_S0]); #elif __linux__ && __aarch64__ SETUP_FRAME(&uc->uc_mcontext.pc, uc->uc_mcontext.regs[29]); +#elif __linux__ && __arm__ + // Note: arm's frame pointer support is borked in recent GCC-s. 
+ SETUP_FRAME(&uc->uc_mcontext.arm_pc, uc->uc_mcontext.arm_fp); #elif __linux__ && __i386__ SETUP_FRAME(&uc->uc_mcontext.gregs[REG_EIP], uc->uc_mcontext.gregs[REG_EBP]); #elif __linux__ && __x86_64__ @@ -335,7 +345,12 @@ static int GET_STACK_TRACE_OR_FRAMES { // frame we need. Also, this is how our CPU profiler is built. It // always places "pc from ucontext" first and then if necessary // deduplicates it from backtrace. + result[0] = GetPC(*uc); + if (result[0] == nullptr) { + // This OS/HW combo actually lacks known way to extract PC. + ucp = nullptr; + } #else ucp = nullptr; #endif diff --git a/src/tests/getpc_test.cc b/src/tests/getpc_test.cc index d75e40b..a37a07e 100644 --- a/src/tests/getpc_test.cc +++ b/src/tests/getpc_test.cc @@ -37,6 +37,9 @@ #include "config.h" #include "getpc.h" // should be first to get the _GNU_SOURCE dfn + +#include "base/basictypes.h" + #include #include #include @@ -46,9 +49,17 @@ static volatile void* getpc_retval = NULL; // what GetPC returns static volatile bool prof_handler_called = false; +extern "C" { + // This helps us inspect codegen of GetPC function, just in case. + ATTRIBUTE_NOINLINE + void* DoGetPC(const ucontext_t* uc) { + return GetPC(*uc); + } +} + static void prof_handler(int sig, siginfo_t*, void* signal_ucontext) { if (!prof_handler_called) - getpc_retval = GetPC(*reinterpret_cast(signal_ucontext)); + getpc_retval = DoGetPC(reinterpret_cast(signal_ucontext)); prof_handler_called = true; // only store the retval once } diff --git a/src/windows/config.h b/src/windows/config.h index db28c1f..96cd35f 100644 --- a/src/windows/config.h +++ b/src/windows/config.h @@ -223,9 +223,6 @@ /* Define to the version of this package. */ #define PACKAGE_VERSION "2.13" -/* How to access the PC from a struct ucontext */ -/* #undef PC_FROM_UCONTEXT */ - /* Always the empty-string on non-windows systems. On windows, should be "__declspec(dllexport)". This way, when we compile the dll, we export our functions/classes. 
It's safe to define this here because config.h is only