x86_64: Fix "bt" command to handle IRQ exception frames properly

On x86_64, there are cases where crash cannot handle IRQ exception
frames properly.  For example, with RHEL9.3 kernel, "bt" command fails
with with "WARNING possibly bogus exception frame":

  crash> bt -c 30
  PID: 2898241  TASK: ff4cb0ce0da0c680  CPU: 30   COMMAND: "star-ccm+"
   #0 [fffffe4658d88e58] crash_nmi_callback at ffffffffa00675e8
   #1 [fffffe4658d88e68] nmi_handle at ffffffffa002ebab
  ...
  --- <NMI exception stack> ---
  ...
  #13 [ff5eba269937cf90] __do_softirq at ffffffffa0c6c007
  #14 [ff5eba269937cfe0] __irq_exit_rcu at ffffffffa010ef61
  #15 [ff5eba269937cff0] sysvec_apic_timer_interrupt at ffffffffa0c58ca2
  --- <IRQ stack> ---
      RIP: 0000000000000010  RSP: 0000000000000018  RFLAGS: ff5eba26ddc9f7e8
      RAX: 0000000000000a20  RBX: ff5eba26ddc9f940  RCX: 0000000000001000
      RDX: ffffffb559980000  RSI: ff4cb12d67207400  RDI: ffffffffffffffff
      RBP: 0000000000001000   R8: ff5eba26ddc9f940   R9: ff5eba26ddc9f8af
      R10: 0000000000000003  R11: 0000000000000a20  R12: ff5eba26ddc9f8b0
      R13: 000000283c07f000  R14: ff4cb0f5a29a1c00  R15: 0000000000000001
      ORIG_RAX: ffffffffa07c4e60  CS: 0206  SS: 7000001cf0380001
  bt: WARNING: possibly bogus exception frame

Running "crash" with "--machdep irq_eframe_link=0xffffffffffffffe8"
option (i.e. thus irq_eframe_link = -24) works properly:

  PID: 2898241  TASK: ff4cb0ce0da0c680  CPU: 30   COMMAND: "star-ccm+"
   #0 [fffffe4658d88e58] crash_nmi_callback at ffffffffa00675e8
   #1 [fffffe4658d88e68] nmi_handle at ffffffffa002ebab
  ...
  --- <NMI exception stack> ---
  ...
  #13 [ff5eba269937cf90] __do_softirq at ffffffffa0c6c007
  #14 [ff5eba269937cfe0] __irq_exit_rcu at ffffffffa010ef61
  #15 [ff5eba269937cff0] sysvec_apic_timer_interrupt at ffffffffa0c58ca2
  --- <IRQ stack> ---
  #16 [ff5eba26ddc9f738] asm_sysvec_apic_timer_interrupt at ffffffffa0e00e06
      [exception RIP: alloc_pte.constprop.0+32]
      RIP: ffffffffa07c4e60  RSP: ff5eba26ddc9f7e8  RFLAGS: 00000206
      RAX: ff5eba26ddc9f940  RBX: 0000000000001000  RCX: 0000000000000a20
      RDX: 0000000000001000  RSI: ffffffb559980000  RDI: ff4cb12d67207400
      RBP: ff5eba26ddc9f8b0   R8: ff5eba26ddc9f8af   R9: 0000000000000003
      R10: 0000000000000a20  R11: ff5eba26ddc9f940  R12: 000000283c07f000
      R13: ff4cb0f5a29a1c00  R14: 0000000000000001  R15: ff4cb0f5a29a1bf8
      ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
  #17 [ff5eba26ddc9f830] iommu_v1_map_pages at ffffffffa07c5648
  #18 [ff5eba26ddc9f8f8] __iommu_map at ffffffffa07d7803
  #19 [ff5eba26ddc9f990] iommu_map_sg at ffffffffa07d7b71
  #20 [ff5eba26ddc9f9f0] iommu_dma_map_sg at ffffffffa07ddcc9
  #21 [ff5eba26ddc9fa90] __dma_map_sg_attrs at ffffffffa01b5205
  ...

Some background:

asm_common_interrupt:
      callq  error_entry
      movq   %rax,%rsp
      movq   %rsp,%rdi
      movq   0x78(%rsp),%rsi
      movq   $-0x1,0x78(%rsp)
      call common_interrupt    # rsp pointing to regs

common_interrupt:
      pushq  %r12
      pushq  %rbp
      pushq  %rbx
      [...]
      movq   hardirq_stack_ptr,%r11
      movq   %rsp,(%r11)
      movq   %r11,%rsp
      [...]
      call __common_interrupt  # rip:common_interrupt

So frame_size(rip:common_interrupt) = 32 (3 push + ret).

Hence "machdep->machspec->irq_eframe_link = -32;" (see x86_64_irq_eframe_link_init()).

Now:

asm_sysvec_apic_timer_interrupt:
      pushq  $-0x1
      callq  error_entry
      movq   %rax,%rsp
      movq   %rsp,%rdi
      callq  sysvec_apic_timer_interrupt

sysvec_apic_timer_interrupt:
      pushq  %r12
      pushq  %rbp
      [...]
      movq   hardirq_stack_ptr,%r11
      movq   %rsp,(%r11)
      movq   %r11,%rsp
      [...]
      call __sysvec_apic_timer_interrupt  # rip:sysvec_apic_timer_interrupt

Here frame_size(rip:sysvec_apic_timer_interrupt) = 24 (2 push + ret)

We should also notice that:

rip  = *(hardirq_stack_ptr - 8)
rsp  = *(hardirq_stack_ptr)
regs = rsp - frame_size(rip)

But x86_64_get_framesize() does not work with IRQ handlers (returns 0).
So not many options other than hardcoding the most likely value and
looking around it.  Actually x86_64_irq_eframe_link() was trying -32
(default), and then -40, but not -24.

Signed-off-by: Georges Aureau <georges.aureau@hpe.com>
This commit is contained in:
Aureau, Georges (Kernel Tools ERT) 2024-04-04 14:39:06 +00:00 committed by Kazuhito Hagio
parent ced754d3f8
commit 7d4daf0c03
1 changed files with 6 additions and 5 deletions

View File

@ -6623,13 +6623,14 @@ x86_64_irq_eframe_link_init(void)
/*
* Calculate and verify the IRQ exception frame location from the
* stack reference at the top of the IRQ stack, possibly adjusting
* the ms->irq_eframe_link value.
* stack reference at the top of the IRQ stack, keep ms->irq_eframe_link
* as the most likely value, and try a few sizes around it.
*/
static ulong
x86_64_irq_eframe_link(ulong stkref, struct bt_info *bt, FILE *ofp)
{
ulong irq_eframe;
int i, try[] = { 8, -8, 16, -16 };
if (x86_64_exception_frame(EFRAME_VERIFY, stkref, 0, bt, ofp))
return stkref;
@ -6639,9 +6640,9 @@ x86_64_irq_eframe_link(ulong stkref, struct bt_info *bt, FILE *ofp)
if (x86_64_exception_frame(EFRAME_VERIFY, irq_eframe, 0, bt, ofp))
return irq_eframe;
if (x86_64_exception_frame(EFRAME_VERIFY, irq_eframe+8, 0, bt, ofp)) {
machdep->machspec->irq_eframe_link -= 8;
return (irq_eframe + 8);
for (i = 0; i < sizeof(try)/sizeof(int); i++) {
if (x86_64_exception_frame(EFRAME_VERIFY, irq_eframe+try[i], 0, bt, ofp))
return (irq_eframe + try[i]);
}
return irq_eframe;