Fix for support of KASLR enabled kernels captured by the SADUMP

dumpfile facility. SADUMP dumpfile headers do not contain phys_base
or VMCOREINFO notes, so without this patch, the crash session fails
during initialization with the message 'crash: seek error: kernel
virtual address: <address>  type: "page_offset_base"'.  This patch
calculates the phys_base value and the KASLR offset using the IDTR
and CR3 registers from the dumpfile header.
(indou.takao@jp.fujitsu.com)
This commit is contained in:
Dave Anderson 2017-10-23 11:15:39 -04:00
parent 090bf28907
commit 45b74b8953
5 changed files with 534 additions and 1 deletions

4
defs.h
View File

@ -2591,6 +2591,9 @@ struct symbol_table_data {
ulong last_section_end;
ulong _stext_vmlinux;
struct downsized downsized;
ulong divide_error_vmlinux;
ulong idt_table_vmlinux;
ulong saved_command_line_vmlinux;
};
/* flags for st */
@ -6312,6 +6315,7 @@ void sadump_set_zero_excluded(void);
void sadump_unset_zero_excluded(void);
struct sadump_data;
struct sadump_data *get_sadump_data(void);
int sadump_calc_kaslr_offset(ulong *);
/*
* qemu.c

465
sadump.c
View File

@ -1558,12 +1558,17 @@ sadump_display_regs(int cpu, FILE *ofp)
*/
int sadump_phys_base(ulong *phys_base)
{
if (SADUMP_VALID()) {
if (SADUMP_VALID() && !sd->phys_base) {
if (CRASHDEBUG(1))
error(NOTE, "sadump: does not save phys_base.\n");
return FALSE;
}
if (sd->phys_base) {
*phys_base = sd->phys_base;
return TRUE;
}
return FALSE;
}
@ -1649,3 +1654,461 @@ get_sadump_data(void)
{
return sd;
}
#ifdef X86_64
static int
get_sadump_smram_cpu_state_any(struct sadump_smram_cpu_state *smram)
{
ulong offset;
struct sadump_header *sh = sd->dump_header;
int apicid;
struct sadump_smram_cpu_state scs, zero;
offset = sd->sub_hdr_offset + sizeof(uint32_t) +
sd->dump_header->nr_cpus * sizeof(struct sadump_apic_state);
memset(&zero, 0, sizeof(zero));
for (apicid = 0; apicid < sh->nr_cpus; ++apicid) {
if (!read_device(&scs, sizeof(scs), &offset)) {
error(INFO, "sadump: cannot read sub header "
"cpu_state\n");
return FALSE;
}
if (memcmp(&scs, &zero, sizeof(scs)) != 0) {
*smram = scs;
return TRUE;
}
}
return FALSE;
}
/*
 * Read the first gate descriptor (vector 0, Divide Error) of the
 * Interrupt Descriptor Table located at physical address idtr and return
 * the 64-bit handler address encoded in it.
 */
static ulong
get_vec0_addr(ulong idtr)
{
	/* 16-byte gate descriptor; the handler address is split in three */
	struct gate_struct64 {
		uint16_t offset_low;
		uint16_t segment;
		uint32_t ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1;
		uint16_t offset_middle;
		uint32_t offset_high;
		uint32_t zero1;
	} __attribute__((packed)) gate;
	ulong handler;

	readmem(idtr, PHYSADDR, &gate, sizeof(gate), "idt_table", FAULT_ON_ERROR);

	/* the three pieces occupy disjoint bit ranges, so OR == addition */
	handler = (ulong)gate.offset_high << 32;
	handler |= (ulong)gate.offset_middle << 16;
	handler |= gate.offset_low;

	return handler;
}
/*
 * Convert the number at the start of ptr (base auto-detected by
 * strtoull) and scale it by an optional binary unit suffix:
 * K, M, G, T, P or E in either case, i.e. 2^10 ... 2^60.
 * Modelled on memparse() in the Linux kernel (lib/cmdline.c).
 *
 * If retptr is not NULL it receives a pointer to the first character
 * that was not consumed (just past the suffix when one was present).
 */
static ulong memparse(char *ptr, char **retptr)
{
	static const char upper[] = "KMGTPE";
	static const char lower[] = "kmgtpe";
	char *tail;
	unsigned long long value = strtoull(ptr, &tail, 0);
	unsigned int rank;

	/* rank 0 is K (one shift by 10), rank 5 is E (six shifts by 10) */
	for (rank = 0; rank < sizeof(upper) - 1; rank++) {
		if (*tail != upper[rank] && *tail != lower[rank])
			continue;
		value <<= 10 * (rank + 1);
		tail++;
		break;
	}

	if (retptr)
		*retptr = tail;

	return value;
}
/*
 * Find "elfcorehdr=" in the boot parameter of the kernel and return the
 * address given there.
 *
 * cr3:          not used by this function.
 * kaslr_offset: added to st->saved_command_line_vmlinux to obtain the
 *               virtual address of the saved_command_line pointer.
 *
 * Returns the address parsed from "elfcorehdr=[size@]addr", or 0 when an
 * address translation or memory read fails or the parameter is absent.
 */
static ulong
get_elfcorehdr(ulong cr3, ulong kaslr_offset)
{
	char cmdline[BUFSIZE], *ptr;
	ulong cmdline_vaddr;
	ulong cmdline_paddr;
	ulong buf_vaddr, buf_paddr;
	char *end;
	ulong elfcorehdr_addr = 0, elfcorehdr_size = 0;
	int verbose = CRASHDEBUG(1)? 1: 0;

	cmdline_vaddr = st->saved_command_line_vmlinux + kaslr_offset;
	if (!kvtop(NULL, cmdline_vaddr, &cmdline_paddr, verbose))
		return 0;

	if (CRASHDEBUG(1)) {
		fprintf(fp, "cmdline vaddr=%lx\n", cmdline_vaddr);
		fprintf(fp, "cmdline paddr=%lx\n", cmdline_paddr);
	}

	/* saved_command_line is a pointer; fetch the buffer address it holds */
	if (!readmem(cmdline_paddr, PHYSADDR, &buf_vaddr, sizeof(ulong),
		     "saved_command_line", RETURN_ON_ERROR))
		return 0;

	if (!kvtop(NULL, buf_vaddr, &buf_paddr, verbose))
		return 0;

	if (CRASHDEBUG(1)) {
		fprintf(fp, "cmdline buffer vaddr=%lx\n", buf_vaddr);
		fprintf(fp, "cmdline buffer paddr=%lx\n", buf_paddr);
	}

	memset(cmdline, 0, BUFSIZE);
	if (!readmem(buf_paddr, PHYSADDR, cmdline, BUFSIZE,
		     "saved_command_line", RETURN_ON_ERROR))
		return 0;

	/*
	 * The read overwrites the whole buffer, so nothing guarantees a
	 * terminating NUL.  Force one before handing it to strstr().
	 */
	cmdline[BUFSIZE - 1] = '\0';

	ptr = strstr(cmdline, "elfcorehdr=");
	if (!ptr)
		return 0;

	if (CRASHDEBUG(1))
		fprintf(fp, "2nd kernel detected\n");

	ptr += strlen("elfcorehdr=");
	elfcorehdr_addr = memparse(ptr, &end);
	if (*end == '@') {
		/* "size@addr" form: the first number was the size */
		elfcorehdr_size = elfcorehdr_addr;
		elfcorehdr_addr = memparse(end + 1, &end);
	}

	if (CRASHDEBUG(1)) {
		fprintf(fp, "elfcorehdr_addr=%lx\n", elfcorehdr_addr);
		fprintf(fp, "elfcorehdr_size=%lx\n", elfcorehdr_size);
	}

	return elfcorehdr_addr;
}
/*
 * Locate the VMCOREINFO note through the ELF core header found at
 * physical address elfcorehdr.
 * Parts of this are derived from the Linux kernel (fs/proc/vmcore.c).
 *
 * On success *addr receives the address of the note's descriptor data,
 * *len receives its size (n_descsz), and TRUE is returned.  FALSE is
 * returned on a read error, an unacceptable ELF header, or when no
 * PT_NOTE segment begins with a note named "VMCOREINFO".
 */
static int
get_vmcoreinfo(ulong elfcorehdr, ulong *addr, int *len)
{
	unsigned char ident[EI_NIDENT];
	Elf64_Ehdr elf_hdr;
	Elf64_Phdr prog_hdr;
	Elf64_Nhdr note_hdr;
	ulong phdr_addr;
	ulong name_addr = 0;
	int header_ok;
	int idx;

	/* Look at the identification bytes first to reject non-64-bit ELF */
	if (!readmem(elfcorehdr, PHYSADDR, ident, EI_NIDENT,
		     "EI_NIDENT", RETURN_ON_ERROR))
		return FALSE;

	if (ident[EI_CLASS] != ELFCLASS64) {
		error(INFO, "Only ELFCLASS64 is supportd\n");
		return FALSE;
	}

	if (!readmem(elfcorehdr, PHYSADDR, &elf_hdr, sizeof(elf_hdr),
		     "Elf64_Ehdr", RETURN_ON_ERROR))
		return FALSE;

	/* Sanity check: every one of these must hold */
	header_ok = memcmp(elf_hdr.e_ident, ELFMAG, SELFMAG) == 0 &&
		    elf_hdr.e_type == ET_CORE &&
		    elf_hdr.e_ident[EI_CLASS] == ELFCLASS64 &&
		    elf_hdr.e_ident[EI_VERSION] == EV_CURRENT &&
		    elf_hdr.e_version == EV_CURRENT &&
		    elf_hdr.e_ehsize == sizeof(Elf64_Ehdr) &&
		    elf_hdr.e_phentsize == sizeof(Elf64_Phdr) &&
		    elf_hdr.e_phnum != 0;
	if (!header_ok) {
		error(INFO, "Invalid elf header\n");
		return FALSE;
	}

	/* Walk the program headers; only the first note of a PT_NOTE is examined */
	phdr_addr = elfcorehdr + elf_hdr.e_phoff;
	for (idx = 0; idx < elf_hdr.e_phnum; idx++, phdr_addr += sizeof(prog_hdr)) {
		ulong note_addr;
		char note_name[16];

		if (!readmem(phdr_addr, PHYSADDR, &prog_hdr, sizeof(prog_hdr),
			     "Elf64_Phdr", RETURN_ON_ERROR))
			return FALSE;

		if (prog_hdr.p_type != PT_NOTE)
			continue;

		/* p_offset is passed to readmem() as a physical address */
		note_addr = prog_hdr.p_offset;
		if (!readmem(note_addr, PHYSADDR, &note_hdr, sizeof(note_hdr),
			     "Elf64_Nhdr", RETURN_ON_ERROR))
			return FALSE;

		/* the name follows the note header, rounded up to a word */
		note_addr += DIV_ROUND_UP(sizeof(Elf64_Nhdr), sizeof(Elf64_Word))*
			     sizeof(Elf64_Word);

		memset(note_name, 0, sizeof(note_name));
		if (!readmem(note_addr, PHYSADDR, note_name, sizeof(note_name),
			     "Elf64_Nhdr name", RETURN_ON_ERROR))
			return FALSE;

		if (strcmp(note_name, "VMCOREINFO") == 0) {
			name_addr = note_addr;
			break;
		}
	}

	if (name_addr == 0)
		return FALSE;

	/* the descriptor follows the name, which is padded to a word boundary */
	*addr = name_addr +
		DIV_ROUND_UP(note_hdr.n_namesz, sizeof(Elf64_Word))*
		sizeof(Elf64_Word);
	*len = note_hdr.n_descsz;

	if (CRASHDEBUG(1)) {
		fprintf(fp, "vmcoreinfo addr=%lx\n", *addr);
		fprintf(fp, "vmcoreinfo len=%d\n", *len);
	}

	return TRUE;
}
/*
 * Check if current kaslr_offset/phys_base is for 1st kernel or 2nd kernel.
 * If we are in 2nd kernel, get kaslr_offset/phys_base from vmcoreinfo.
 *
 * 1. Get command line and try to retrieve "elfcorehdr=" boot parameter
 * 2. If "elfcorehdr=" is not found in command line, we are in 1st kernel.
 *    There is nothing to do.
 * 3. If "elfcorehdr=" is found, we are in 2nd kernel. Find vmcoreinfo
 *    using "elfcorehdr=" and retrieve kaslr_offset/phys_base from vmcoreinfo.
 *
 * cr3, orig_kaslr_offset: passed through to get_elfcorehdr().
 * kaslr_offset, phys_base: outputs, written only when TRUE is returned.
 *
 * Returns TRUE when both "NUMBER(phys_base)=" and "KERNELOFFSET=" were
 * found in the vmcoreinfo data, FALSE otherwise.
 */
static int
get_kaslr_offset_from_vmcoreinfo(ulong cr3, ulong orig_kaslr_offset,
				 ulong *kaslr_offset, ulong *phys_base)
{
	ulong elfcorehdr_addr = 0;
	ulong vmcoreinfo_addr;
	ulong parsed_phys_base;
	int vmcoreinfo_len;
	char *buf, *pos;
	int ret = FALSE;

	/* Find "elfcorehdr=" in the kernel boot parameter */
	elfcorehdr_addr = get_elfcorehdr(cr3, orig_kaslr_offset);
	if (!elfcorehdr_addr)
		return FALSE;

	/* Get vmcoreinfo from the address of "elfcorehdr=" */
	if (!get_vmcoreinfo(elfcorehdr_addr, &vmcoreinfo_addr, &vmcoreinfo_len))
		return FALSE;

	/* The length comes from dump contents; reject empty or negative */
	if (vmcoreinfo_len <= 0)
		return FALSE;

	if (CRASHDEBUG(1))
		fprintf(fp, "Find vmcoreinfo in kdump memory\n");

	/*
	 * One extra byte for a terminating NUL: the data read from the dump
	 * fills the first vmcoreinfo_len bytes and is searched with strstr(),
	 * which requires a NUL-terminated string.
	 */
	buf = GETBUF(vmcoreinfo_len + 1);
	if (!readmem(vmcoreinfo_addr, PHYSADDR, buf, vmcoreinfo_len,
		     "vmcoreinfo", RETURN_ON_ERROR))
		goto quit;
	buf[vmcoreinfo_len] = '\0';

	/* Get phys_base from vmcoreinfo */
	pos = strstr(buf, "NUMBER(phys_base)=");
	if (!pos)
		goto quit;
	parsed_phys_base = strtoull(pos + strlen("NUMBER(phys_base)="), NULL, 0);

	/* Get kaslr_offset from vmcoreinfo */
	pos = strstr(buf, "KERNELOFFSET=");
	if (!pos)
		goto quit;

	/* Both values are present: only now update the caller's variables */
	*phys_base = parsed_phys_base;
	*kaslr_offset = strtoull(pos + strlen("KERNELOFFSET="), NULL, 16);
	ret = TRUE;

quit:
	FREEBUF(buf);
	return ret;
}
/*
* Calculate kaslr_offset and phys_base
*
* kaslr_offset:
* The difference between original address in System.map or vmlinux and
* actual address placed randomly by kaslr feature. To be more accurate,
* kaslr_offset = actual address - original address
*
* phys_base:
* Physical address where the kernel is placed. In other words, it's a
* physical address of __START_KERNEL_map. This is also decided randomly by
* kaslr.
*
* kaslr offset and phys_base are calculated as follows:
*
* kaslr_offset:
* 1) Get IDTR and CR3 value from the dump header.
* 2) Get a virtual address of IDT from IDTR value
* --- (A)
* 3) Translate (A) to physical address using CR3, which points a top of
* page table.
* --- (B)
* 4) Get an address of vector0 (Divide Error) interrupt handler from
* IDT, which is pointed to by (B).
* --- (C)
* 5) Get an address of symbol "divide_error" from vmlinux
* --- (D)
*
* Now we have two addresses:
* (C)-> Actual address of "divide_error"
* (D)-> Original address of "divide_error" in the vmlinux
*
* kaslr_offset can be calculated by the difference between these two
* value.
*
* phys_base:
* 1) Get IDT virtual address from vmlinux
* --- (E)
*
* So phys_base can be calculated using relationship of directly mapped
* address.
*
* phys_base =
* Physical address(B) -
* (Virtual address(E) + kaslr_offset - __START_KERNEL_map)
*
* Note that the address (A) cannot be used instead of (E) because (A) is
* not direct map address, it's a fixed map address.
*
* This solution works in most every case, but does not work in the
* following case.
*
* 1) If the dump is captured on early stage of kernel boot, IDTR points
* early IDT table(early_idts) instead of normal IDT(idt_table).
* 2) If the dump is captured while kdump is working, IDTR points
* IDT table of 2nd kernel, not 1st kernel.
*
* Current implementation does not support the case 1), need
* enhancement in the future. For the case 2), get kaslr_offset and
* phys_base as follows.
*
* 1) Get kaslr_offset and phys_base using the above solution.
* 2) Get kernel boot parameter from "saved_command_line"
* 3) If "elfcorehdr=" is not included in boot parameter, we are in the
* first kernel, nothing to do any more.
* 4) If "elfcorehdr=" is included in boot parameter, we are in the 2nd
* kernel. Retrieve vmcoreinfo from address of "elfcorehdr=" and
* get kaslr_offset and phys_base from vmcoreinfo.
*/
/*
 * Derive the KASLR offset and phys_base from the CR3 and IDTR values saved
 * in the dump header.
 *
 * kaslr_offset: output; written once the IDT has been translated and read.
 * The computed phys_base is stored in sd->phys_base.
 *
 * Returns TRUE on success.  Returns FALSE when the machine type is not
 * X86_64, when no usable CPU state is found in the dump header, when the
 * top-level page table cannot be read, or when the IDT address cannot be
 * translated.
 */
int
sadump_calc_kaslr_offset(ulong *kaslr_offset)
{
	ulong phys_base = 0;
	struct sadump_smram_cpu_state scs;
	uint64_t idtr = 0, cr3 = 0, idtr_paddr;
	ulong divide_error_vmcore;
	ulong kaslr_offset_kdump, phys_base_kdump;
	int ret = FALSE;
	int verbose = CRASHDEBUG(1)? 1: 0;

	if (!machine_type("X86_64"))
		return FALSE;

	/*
	 * Without a CPU state record there is no CR3/IDTR to work with.
	 * Carrying on would walk page tables from physical address 0 and
	 * yield a meaningless offset, so give up before touching any of
	 * the global translation state.
	 */
	memset(&scs, 0, sizeof(scs));
	if (!get_sadump_smram_cpu_state_any(&scs))
		return FALSE;

	cr3 = scs.Cr3;
	idtr = ((uint64_t)scs.IdtUpper)<<32 | (uint64_t)scs.IdtLower;

	/*
	 * Set up for kvtop.
	 *
	 * calc_kaslr_offset() is called before machdep_init(PRE_GDB), so some
	 * variables are not initialized yet. Set up them here to call kvtop().
	 *
	 * TODO: XEN and 5-level is not supported
	 */
	vt->kernel_pgd[0] = cr3;
	machdep->machspec->last_pml4_read = vt->kernel_pgd[0];
	machdep->machspec->physical_mask_shift = __PHYSICAL_MASK_SHIFT_2_6;
	machdep->machspec->pgdir_shift = PGDIR_SHIFT;
	if (!readmem(cr3, PHYSADDR, machdep->machspec->pml4, PAGESIZE(),
		     "cr3", RETURN_ON_ERROR))
		goto quit;

	/* Convert virtual address of IDT table to physical address */
	if (!kvtop(NULL, idtr, &idtr_paddr, verbose))
		goto quit;

	/* Now we can calculate kaslr_offset and phys_base */
	divide_error_vmcore = get_vec0_addr(idtr_paddr);
	*kaslr_offset = divide_error_vmcore - st->divide_error_vmlinux;
	phys_base = idtr_paddr -
		(st->idt_table_vmlinux + *kaslr_offset - __START_KERNEL_map);

	if (CRASHDEBUG(1)) {
		fprintf(fp, "calc_kaslr_offset: idtr=%lx\n", idtr);
		fprintf(fp, "calc_kaslr_offset: cr3=%lx\n", cr3);
		fprintf(fp, "calc_kaslr_offset: idtr(phys)=%lx\n", idtr_paddr);
		fprintf(fp, "calc_kaslr_offset: divide_error(vmlinux): %lx\n",
			st->divide_error_vmlinux);
		fprintf(fp, "calc_kaslr_offset: divide_error(vmcore): %lx\n",
			divide_error_vmcore);
	}

	/*
	 * Check if current kaslr_offset/phys_base is for 1st kernel or 2nd
	 * kernel. If we are in 2nd kernel, get kaslr_offset/phys_base
	 * from vmcoreinfo
	 */
	if (get_kaslr_offset_from_vmcoreinfo(
		cr3, *kaslr_offset, &kaslr_offset_kdump, &phys_base_kdump)) {
		*kaslr_offset = kaslr_offset_kdump;
		phys_base = phys_base_kdump;
	}

	if (CRASHDEBUG(1)) {
		fprintf(fp, "calc_kaslr_offset: kaslr_offset=%lx\n",
			*kaslr_offset);
		fprintf(fp, "calc_kaslr_offset: phys_base=%lx\n", phys_base);
	}

	sd->phys_base = phys_base;
	ret = TRUE;

quit:
	/* Undo the temporary page-table setup made for kvtop() above */
	vt->kernel_pgd[0] = 0;
	machdep->machspec->last_pml4_read = 0;
	return ret;
}
#else
/*
 * Variant compiled when X86_64 is not defined: no calculation is
 * performed, kaslr_offset is left untouched and FALSE is returned.
 */
int
sadump_calc_kaslr_offset(ulong *kaslr_offset)
{
return FALSE;
}
#endif /* X86_64 */

View File

@ -219,6 +219,7 @@ struct sadump_data {
ulonglong backup_offset;
uint64_t max_mapnr;
ulong phys_base;
};
struct sadump_data *sadump_get_sadump_data(void);

View File

@ -624,6 +624,9 @@ kaslr_init(void)
st->_stext_vmlinux = UNINITIALIZED;
}
}
if (SADUMP_DUMPFILE())
kt->flags2 |= KASLR_CHECK;
}
/*
@ -637,6 +640,19 @@ derive_kaslr_offset(bfd *abfd, int dynamic, bfd_byte *start, bfd_byte *end,
unsigned long relocate;
ulong _stext_relocated;
if (SADUMP_DUMPFILE()) {
ulong kaslr_offset = 0;
sadump_calc_kaslr_offset(&kaslr_offset);
if (kaslr_offset) {
kt->relocate = kaslr_offset * -1;
kt->flags |= RELOC_SET;
}
return;
}
if (ACTIVE()) {
_stext_relocated = symbol_value_from_proc_kallsyms("_stext");
if (_stext_relocated == BADVAL)
@ -3052,6 +3068,16 @@ dump_symbol_table(void)
else
fprintf(fp, "\n");
if (SADUMP_DUMPFILE()) {
fprintf(fp, "divide_error_vmlinux: %lx\n", st->divide_error_vmlinux);
fprintf(fp, " idt_table_vmlinux: %lx\n", st->idt_table_vmlinux);
fprintf(fp, "saved_command_line_vmlinux: %lx\n", st->saved_command_line_vmlinux);
} else {
fprintf(fp, "divide_error_vmlinux: (unused)\n");
fprintf(fp, " idt_table_vmlinux: (unused)\n");
fprintf(fp, "saved_command_line_vmlinux: (unused)\n");
}
fprintf(fp, " symval_hash[%d]: %lx\n", SYMVAL_HASH,
(ulong)&st->symval_hash[0]);
@ -12246,6 +12272,24 @@ numeric_forward(const void *P_x, const void *P_y)
}
}
if (SADUMP_DUMPFILE()) {
/* Need for kaslr_offset and phys_base */
if (STREQ(x->name, "divide_error"))
st->divide_error_vmlinux = valueof(x);
else if (STREQ(y->name, "divide_error"))
st->divide_error_vmlinux = valueof(y);
if (STREQ(x->name, "idt_table"))
st->idt_table_vmlinux = valueof(x);
else if (STREQ(y->name, "idt_table"))
st->idt_table_vmlinux = valueof(y);
if (STREQ(x->name, "saved_command_line"))
st->saved_command_line_vmlinux = valueof(x);
else if (STREQ(y->name, "saved_command_line"))
st->saved_command_line_vmlinux = valueof(y);
}
xs = bfd_get_section(x);
ys = bfd_get_section(y);

View File

@ -194,6 +194,9 @@ x86_64_init(int when)
machdep->machspec->kernel_image_size = dtol(string, QUIET, NULL);
free(string);
}
if (SADUMP_DUMPFILE())
/* Need for calculation of kaslr_offset and phys_base */
machdep->kvtop = x86_64_kvtop;
break;
case PRE_GDB:
@ -2019,6 +2022,22 @@ x86_64_kvtop(struct task_context *tc, ulong kvaddr, physaddr_t *paddr, int verbo
ulong pte;
physaddr_t physpage;
if (SADUMP_DUMPFILE() && !(machdep->flags & KSYMS_START)) {
/*
* In the case of sadump, to calculate kaslr_offset and
* phys_base, kvtop is called during symtab_init(). In this
* stage phys_base is not initialized yet and x86_64_VTOP()
* does not work. Jump to the code of pagetable translation.
*/
FILL_PML4();
pml4 = ((ulong *)machdep->machspec->pml4) + pml4_index(kvaddr);
if (verbose) {
fprintf(fp, "PML4 DIRECTORY: %lx\n", vt->kernel_pgd[0]);
fprintf(fp, "PAGE DIRECTORY: %lx\n", *pml4);
}
goto start_vtop_with_pagetable;
}
if (!IS_KVADDR(kvaddr))
return FALSE;
@ -2065,6 +2084,8 @@ x86_64_kvtop(struct task_context *tc, ulong kvaddr, physaddr_t *paddr, int verbo
fprintf(fp, "PAGE DIRECTORY: %lx\n", *pml4);
}
}
start_vtop_with_pagetable:
if (!(*pml4) & _PAGE_PRESENT)
goto no_kpage;
pgd_paddr = (*pml4) & PHYSICAL_PAGE_MASK;