diff --git a/arch/riscv/include/asm/kexec.h b/arch/riscv/include/asm/kexec.h
index b9ee8346cc8c9a..315dce0d9ca61b 100644
--- a/arch/riscv/include/asm/kexec.h
+++ b/arch/riscv/include/asm/kexec.h
@@ -75,4 +75,13 @@ int load_extra_segments(struct kimage *image, unsigned long kernel_start,
 			unsigned long cmdline_len);
 #endif
 
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_MMU
+#define __kexec_tramp_text __section(".kexec.tramp.text")
+#else
+#define __kexec_tramp_text
+#endif /* CONFIG_MMU */
+extern char __kexec_tramp_text_start[];
+#endif /* !__ASSEMBLY__ */
+
 #endif
diff --git a/arch/riscv/kernel/image-vars.h b/arch/riscv/kernel/image-vars.h
index 3bd9d06a8b8ff9..5419609ff89ce2 100644
--- a/arch/riscv/kernel/image-vars.h
+++ b/arch/riscv/kernel/image-vars.h
@@ -34,4 +34,17 @@ __efistub_sysfb_primary_display	= sysfb_primary_display;
 
 #endif
 
+#ifdef CONFIG_KEXEC_CORE
+#define KEXEC_TRAMP_TEXT						\
+	. = ALIGN(PAGE_SIZE);						\
+	__kexec_tramp_text_start = .;					\
+	KEEP(*(.kexec.tramp.text))					\
+	KEEP(*(.kexec.tramp.text.*))					\
+	__kexec_tramp_text_end = .;					\
+	ASSERT((__kexec_tramp_text_end - __kexec_tramp_text_start) <= PAGE_SIZE, \
+	       ".kexec.tramp.text exceeds 4K");
+#else
+#define KEXEC_TRAMP_TEXT /* nothing */
+#endif
+
 #endif /* __RISCV_KERNEL_IMAGE_VARS_H */
diff --git a/arch/riscv/kernel/kexec_relocate.S b/arch/riscv/kernel/kexec_relocate.S
index de0a4b35d01efc..2b9892bf04f2d7 100644
--- a/arch/riscv/kernel/kexec_relocate.S
+++ b/arch/riscv/kernel/kexec_relocate.S
@@ -147,13 +147,35 @@ riscv_kexec_relocate_end:
 
 
 /* Used for jumping to crashkernel */
-.section ".text"
+.extern kexec_tramp_satp
+.extern riscv_kexec_norelocate_pa
+.section ".kexec.tramp.text", "ax"
 SYM_CODE_START(riscv_kexec_norelocate)
+	/*
+	 * Two-pass entry:
+	 *  - 1st entry (kernel VA): t3 == 0 (set by machine_kexec()), so load
+	 *    the PA of this function into t3, install the trampoline page
+	 *    table and re-enter at that PA.
+	 *  - 2nd entry (PA): auipc now yields the value held in t3, so branch
+	 *    to label 1 and continue under the trampoline VA(=PA) mapping.
+	 */
+	auipc	t0, 0
+	beq	t0, t3, 1f
+
+	la	t0, riscv_kexec_norelocate_pa
+	REG_L	t3, 0(t0)
+	la	t0, kexec_tramp_satp
+	REG_L	t1, 0(t0)
+	csrw	CSR_SATP, t1
+	sfence.vma x0, x0
+
+	jr	t3
 	/*
 	 * s0: (const) Phys address to jump to
 	 * s1: (const) Phys address of the FDT image
 	 * s2: (const) The hartid of the current hart
 	 */
+1:
 	mv	s0, a1
 	mv	s1, a2
 	mv	s2, a3
@@ -199,13 +221,13 @@ SYM_CODE_START(riscv_kexec_norelocate)
 	csrw	CSR_SSCRATCH, zero
 
 	/*
-	 * Switch to physical addressing
-	 * This will also trigger a jump to CSR_STVEC
-	 * which in this case is the address of the new
-	 * kernel.
+	 * We are already executing from the trampoline VA with the trampoline
+	 * page table installed, so there is no need to rely on the old flow
+	 * of programming stvec and taking the implicit trap on SATP switch.
+	 * Jump directly to the target entry instead.
 	 */
-	csrw	CSR_STVEC, a2
 	csrw	CSR_SATP, zero
+	jr	a2
 SYM_CODE_END(riscv_kexec_norelocate)
 
 
diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c
index 2306ce3e5f229f..d78e7928c6cfa1 100644
--- a/arch/riscv/kernel/machine_kexec.c
+++ b/arch/riscv/kernel/machine_kexec.c
@@ -18,6 +18,111 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 
+unsigned long kexec_tramp_satp;
+unsigned long riscv_kexec_norelocate_pa;
+static pgd_t kexec_tramp_pgd[PTRS_PER_PGD] __aligned(PAGE_SIZE);
+static p4d_t kexec_tramp_p4d[PTRS_PER_P4D] __aligned(PAGE_SIZE);
+static pud_t kexec_tramp_pud[PTRS_PER_PUD] __aligned(PAGE_SIZE);
+static pmd_t kexec_tramp_pmd[PTRS_PER_PMD] __aligned(PAGE_SIZE);
+static pte_t kexec_tramp_pte[PTRS_PER_PTE] __aligned(PAGE_SIZE);
+static p4d_t kexec_tramp_p4d2[PTRS_PER_P4D] __aligned(PAGE_SIZE);
+static pud_t kexec_tramp_pud2[PTRS_PER_PUD] __aligned(PAGE_SIZE);
+static pmd_t kexec_tramp_pmd2[PTRS_PER_PMD] __aligned(PAGE_SIZE);
+static pte_t kexec_tramp_pte2[PTRS_PER_PTE] __aligned(PAGE_SIZE);
+
+/*
+ * riscv_kexec_build_tramp - map one page twice in the trampoline page table
+ * @va: virtual address whose translation is installed under kexec_tramp_pgd
+ * @pa: physical address of the page; also installed as an identity mapping
+ *
+ * Populates the static kexec_tramp_* tables so that both @va and @pa
+ * translate to @pa with PAGE_KERNEL_EXEC. The two walks use separate
+ * lower-level tables (the *2 arrays serve the @pa walk).
+ */
+static void riscv_kexec_build_tramp(unsigned long va, unsigned long pa)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	p4d_t *p4d;
+	pmd_t *pmd;
+	pte_t *pte;
+	int index;
+
+	index = pgd_index(va);
+	pgd = (pgd_t *)kexec_tramp_pgd + index;
+	if (pgtable_l5_enabled)
+		set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa_symbol(kexec_tramp_p4d)),
+				     PAGE_TABLE));
+	else
+		set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa_symbol(kexec_tramp_pud)),
+				     PAGE_TABLE));
+
+	if (pgtable_l5_enabled) {
+		index = p4d_index(va);
+		p4d = (p4d_t *)kexec_tramp_p4d + index;
+		if (pgtable_l4_enabled)
+			set_p4d(p4d, pfn_p4d(PFN_DOWN(__pa_symbol(kexec_tramp_pud)),
+					     PAGE_TABLE));
+		else
+			set_p4d(p4d, pfn_p4d(PFN_DOWN(__pa_symbol(kexec_tramp_pmd)),
+					     PAGE_TABLE));
+	}
+
+	if (pgtable_l4_enabled) {
+		index = pud_index(va);
+		pud = (pud_t *)kexec_tramp_pud + index;
+		set_pud(pud, pfn_pud(PFN_DOWN(__pa_symbol(kexec_tramp_pmd)), PAGE_TABLE));
+	}
+
+	/* Without a PUD level, kexec_tramp_pud is used as the PMD table. */
+	index = pmd_index(va);
+	if (pgtable_l4_enabled)
+		pmd = (pmd_t *)kexec_tramp_pmd + index;
+	else
+		pmd = (pmd_t *)kexec_tramp_pud + index;
+	set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa_symbol(kexec_tramp_pte)), PAGE_TABLE));
+
+	index = pte_index(va);
+	pte = (pte_t *)kexec_tramp_pte + index;
+	set_pte(pte, pfn_pte(PFN_DOWN(pa), PAGE_KERNEL_EXEC));
+
+	index = pgd_index(pa);
+	pgd = (pgd_t *)kexec_tramp_pgd + index;
+	if (pgtable_l5_enabled)
+		set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa_symbol(kexec_tramp_p4d2)), PAGE_TABLE));
+	else
+		set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa_symbol(kexec_tramp_pud2)), PAGE_TABLE));
+
+	if (pgtable_l5_enabled) {
+		index = p4d_index(pa);
+		p4d = (p4d_t *)kexec_tramp_p4d2 + index;
+		if (pgtable_l4_enabled)
+			set_p4d(p4d, pfn_p4d(PFN_DOWN(__pa_symbol(kexec_tramp_pud2)),
+					     PAGE_TABLE));
+		else
+			set_p4d(p4d, pfn_p4d(PFN_DOWN(__pa_symbol(kexec_tramp_pmd2)),
+					     PAGE_TABLE));
+	}
+
+	if (pgtable_l4_enabled) {
+		index = pud_index(pa);
+		pud = (pud_t *)kexec_tramp_pud2 + index;
+		set_pud(pud, pfn_pud(PFN_DOWN(__pa_symbol(kexec_tramp_pmd2)), PAGE_TABLE));
+	}
+
+	/* Without a PUD level, kexec_tramp_pud2 is used as the PMD table. */
+	index = pmd_index(pa);
+	if (pgtable_l4_enabled)
+		pmd = (pmd_t *)kexec_tramp_pmd2 + index;
+	else
+		pmd = (pmd_t *)kexec_tramp_pud2 + index;
+	set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa_symbol(kexec_tramp_pte2)), PAGE_TABLE));
+
+	index = pte_index(pa);
+	pte = (pte_t *)kexec_tramp_pte2 + index;
+	set_pte(pte, pfn_pte(PFN_DOWN(pa), PAGE_KERNEL_EXEC));
+}
+
 /*
  * machine_kexec_prepare - Initialize kexec
  *
@@ -164,8 +269,19 @@ machine_kexec(struct kimage *image)
 
-	if (image->type != KEXEC_TYPE_CRASH)
+	if (image->type != KEXEC_TYPE_CRASH) {
 		kexec_method = control_code_buffer;
-	else
+	} else {
 		kexec_method = (riscv_kexec_method) &riscv_kexec_norelocate;
+		/*
+		 * Map the 4KB trampoline page twice in the trampoline page
+		 * table (only the second mapping is an identity mapping):
+		 *   - VA(__kexec_tramp_text_start) -> PA(__kexec_tramp_text_start)
+		 *   - PA(__kexec_tramp_text_start) -> PA(__kexec_tramp_text_start)
+		 */
+		riscv_kexec_build_tramp((unsigned long)__kexec_tramp_text_start,
+					__pa_symbol(__kexec_tramp_text_start));
+		riscv_kexec_norelocate_pa = __pa_symbol(&riscv_kexec_norelocate);
+		kexec_tramp_satp = PFN_DOWN(__pa_symbol(kexec_tramp_pgd)) | satp_mode;
+	}
 
 	pr_notice("Will call new kernel at %08lx from hart id %lx\n",
 		  jump_addr, this_hart_id);
@@ -176,6 +292,15 @@ machine_kexec(struct kimage *image)
 
 	/* Jump to the relocation code */
 	pr_notice("Bye...\n");
+	/*
+	 * Initialize t3 to 0 for riscv_kexec_norelocate().
+	 *
+	 * The trampoline compares t3 with its own PC to tell its first (VA)
+	 * entry from its second (PA) entry. NOTE(review): nothing prevents
+	 * the compiler from reusing t3 between this asm statement and the
+	 * indirect call below; the marker should travel through the ABI.
+	 */
+	asm volatile ("li t3, 0x0" ::: "t3");
 	kexec_method(first_ind_entry, jump_addr, fdt_addr,
 		     this_hart_id, kernel_map.va_pa_offset);
 	unreachable();
diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index 61bd5ba6680a78..47fefee7726671 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -45,6 +45,7 @@ SECTIONS
 		ENTRY_TEXT
 		IRQENTRY_TEXT
 		SOFTIRQENTRY_TEXT
+		KEXEC_TRAMP_TEXT
 		_etext = .;
 	}
 