Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions arch/riscv/include/asm/kexec.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ typedef void (*riscv_kexec_method)(unsigned long first_ind_entry,
unsigned long va_pa_off);

extern riscv_kexec_method riscv_kexec_norelocate;
extern riscv_kexec_method riscv_kexec_relocate_entry;

#ifdef CONFIG_KEXEC_FILE
extern const struct kexec_file_ops elf_kexec_ops;
Expand All @@ -75,4 +76,8 @@ int load_extra_segments(struct kimage *image, unsigned long kernel_start,
unsigned long cmdline_len);
#endif

#ifndef __ASSEMBLY__
extern char __kexec_tramp_text_start[];
#endif

#endif
14 changes: 14 additions & 0 deletions arch/riscv/kernel/image-vars.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,18 @@ __efistub_sysfb_primary_display = sysfb_primary_display;

#endif

/*
* KEXEC_TRAMP_TEXT - linker-script fragment that places the kexec
* trampoline code.
*
* With CONFIG_KEXEC_CORE enabled the fragment:
* - aligns the location counter to PAGE_SIZE and records it as
* __kexec_tramp_text_start,
* - keeps every .kexec.tramp.text and .kexec.tramp.text.* input section,
* - records the end as __kexec_tramp_text_end and fails the link if the
* collected code is larger than PAGE_SIZE,
* - pads to the next PAGE_SIZE boundary.
*
* Without CONFIG_KEXEC_CORE it expands to nothing.
*
* NOTE(review): the ASSERT message says "4K" while the check is against
* PAGE_SIZE; the two presumably coincide on this architecture - confirm.
*/
#ifdef CONFIG_KEXEC_CORE
#define KEXEC_TRAMP_TEXT \
. = ALIGN(PAGE_SIZE); \
__kexec_tramp_text_start = .; \
KEEP(*(.kexec.tramp.text)) \
KEEP(*(.kexec.tramp.text.*)) \
__kexec_tramp_text_end = .; \
ASSERT((__kexec_tramp_text_end - __kexec_tramp_text_start) <= PAGE_SIZE, \
".kexec.tramp.text exceeds 4K"); \
. = ALIGN(PAGE_SIZE);
#else
#define KEXEC_TRAMP_TEXT /* nothing */
#endif

#endif /* __RISCV_KERNEL_IMAGE_VARS_H */
97 changes: 71 additions & 26 deletions arch/riscv/kernel/kexec_relocate.S
Original file line number Diff line number Diff line change
Expand Up @@ -34,27 +34,13 @@ SYM_CODE_START(riscv_kexec_relocate)
csrw CSR_SIP, zero

/*
* When we switch SATP.MODE to "Bare" we'll only
* play with physical addresses. However the first time
* we try to jump somewhere, the offset on the jump
* will be relative to pc which will still be on VA. To
* deal with this we set stvec to the physical address at
* the start of the loop below so that we jump there in
* any case.
* The trampoline wrapper (riscv_kexec_relocate_entry) has already
* dropped the MMU and handed control to us at this PA copy of the
* relocate code. From here on the entire loop runs with SATP=0 and
* every address (s0, s5, source/dest pointers) is a physical one.
*/
la s6, 1f
sub s6, s6, s4
csrw CSR_STVEC, s6

/*
* With C-extension, here we get 42 Bytes and the next
* .align directive would pad zeros here up to 44 Bytes.
* So manually put a nop here to avoid zeros padding.
*/
nop

/* Process entries in a loop */
.align 2
1:
REG_L t0, 0(s0) /* t0 = *image->entry */
addi s0, s0, RISCV_SZPTR /* image->entry++ */
Expand All @@ -70,8 +56,8 @@ SYM_CODE_START(riscv_kexec_relocate)
andi t1, t0, 0x2
beqz t1, 2f
andi s0, t0, ~0x2
csrw CSR_SATP, zero
jr s6
/* MMU is already off; the entry wrapper handled the transition. */
j 1b

2:
/* IND_DONE entry ? -> jump to done label */
Expand Down Expand Up @@ -147,13 +133,35 @@ riscv_kexec_relocate_end:


/* Used for jumping to crashkernel */
.section ".text"
.extern kexec_tramp_satp
.extern riscv_kexec_norelocate_pa
.section ".kexec.tramp.text", "ax"
SYM_CODE_START(riscv_kexec_norelocate)
/*
* Two-pass entry:
* - 1st entry: t3 == 0 (initialized by machine_kexec()), so the
* auipc result does not match t3; install the trampoline page
* table and jump to the physical address of this routine.
*
* - 2nd entry: t3 holds the physical address of
* riscv_kexec_norelocate, so auipc matches t3 and we branch
* to label 1 to continue execution under trampoline VA(=PA).
*/
auipc t0, 0
beq t0, t3, 1f

la t0, riscv_kexec_norelocate_pa
REG_L t3, 0(t0)
la t0, kexec_tramp_satp
REG_L t1, 0(t0)
csrw CSR_SATP, t1
sfence.vma x0, x0

jr t3
/*
* s0: (const) Phys address to jump to
* s1: (const) Phys address of the FDT image
* s2: (const) The hartid of the current hart
*/
1:
mv s0, a1
mv s1, a2
mv s2, a3
Expand Down Expand Up @@ -199,16 +207,53 @@ SYM_CODE_START(riscv_kexec_norelocate)
csrw CSR_SSCRATCH, zero

/*
* Switch to physical addressing
* This will also trigger a jump to CSR_STVEC
* which in this case is the address of the new
* kernel.
* We are already executing from the trampoline VA with the trampoline
* page table installed, so there is no need to rely on the old flow
* of programming stvec and taking the implicit trap on SATP switch.
* Jump directly to the target entry instead.
*/
csrw CSR_STVEC, a2
csrw CSR_SATP, zero
jr a2

SYM_CODE_END(riscv_kexec_norelocate)

.extern riscv_kexec_relocate_entry_pa
.extern riscv_kexec_cc_buffer_pa
.section ".kexec.tramp.text", "ax"
SYM_CODE_START(riscv_kexec_relocate_entry)
/*
* Entry wrapper for the relocating kexec path.
*
* Two-pass entry, identical in shape to riscv_kexec_norelocate:
* - 1st entry: t3 == 0 (initialized by machine_kexec()), so the
* comparison below fails and the trampoline page table is installed
* before jumping to the physical address of this wrapper.
* - 2nd entry: t3 == PA of riscv_kexec_relocate_entry, so auipc
* matches t3 and the beq branches to label 1.
* Args a0..a4 are passed through unchanged to riscv_kexec_relocate.
* Only t0, t1 and t3 are written here.
*/
/* t0 = address this wrapper is currently executing at. */
auipc t0, 0
beq t0, t3, 1f

/*
* First pass. Both variables are loaded before SATP is written;
* the trampoline page table built in machine_kexec.c maps only the
* trampoline page, so they are presumably not reachable afterwards.
*/
la t0, riscv_kexec_relocate_entry_pa
REG_L t3, 0(t0)
la t0, kexec_tramp_satp
REG_L t1, 0(t0)
csrw CSR_SATP, t1
sfence.vma x0, x0

/* Re-enter this wrapper through the identity (PA -> PA) mapping. */
jr t3
1:
/*
* Now executing at the PA of this wrapper with the trampoline pgd
* installed (identity-mapped). Drop the MMU; PC stays valid because
* it is already a PA.
*/
csrw CSR_SATP, zero
sfence.vma x0, x0

/*
* Jump to the PA of control_code_buffer to run the relocate body.
* NOTE(review): with the MMU off, la is expected to resolve
* PC-relatively and so yield the variable's physical address -
* confirm for the code model used to build this file.
*/
la t0, riscv_kexec_cc_buffer_pa
REG_L t0, 0(t0)
jr t0
SYM_CODE_END(riscv_kexec_relocate_entry)

.section ".rodata"
SYM_DATA(riscv_kexec_relocate_size,
.long riscv_kexec_relocate_end - riscv_kexec_relocate)
Expand Down
123 changes: 118 additions & 5 deletions arch/riscv/kernel/machine_kexec.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,69 @@
#include <linux/interrupt.h>
#include <linux/irq.h>

/*
* Values read by the assembly entry stubs in kexec_relocate.S; all of them
* are filled in by machine_kexec_prepare().
*/
/* SATP value selecting kexec_tramp_pgd (root PFN combined with satp_mode). */
unsigned long kexec_tramp_satp;
/* Physical address of riscv_kexec_norelocate (set for crash images). */
unsigned long riscv_kexec_norelocate_pa;
/* Physical address of riscv_kexec_relocate_entry (set for non-crash images). */
unsigned long riscv_kexec_relocate_entry_pa;
/* Physical address of the control code buffer (set for non-crash images). */
unsigned long riscv_kexec_cc_buffer_pa;
/* Root of the trampoline page table; shared by both mappings below. */
static pgd_t kexec_tramp_pgd[PTRS_PER_PGD] __aligned(PAGE_SIZE);
/* Intermediate and leaf tables for the kernel-VA -> PA trampoline mapping. */
static p4d_t kexec_tramp_p4d[PTRS_PER_P4D] __aligned(PAGE_SIZE);
static pud_t kexec_tramp_pud[PTRS_PER_PUD] __aligned(PAGE_SIZE);
static pmd_t kexec_tramp_pmd[PTRS_PER_PMD] __aligned(PAGE_SIZE);
static pte_t kexec_tramp_pte[PTRS_PER_PTE] __aligned(PAGE_SIZE);
/* Intermediate and leaf tables for the identity (PA -> PA) mapping. */
static p4d_t kexec_tramp_p4d2[PTRS_PER_P4D] __aligned(PAGE_SIZE);
static pud_t kexec_tramp_pud2[PTRS_PER_PUD] __aligned(PAGE_SIZE);
static pmd_t kexec_tramp_pmd2[PTRS_PER_PMD] __aligned(PAGE_SIZE);
static pte_t kexec_tramp_pte2[PTRS_PER_PTE] __aligned(PAGE_SIZE);

/*
* map_tramp_page - map one page into the kexec trampoline page table
* @p4ds: table used at the P4D level (only touched when pgtable_l5_enabled)
* @puds: table used at the PUD level; when pgtable_l4_enabled is false it
* is indexed with pmd_index() and thus serves as the PMD-level table
* @pmds: table used at the PMD level (only touched when pgtable_l4_enabled)
* @ptes: leaf table receiving the final entry
* @va: virtual address to map
* @pa: physical address the page should resolve to
*
* Links kexec_tramp_pgd down to @ptes for @va, honouring the number of
* paging levels selected at runtime, and installs a PAGE_KERNEL_EXEC leaf
* for the page frame containing @pa. Existing entries at the touched
* indices are overwritten.
*/
static void map_tramp_page(p4d_t *p4ds, pud_t *puds, pmd_t *pmds, pte_t *ptes,
unsigned long va, unsigned long pa)
{
pgd_t *pgd = (pgd_t *)kexec_tramp_pgd + pgd_index(va);
pmd_t *pmd;

/* Top level: the pgd entry points at p4ds (5 levels) or puds (otherwise). */
if (pgtable_l5_enabled) {
p4d_t *p4d;

set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa_symbol(p4ds)), PAGE_TABLE));
p4d = (p4d_t *)p4ds + p4d_index(va);
/*
* NOTE(review): the else branch links the p4d entry to pmds, yet
* with pgtable_l4_enabled false the pmd entry below is written into
* puds, so the two would not line up. Presumably 5-level paging
* implies pgtable_l4_enabled and the branch is never taken - confirm.
*/
if (pgtable_l4_enabled)
set_p4d(p4d, pfn_p4d(PFN_DOWN(__pa_symbol(puds)),
PAGE_TABLE));
else
set_p4d(p4d, pfn_p4d(PFN_DOWN(__pa_symbol(pmds)),
PAGE_TABLE));
} else {
set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa_symbol(puds)), PAGE_TABLE));
}

/*
* With 4+ levels puds holds a real pud entry pointing at pmds. With
* fewer levels the pgd entry written above already points at puds, so
* the pmd entry is placed directly in puds.
*/
if (pgtable_l4_enabled) {
pud_t *pud = (pud_t *)puds + pud_index(va);

set_pud(pud, pfn_pud(PFN_DOWN(__pa_symbol(pmds)), PAGE_TABLE));
pmd = (pmd_t *)pmds + pmd_index(va);
} else {
pmd = (pmd_t *)puds + pmd_index(va);
}
set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa_symbol(ptes)), PAGE_TABLE));

/* Leaf entry: executable kernel mapping of the target page frame. */
set_pte((pte_t *)ptes + pte_index(va),
pfn_pte(PFN_DOWN(pa), PAGE_KERNEL_EXEC));
}

/*
* riscv_kexec_build_tramp - populate the trampoline page table
* @va: kernel virtual address of the trampoline page
* @pa: physical address of the trampoline page
*
* Installs two mappings of the same physical page in kexec_tramp_pgd, each
* through its own set of intermediate tables. Both calls write the shared
* kexec_tramp_pgd, so if pgd_index(@va) equalled pgd_index(@pa) the second
* call would replace the first call's top-level entry.
* NOTE(review): presumably the kernel VA and the PA never share a top-level
* index - confirm.
*/
static void riscv_kexec_build_tramp(unsigned long va, unsigned long pa)
{
/* VA -> PA: map the trampoline page via its kernel VA. */
map_tramp_page(kexec_tramp_p4d, kexec_tramp_pud,
kexec_tramp_pmd, kexec_tramp_pte, va, pa);

/*
* PA -> PA: identity-map the same page so the second-pass code
* can keep executing after the kernel VA mapping is dropped.
*/
map_tramp_page(kexec_tramp_p4d2, kexec_tramp_pud2,
kexec_tramp_pmd2, kexec_tramp_pte2, pa, pa);
}

/*
* machine_kexec_prepare - Initialize kexec
*
Expand Down Expand Up @@ -58,6 +121,16 @@ machine_kexec_prepare(struct kimage *image)
return -EINVAL;
}

/*
* Build the trampoline page table and capture its SATP value.
* Both entry stubs consume it: riscv_kexec_norelocate on the
* crash path and riscv_kexec_relocate_entry on the regular
* kexec path.
*/
riscv_kexec_build_tramp((unsigned long)__kexec_tramp_text_start,
__pa_symbol(__kexec_tramp_text_start));
WRITE_ONCE(kexec_tramp_satp,
PFN_DOWN(__pa_symbol(kexec_tramp_pgd)) | satp_mode);

/* Copy the assembler code for relocation to the control page */
if (image->type != KEXEC_TYPE_CRASH) {
control_code_buffer = page_address(image->control_code_page);
Expand All @@ -73,6 +146,14 @@ machine_kexec_prepare(struct kimage *image)

/* Mark the control page executable */
set_memory_x((unsigned long) control_code_buffer, 1);

WRITE_ONCE(riscv_kexec_relocate_entry_pa,
__pa_symbol(&riscv_kexec_relocate_entry));
WRITE_ONCE(riscv_kexec_cc_buffer_pa,
__pa(control_code_buffer));
} else {
WRITE_ONCE(riscv_kexec_norelocate_pa,
__pa_symbol(&riscv_kexec_norelocate));
}

return 0;
Expand Down Expand Up @@ -150,11 +231,15 @@ machine_kexec(struct kimage *image)
{
struct kimage_arch *internal = &image->arch;
unsigned long jump_addr = (unsigned long) image->start;
unsigned long first_ind_entry = (unsigned long) &image->head;
/*
* The relocate body runs entirely with the MMU off (the wrapper
* drops SATP before jumping into control_code_buffer), so the very
* first entry must be a physical address.
*/
unsigned long first_ind_entry = __pa(&image->head);
unsigned long this_cpu_id = __smp_processor_id();
unsigned long this_hart_id = cpuid_to_hartid_map(this_cpu_id);
unsigned long fdt_addr = internal->fdt_addr;
void *control_code_buffer = page_address(image->control_code_page);
riscv_kexec_method kexec_method = NULL;

#ifdef CONFIG_SMP
Expand All @@ -163,7 +248,7 @@ machine_kexec(struct kimage *image)
#endif

if (image->type != KEXEC_TYPE_CRASH)
kexec_method = control_code_buffer;
kexec_method = (riscv_kexec_method) &riscv_kexec_relocate_entry;
else
kexec_method = (riscv_kexec_method) &riscv_kexec_norelocate;

Expand All @@ -176,7 +261,35 @@ machine_kexec(struct kimage *image)

/* Jump to the relocation code */
pr_notice("Bye...\n");
kexec_method(first_ind_entry, jump_addr, fdt_addr,
this_hart_id, kernel_map.va_pa_offset);
/*
* Hand off to the trampoline. Both entry stubs
* (riscv_kexec_norelocate for KEXEC_TYPE_CRASH,
* riscv_kexec_relocate_entry otherwise) use t3 as the
* 1st/2nd-pass discriminator, so it must be 0 on first entry.
* A bare
* asm volatile ("li t3, 0" ::: "t3")
* before the C call only declares t3 *modified*; the compiler is
* free to use t3 as scratch when materialising args. Pin t3 = 0
* (and the args) via local register variables and perform the
* indirect jump inside the same inline asm so t3 == 0 is
* guaranteed at the moment control leaves machine_kexec().
*/
{
register unsigned long a0_val asm("a0") = first_ind_entry;
register unsigned long a1_val asm("a1") = jump_addr;
register unsigned long a2_val asm("a2") = fdt_addr;
register unsigned long a3_val asm("a3") = this_hart_id;
register unsigned long a4_val asm("a4") = kernel_map.va_pa_offset;
register unsigned long t3_zero asm("t3") = 0;
register riscv_kexec_method m asm("t6") = kexec_method;

asm volatile (
"jr %[m]"
:
: "r" (a0_val), "r" (a1_val), "r" (a2_val),
"r" (a3_val), "r" (a4_val),
"r" (t3_zero),
[m] "r" (m)
: "memory"
);
}
unreachable();
}
1 change: 1 addition & 0 deletions arch/riscv/kernel/vmlinux.lds.S
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ SECTIONS
ENTRY_TEXT
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
KEXEC_TRAMP_TEXT
_etext = .;
}

Expand Down
Loading