From 3a103a781b504fbd05593b54219cf0b55ba02382 Mon Sep 17 00:00:00 2001 From: Wang Han Date: Wed, 27 May 2026 20:35:23 +0800 Subject: [PATCH 1/8] scripts/sorttable: Handle RISC-V patchable ftrace entries RISC-V uses -fpatchable-function-entry=8,4 when the compressed ISA is enabled and -fpatchable-function-entry=4,2 otherwise. In both cases, the patchable NOP area starts 8 bytes before the function symbol address. The __mcount_loc entries therefore point at the patchable NOP area associated with a function, while nm reports the function symbol at the entry address used for the function range check. After RISC-V selected HAVE_BUILDTIME_MCOUNT_SORT, sorttable started applying that range check at build time. Without allowing entries just before the reported function address, the mcount sorter treats valid RISC-V ftrace callsites as invalid weak-function entries and writes them back as zero. The resulting kernel boots with no ftrace entries, breaking dynamic ftrace and users such as livepatch. The failure is silent during the final link because zeroing weak-function entries is an expected sorttable operation. At boot, those zero entries are skipped by ftrace_process_locs(), so the only obvious symptom is that the vmlinux ftrace table has lost valid callsites and ftrace users cannot attach to them. CONFIG_FTRACE_SORT_STARTUP_TEST also reports the table as sorted in this state: it only checks that the __mcount_loc entries are in ascending order, which a fully zeroed table trivially satisfies. The original commit relied on this check and did not see the regression. On an affected RISC-V QEMU boot with both CONFIG_FTRACE_SORT_STARTUP_TEST and CONFIG_FTRACE_STARTUP_TEST enabled, the sort check still passes while ftrace reports zero usable entries and the early selftests fail: [ 0.000000] ftrace section at ffffffff8101da98 sorted properly [ 0.000000] ftrace: allocating 0 entries in 128 pages [ 0.054999] Testing tracer function: .. no entries found ..FAILED! 
[ 0.172407] tracer: function failed selftest, disabling [ 0.178186] Failed to init function_graph tracer, init returned -19 Handle RISC-V like arm64 for the function-range check and allow patchable entries up to 8 bytes before the function address. With this fix, a RISC-V QEMU smoke boot with ftrace startup tests shows the vmlinux ftrace table is populated and dynamic ftrace still works: [ 0.000000] ftrace: allocating 46749 entries in 184 pages [ 0.051115] Testing tracer function: PASSED [ 1.283782] Testing dynamic ftrace: PASSED [ 6.275456] Testing tracer function_graph: PASSED Fixes: 0ca1724b56af ("riscv: ftrace: select HAVE_BUILDTIME_MCOUNT_SORT") Signed-off-by: Wang Han Signed-off-by: Linux RISC-V bot --- scripts/sorttable.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/scripts/sorttable.c b/scripts/sorttable.c index e8ed11c680c6d5..b4061c2c03e1fa 100644 --- a/scripts/sorttable.c +++ b/scripts/sorttable.c @@ -901,11 +901,17 @@ static int do_file(char const *const fname, void *addr) /* fallthrough */ case EM_386: case EM_LOONGARCH: - case EM_RISCV: case EM_S390: case EM_X86_64: custom_sort = sort_relative_table_with_data; break; + case EM_RISCV: +#ifdef MCOUNT_SORT_ENABLED + /* RISC-V uses patchable function entries before function entry. */ + before_func = 8; +#endif + custom_sort = sort_relative_table_with_data; + break; case EM_PARISC: case EM_PPC: case EM_PPC64: From a31178c86b45006176e10ae7687586e625f116b6 Mon Sep 17 00:00:00 2001 From: Wang Han Date: Wed, 27 May 2026 20:35:24 +0800 Subject: [PATCH 2/8] riscv: stacktrace: Add frame record metadata Reliable frame-pointer unwinding needs an explicit way to identify exception boundaries and the final entry frame. The existing unwinder infers those boundaries from return addresses, which is too loose for a future reliable unwinder. Add a small metadata frame record to pt_regs and initialize it on exception entry, kernel thread fork, user fork, and early idle task setup. 
The record uses a zero {fp, ra} sentinel plus a type field so a later unwinder can distinguish a final user-to-kernel boundary from a nested kernel pt_regs boundary. This follows the arm64 metadata frame-record model, adapted to the RISC-V {fp, ra} frame record convention. The metadata is established at the RISC-V entry boundaries that need an explicit unwind marker: * exception entry clears the metadata {fp, ra} pair and uses SPP (or MPP in M-mode) to record whether the pt_regs frame is the final user-to-kernel boundary or a nested kernel boundary; * _start_kernel builds the init task's final metadata record, while the secondary CPU path sets up s0 before smp_callin() so idle-task unwinding does not inherit an undefined caller frame; * copy_thread creates matching final metadata records for new kernel and user tasks, and keeps s0 available for the frame-pointer chain; * call_on_irq_stack still reserves an aligned stack slot, but links the saved {fp, ra} with the raw frame-record size so s0 points at the RISC-V frame record rather than past the alignment padding. These changes keep s0 reserved for the frame-pointer chain at task and stack-switch boundaries. 
Signed-off-by: Wang Han Signed-off-by: Linux RISC-V bot --- arch/riscv/include/asm/ptrace.h | 9 ++++ arch/riscv/include/asm/stacktrace/frame.h | 53 +++++++++++++++++++++++ arch/riscv/kernel/asm-offsets.c | 4 ++ arch/riscv/kernel/entry.S | 30 +++++++++++-- arch/riscv/kernel/head.S | 23 ++++++++++ arch/riscv/kernel/process.c | 31 ++++++++++++- 6 files changed, 144 insertions(+), 6 deletions(-) create mode 100644 arch/riscv/include/asm/stacktrace/frame.h diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h index addc8188152f7f..4b9b0f2792143b 100644 --- a/arch/riscv/include/asm/ptrace.h +++ b/arch/riscv/include/asm/ptrace.h @@ -8,6 +8,7 @@ #include #include +#include #include #ifndef __ASSEMBLER__ @@ -53,6 +54,14 @@ struct pt_regs { unsigned long cause; /* a0 value before the syscall */ unsigned long orig_a0; + + /* + * This frame record is entirely zeroed on exception entry, allowing the + * unwinder to identify exception boundaries. The type field encodes + * whether the exception was taken from user (FINAL) or kernel (PT_REGS) + * mode. + */ + struct frame_record_meta stackframe; }; #define PTRACE_SYSEMU 0x1f diff --git a/arch/riscv/include/asm/stacktrace/frame.h b/arch/riscv/include/asm/stacktrace/frame.h new file mode 100644 index 00000000000000..5720a6c65fe882 --- /dev/null +++ b/arch/riscv/include/asm/stacktrace/frame.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_RISCV_STACKTRACE_FRAME_H +#define __ASM_RISCV_STACKTRACE_FRAME_H + +/* + * See: arch/arm64/include/asm/stacktrace/frame.h for the reference + * implementation. + */ + +/* + * - FRAME_META_TYPE_NONE + * + * This value is reserved. + * + * - FRAME_META_TYPE_FINAL + * + * The record is the last entry on the stack. + * Unwinding should terminate successfully. + * + * - FRAME_META_TYPE_PT_REGS + * + * The record is embedded within a struct pt_regs, recording the registers at + * an arbitrary point in time. 
+ * Unwinding should consume pt_regs::epc, followed by pt_regs::ra. + * + * Note: all other values are reserved and should result in unwinding + * terminating with an error. + */ +#define FRAME_META_TYPE_NONE 0 +#define FRAME_META_TYPE_FINAL 1 +#define FRAME_META_TYPE_PT_REGS 2 + +#ifndef __ASSEMBLER__ +/* + * A standard RISC-V frame record. + */ +struct frame_record { + unsigned long fp; + unsigned long ra; +}; + +/* + * A metadata frame record indicating a special unwind. + * The record::{fp,ra} fields must be zero to indicate the presence of + * metadata. + */ +struct frame_record_meta { + struct frame_record record; + unsigned long type; +}; +#endif /* __ASSEMBLER__ */ + +#endif /* __ASM_RISCV_STACKTRACE_FRAME_H */ diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index af827448a609e6..8dfcb5a44bb86f 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -131,6 +131,9 @@ void asm_offsets(void) OFFSET(PT_BADADDR, pt_regs, badaddr); OFFSET(PT_CAUSE, pt_regs, cause); + DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe)); + DEFINE(S_STACKFRAME_TYPE, offsetof(struct pt_regs, stackframe.type)); + OFFSET(SUSPEND_CONTEXT_REGS, suspend_context, regs); OFFSET(HIBERN_PBE_ADDR, pbe, address); @@ -501,6 +504,7 @@ void asm_offsets(void) OFFSET(SBI_HART_BOOT_STACK_PTR_OFFSET, sbi_hart_boot_data, stack_ptr); DEFINE(STACKFRAME_SIZE_ON_STACK, ALIGN(sizeof(struct stackframe), STACK_ALIGN)); + DEFINE(STACKFRAME_RECORD_SIZE, sizeof(struct stackframe)); OFFSET(STACKFRAME_FP, stackframe, fp); OFFSET(STACKFRAME_RA, stackframe, ra); #ifdef CONFIG_FUNCTION_TRACER diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index d011fb51c59a04..9cae0e1eba1c69 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -193,6 +194,27 @@ SYM_CODE_START(handle_exception) REG_S s4, PT_CAUSE(sp) REG_S s5, PT_TP(sp) + /* + * 
Create a metadata frame record. The unwinder will use this to + * identify and unwind exception boundaries. + */ + REG_S zero, (S_STACKFRAME + STACKFRAME_FP)(sp) /* stackframe.record.fp = 0 */ + REG_S zero, (S_STACKFRAME + STACKFRAME_RA)(sp) /* stackframe.record.ra = 0 */ +#ifdef CONFIG_RISCV_M_MODE + li t0, SR_MPP + and t0, s1, t0 +#else + andi t0, s1, SR_SPP +#endif + bnez t0, 1f + li t0, FRAME_META_TYPE_FINAL + j 2f +1: + li t0, FRAME_META_TYPE_PT_REGS +2: + REG_S t0, S_STACKFRAME_TYPE(sp) + addi s0, sp, S_STACKFRAME + STACKFRAME_RECORD_SIZE + /* * Set the scratch register to 0, so that if a recursive exception * occurs, the exception vector knows it came from the kernel @@ -357,8 +379,8 @@ ASM_NOKPROBE(handle_kernel_stack_overflow) SYM_CODE_START(ret_from_fork_kernel_asm) call schedule_tail - move a0, s1 /* fn_arg */ - move a1, s0 /* fn */ + move a0, s3 /* fn_arg */ + move a1, s2 /* fn */ move a2, sp /* pt_regs */ call ret_from_fork_kernel j ret_from_exception @@ -383,7 +405,7 @@ SYM_FUNC_START(call_on_irq_stack) addi sp, sp, -STACKFRAME_SIZE_ON_STACK REG_S ra, STACKFRAME_RA(sp) REG_S s0, STACKFRAME_FP(sp) - addi s0, sp, STACKFRAME_SIZE_ON_STACK + addi s0, sp, STACKFRAME_RECORD_SIZE /* Switch to the per-CPU shadow call stack */ scs_save_current @@ -399,7 +421,7 @@ SYM_FUNC_START(call_on_irq_stack) scs_load_current /* Switch back to the thread stack and restore ra and s0 */ - addi sp, s0, -STACKFRAME_SIZE_ON_STACK + addi sp, s0, -STACKFRAME_RECORD_SIZE REG_L ra, STACKFRAME_RA(sp) REG_L s0, STACKFRAME_FP(sp) addi sp, sp, STACKFRAME_SIZE_ON_STACK diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index f6a8ca49e6277c..00e16a24f1495b 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "efi-header.S" @@ -177,6 +178,14 @@ secondary_start_sbi: REG_S a0, (a1) 1: #endif + + /* + * Set up the frame pointer for the secondary idle task so reliable + * stack unwinding 
terminates at the metadata frame in task_pt_regs(). + * Without this, the first frame records can inherit an undefined caller + * fp and unwind past smp_callin() into .Lsecondary_park. + */ + addi s0, sp, S_STACKFRAME + STACKFRAME_RECORD_SIZE scs_load_current call smp_callin #endif /* CONFIG_SMP */ @@ -305,6 +314,20 @@ SYM_CODE_START(_start_kernel) la tp, init_task la sp, init_thread_union + THREAD_SIZE addi sp, sp, -PT_SIZE_ON_STACK + + /* + * Set up a metadata frame record for the init task so that + * the unwinder can identify the outermost frame by its + * {fp, ra} = {0, 0} sentinel at the bottom of pt_regs. + * fp/s0 points above the metadata record (RISC-V + * convention). + */ + REG_S zero, (S_STACKFRAME + STACKFRAME_FP)(sp) + REG_S zero, (S_STACKFRAME + STACKFRAME_RA)(sp) + li t0, FRAME_META_TYPE_FINAL + REG_S t0, S_STACKFRAME_TYPE(sp) + addi s0, sp, S_STACKFRAME + STACKFRAME_RECORD_SIZE + #if defined(CONFIG_RISCV_SBI) && defined(CONFIG_RISCV_USER_CFI) li a7, SBI_EXT_FWFT li a6, SBI_EXT_FWFT_SET diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index b2df7f72241a5f..5212926b926ba1 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -258,8 +258,23 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) /* Supervisor/Machine, irqs on: */ childregs->status = SR_PP | SR_PIE; - p->thread.s[0] = (unsigned long)args->fn; - p->thread.s[1] = (unsigned long)args->fn_arg; + /* + * Set up a metadata frame record at the bottom of the + * stack for the unwinder. Use FRAME_META_TYPE_FINAL + * since this is the outermost kernel entry for the new + * task. The frame_record::{fp,ra} are already zero from + * memset(). + * + * fp/s0 points above the metadata record (RISC-V + * convention). fn and fn_arg are passed via s2/s3, + * keeping s0 available for the frame pointer chain. 
+ */ + childregs->stackframe.type = FRAME_META_TYPE_FINAL; + + p->thread.s[0] = (unsigned long)(&childregs->stackframe) + + sizeof(struct frame_record); + p->thread.s[2] = (unsigned long)args->fn; + p->thread.s[3] = (unsigned long)args->fn_arg; p->thread.ra = (unsigned long)ret_from_fork_kernel_asm; } else { /* allocate new shadow stack if needed. In case of CLONE_VM we have to */ @@ -278,6 +293,18 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) if (clone_flags & CLONE_SETTLS) childregs->tp = tls; childregs->a0 = 0; /* Return value of fork() */ + + /* + * Set up the unwind boundary: ensure the metadata + * frame record has its {fp,ra} sentinel zeroed and + * point fp/s0 above the metadata record. The type + * field is inherited from the parent's pt_regs. + */ + childregs->stackframe.record.fp = 0; + childregs->stackframe.record.ra = 0; + p->thread.s[0] = (unsigned long)(&childregs->stackframe) + + sizeof(struct frame_record); + p->thread.ra = (unsigned long)ret_from_fork_user_asm; } p->thread.riscv_v_flags = 0; From 8851db48124ff0c6a95c37af73dc4a31eba74315 Mon Sep 17 00:00:00 2001 From: Wang Han Date: Wed, 27 May 2026 20:35:25 +0800 Subject: [PATCH 3/8] riscv: stacktrace: disable KASAN instrumentation for stacktrace.o KASAN records stack traces for every alloc/free, which means it walks the unwinder very frequently. Instrumenting the stack trace collection code itself adds substantial overhead and makes the traces themselves noisier. Mark stacktrace.o as not KASAN-instrumented, matching the arm, arm64 and x86 treatment of their stack unwinding code. This is a preparatory change for the upcoming reliable unwinder, but it is valid on its own.
Signed-off-by: Wang Han Signed-off-by: Linux RISC-V bot --- arch/riscv/kernel/Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index cabb99cadfb6d1..1cb6c9ab298173 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -44,6 +44,11 @@ CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_sbi_ecall.o = $(CC_FLAGS_FTRACE) endif +# When KASAN is enabled, a stack trace is recorded for every alloc/free, which +# can significantly impact performance. Avoid instrumenting the stack trace +# collection code to minimize this impact. +KASAN_SANITIZE_stacktrace.o := n + always-$(KBUILD_BUILTIN) += vmlinux.lds obj-y += head.o From 2e35e2e25bd00a252ebc4b087587c0269ed7ee77 Mon Sep 17 00:00:00 2001 From: Wang Han Date: Wed, 27 May 2026 20:35:26 +0800 Subject: [PATCH 4/8] riscv: ftrace: always preserve s0 in dynamic ftrace register frame The dynamic ftrace entry/exit only saved s0 (the architectural frame pointer) when HAVE_FUNCTION_GRAPH_FP_TEST was selected. The upcoming reliable frame-pointer unwinder needs s0 to be present in ftrace_regs unconditionally so it can use the frame pointer as the function-graph return-address cookie regardless of FP_TEST. Save and restore s0 unconditionally in the dynamic ftrace ABI register frame. The cost is one extra REG_S/REG_L pair per traced call, which is negligible compared to the overall ftrace cost; the benefit is a consistent ftrace_regs layout for the unwinder. 
Signed-off-by: Wang Han Signed-off-by: Linux RISC-V bot --- arch/riscv/kernel/mcount-dyn.S | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S index 082fe0b0e3c083..26c55fba8fec7a 100644 --- a/arch/riscv/kernel/mcount-dyn.S +++ b/arch/riscv/kernel/mcount-dyn.S @@ -85,9 +85,7 @@ addi sp, sp, -FREGS_SIZE_ON_STACK REG_S t0, FREGS_EPC(sp) REG_S x1, FREGS_RA(sp) -#ifdef HAVE_FUNCTION_GRAPH_FP_TEST REG_S x8, FREGS_S0(sp) -#endif REG_S x6, FREGS_T1(sp) #ifdef CONFIG_CC_IS_CLANG REG_S x7, FREGS_T2(sp) @@ -113,9 +111,7 @@ .macro RESTORE_ABI_REGS REG_L t0, FREGS_EPC(sp) REG_L x1, FREGS_RA(sp) -#ifdef HAVE_FUNCTION_GRAPH_FP_TEST REG_L x8, FREGS_S0(sp) -#endif REG_L x6, FREGS_T1(sp) #ifdef CONFIG_CC_IS_CLANG REG_L x7, FREGS_T2(sp) From 44604357cf9dfa66c38f03a951ef05a0dca70f16 Mon Sep 17 00:00:00 2001 From: Wang Han Date: Wed, 27 May 2026 20:35:27 +0800 Subject: [PATCH 5/8] riscv: stacktrace: introduce stack-bound tracking helpers A reliable unwinder needs to validate that every frame record it reads is fully contained in a known kernel stack, and it needs to refuse to walk back into a stack it has already left. Add the building blocks for that: * struct stack_info / struct unwind_state in a new asm/stacktrace/common.h, modelled on the arm64 reference implementation. * stackinfo_get_irq() / stackinfo_get_task() / stackinfo_get_overflow() plus the corresponding on_*_stack() predicates in asm/stacktrace.h, so callers can ask "is this object on stack X?" by stack kind rather than open-coded address arithmetic. * unwind_init_common(), unwind_find_stack() and unwind_consume_stack() helpers that enforce the forward-progress-only invariant required for reliability. No existing user is wired up to these helpers in this commit; the unwinder switch comes in a follow-up. The header changes leave on_thread_stack() with the same semantics as before, just expressed in terms of the new helpers. 
Signed-off-by: Wang Han Signed-off-by: Linux RISC-V bot --- arch/riscv/include/asm/stacktrace.h | 65 ++++++++- arch/riscv/include/asm/stacktrace/common.h | 159 +++++++++++++++++++++ 2 files changed, 222 insertions(+), 2 deletions(-) create mode 100644 arch/riscv/include/asm/stacktrace/common.h diff --git a/arch/riscv/include/asm/stacktrace.h b/arch/riscv/include/asm/stacktrace.h index b1495a7e06ce69..bc87c49403798b 100644 --- a/arch/riscv/include/asm/stacktrace.h +++ b/arch/riscv/include/asm/stacktrace.h @@ -3,8 +3,13 @@ #ifndef _ASM_RISCV_STACKTRACE_H #define _ASM_RISCV_STACKTRACE_H +#include #include +#include + +#include #include +#include struct stackframe { unsigned long fp; @@ -16,14 +21,70 @@ extern void notrace walk_stackframe(struct task_struct *task, struct pt_regs *re extern void dump_backtrace(struct pt_regs *regs, struct task_struct *task, const char *loglvl); -static inline bool on_thread_stack(void) +/* + * IRQ stack accessors + */ +static inline struct stack_info stackinfo_get_irq(void) +{ + unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr); + unsigned long high = low + IRQ_STACK_SIZE; + + return (struct stack_info) { + .low = low, + .high = high, + }; +} + +static inline bool on_irq_stack(unsigned long sp, unsigned long size) +{ + struct stack_info info = stackinfo_get_irq(); + + return stackinfo_on_stack(&info, sp, size); +} + +/* + * Task stack accessors + */ +static inline struct stack_info stackinfo_get_task(const struct task_struct *tsk) { - return !(((unsigned long)(current->stack) ^ current_stack_pointer) & ~(THREAD_SIZE - 1)); + unsigned long low = (unsigned long)task_stack_page(tsk); + unsigned long high = low + THREAD_SIZE; + + return (struct stack_info) { + .low = low, + .high = high, + }; +} + +static inline bool on_task_stack(const struct task_struct *tsk, + unsigned long sp, unsigned long size) +{ + struct stack_info info = stackinfo_get_task(tsk); + + return stackinfo_on_stack(&info, sp, size); } +/* + * Cast is 
necessary since current->stack is an opaque ptr. + */ +#define on_thread_stack() (on_task_stack(current, current_stack_pointer, 1)) +/* + * Overflow stack accessors + */ #ifdef CONFIG_VMAP_STACK DECLARE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack); + +static inline struct stack_info stackinfo_get_overflow(void) +{ + unsigned long low = (unsigned long)raw_cpu_ptr(overflow_stack); + unsigned long high = low + OVERFLOW_STACK_SIZE; + + return (struct stack_info) { + .low = low, + .high = high, + }; +} #endif /* CONFIG_VMAP_STACK */ #endif /* _ASM_RISCV_STACKTRACE_H */ diff --git a/arch/riscv/include/asm/stacktrace/common.h b/arch/riscv/include/asm/stacktrace/common.h new file mode 100644 index 00000000000000..87d6d40672f357 --- /dev/null +++ b/arch/riscv/include/asm/stacktrace/common.h @@ -0,0 +1,159 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * RISC-V common stack unwinder types and helpers. + * + * See: arch/arm64/include/asm/stacktrace/common.h for the reference + * implementation. + * + * Copyright (C) 2024 + */ +#ifndef __ASM_RISCV_STACKTRACE_COMMON_H +#define __ASM_RISCV_STACKTRACE_COMMON_H + +#include +#include +#include + +#include + +/** + * struct stack_info - describes the bounds of a stack. + * + * @low: The lowest valid address on the stack. + * @high: The highest valid address on the stack. + */ +struct stack_info { + unsigned long low; + unsigned long high; +}; + +/** + * struct unwind_state - state used for robust unwinding. + * + * @fp: The fp value in the frame record (or the real fp). + * @pc: The ra value in the frame record (or the real ra). + * + * @stack: The stack currently being unwound. + * @stacks: An array of stacks which can be unwound. + * @nr_stacks: The number of stacks in @stacks. + */ +struct unwind_state { + unsigned long fp; + unsigned long pc; + + struct stack_info stack; + struct stack_info *stacks; + int nr_stacks; +}; + +/** + * stackinfo_get_unknown() - Get an unknown stack_info. 
+ * + * Return: a stack_info with low and high set to 0. + */ +static inline struct stack_info stackinfo_get_unknown(void) +{ + return (struct stack_info) { + .low = 0, + .high = 0, + }; +} + +/** + * stackinfo_on_stack() - Check whether an object is fully within a stack. + * + * @info: The stack to check against. + * @sp: The base address of the object. + * @size: The size of the object. + * + * Return: true if the object is fully contained within the stack. + */ +static inline bool stackinfo_on_stack(const struct stack_info *info, + unsigned long sp, unsigned long size) +{ + if (!info->low) + return false; + + if (sp < info->low || sp + size < sp || sp + size > info->high) + return false; + + return true; +} + +/** + * unwind_init_common() - Initialize the common parts of the unwind state. + * + * @state: the unwind state to initialize. + */ +static inline void unwind_init_common(struct unwind_state *state) +{ + state->stack = stackinfo_get_unknown(); +} + +/** + * unwind_find_stack() - Find the accessible stack which entirely contains an + * object. + * + * @state: the current unwind state. + * @sp: the base address of the object. + * @size: the size of the object. + * + * Return: a pointer to the relevant stack_info if found; NULL otherwise. + */ +static inline struct stack_info *unwind_find_stack(struct unwind_state *state, + unsigned long sp, + unsigned long size) +{ + struct stack_info *info = &state->stack; + + if (stackinfo_on_stack(info, sp, size)) + return info; + + for (int i = 0; i < state->nr_stacks; i++) { + info = &state->stacks[i]; + if (stackinfo_on_stack(info, sp, size)) + return info; + } + + return NULL; +} + +/** + * unwind_consume_stack() - Update stack boundaries so that future unwind steps + * cannot consume this object again. + * + * @state: the current unwind state. + * @info: the stack_info of the stack containing the object. + * @sp: the base address of the object. + * @size: the size of the object. 
+ * + * Stack transitions are strictly one-way, and once we've + * transitioned from one stack to another, it's never valid to + * unwind back to the old stack. + * + * Note that stacks can nest in several valid orders, e.g. + * + * TASK -> IRQ -> OVERFLOW + * + * ... so we do not check the specific order of stack + * transitions. + */ +static inline void unwind_consume_stack(struct unwind_state *state, + struct stack_info *info, + unsigned long sp, + unsigned long size) +{ + struct stack_info tmp; + + tmp = *info; + *info = stackinfo_get_unknown(); + state->stack = tmp; + + /* + * Future unwind steps can only consume stack above this frame record. + * Update the current stack to start immediately above it. + */ + state->stack.low = sp + size; +} + +#endif /* __ASM_RISCV_STACKTRACE_COMMON_H */ From c8870e599af887172091f33a20d52a4cf5a09e8b Mon Sep 17 00:00:00 2001 From: Wang Han Date: Wed, 27 May 2026 20:35:28 +0800 Subject: [PATCH 6/8] riscv: stacktrace: switch to frame-pointer based unwinder Replace the open-coded frame-pointer walker in arch_stack_walk() with a robust kunwind state machine, modelled on arch/arm64/kernel/stacktrace.c and retargeted to the RISC-V {fp, ra} frame record convention. The new walker tracks stack bounds, consumes frame records monotonically, understands the metadata pt_regs records added in the previous frame record metadata patch, and recovers return addresses replaced by function graph tracing and kretprobes. This commit introduces arch_stack_walk_reliable() but does not yet select HAVE_RELIABLE_STACKTRACE; that is done in a follow-up Kconfig patch so this commit can be reviewed and bisected as a pure unwinder replacement. Until that Kconfig change lands, livepatch is not yet enabled and arch_stack_walk_reliable() has no in-tree caller. 
Three related callers are updated to keep the same frame-record assumptions everywhere: * Function graph tracing: the old RISC-V unwinder matched function graph return-stack entries by the saved return-address slot. That was consistent with the static mcount path, but not with the dynamic ftrace path where the parent slot is ftrace_regs::ra. Use the architectural frame pointer as the function graph return-address cookie, matching the kunwind walker. * Perf callchains: route kernel callchain collection through arch_stack_walk() so perf sees the same frame-pointer unwind behaviour as dump_stack() and the upcoming livepatch path. * dump_backtrace() / __get_wchan() / show_stack(): these now go through arch_stack_walk(); the explicit "Call Trace:" header is moved into dump_backtrace() to preserve the original output. The non-frame-pointer fallback walker is kept untouched for !CONFIG_FRAME_POINTER builds. Signed-off-by: Wang Han Signed-off-by: Linux RISC-V bot --- arch/riscv/kernel/ftrace.c | 6 +- arch/riscv/kernel/perf_callchain.c | 2 +- arch/riscv/kernel/stacktrace.c | 560 ++++++++++++++++++++++++----- 3 files changed, 472 insertions(+), 96 deletions(-) diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index b430edfb83f4c8..5d55199a9230e1 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -242,7 +242,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, */ old = *parent; - if (!function_graph_enter(old, self_addr, frame_pointer, parent)) + if (!function_graph_enter(old, self_addr, frame_pointer, + (void *)frame_pointer)) *parent = return_hooker; } @@ -264,7 +265,8 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, */ old = *parent; - if (!function_graph_enter_regs(old, ip, frame_pointer, parent, fregs)) + if (!function_graph_enter_regs(old, ip, frame_pointer, + (void *)frame_pointer, fregs)) *parent = return_hooker; } #endif /* CONFIG_DYNAMIC_FTRACE */ diff --git 
a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c index b465bc9eb870ec..436af96ea59cab 100644 --- a/arch/riscv/kernel/perf_callchain.c +++ b/arch/riscv/kernel/perf_callchain.c @@ -44,5 +44,5 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, return; } - walk_stackframe(NULL, regs, fill_callchain, entry); + arch_stack_walk(fill_callchain, entry, NULL, regs); } diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 2692d3a06afa2b..0d76320b3a2973 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -11,98 +11,16 @@ #include #include #include +#include +#include #include -#ifdef CONFIG_FRAME_POINTER - /* - * This disables KASAN checking when reading a value from another task's stack, - * since the other task could be running on another CPU and could have poisoned - * the stack in the meantime. + * Non-frame-pointer fallback unwinder. + * Only compiled when CONFIG_FRAME_POINTER is not enabled. 
*/ -#define READ_ONCE_TASK_STACK(task, x) \ -({ \ - unsigned long val; \ - unsigned long addr = x; \ - if ((task) == current) \ - val = READ_ONCE(addr); \ - else \ - val = READ_ONCE_NOCHECK(addr); \ - val; \ -}) - -extern asmlinkage void handle_exception(void); -extern unsigned long ret_from_exception_end; - -static inline int fp_is_valid(unsigned long fp, unsigned long sp) -{ - unsigned long low, high; - - low = sp + sizeof(struct stackframe); - high = ALIGN(sp, THREAD_SIZE); - - return !(fp < low || fp > high || fp & 0x07); -} - -void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, - bool (*fn)(void *, unsigned long), void *arg) -{ - unsigned long fp, sp, pc; - int graph_idx = 0; - int level = 0; - - if (regs) { - fp = frame_pointer(regs); - sp = user_stack_pointer(regs); - pc = instruction_pointer(regs); - } else if (task == NULL || task == current) { - fp = (unsigned long)__builtin_frame_address(0); - sp = current_stack_pointer; - pc = (unsigned long)walk_stackframe; - level = -1; - } else { - /* task blocked in __switch_to */ - fp = task->thread.s[0]; - sp = task->thread.sp; - pc = task->thread.ra; - } - - for (;;) { - struct stackframe *frame; - - if (unlikely(!__kernel_text_address(pc) || (level++ >= 0 && !fn(arg, pc)))) - break; - - if (unlikely(!fp_is_valid(fp, sp))) - break; - - /* Unwind stack frame */ - frame = (struct stackframe *)fp - 1; - sp = fp; - if (regs && (regs->epc == pc) && fp_is_valid(frame->ra, sp)) { - /* We hit function where ra is not saved on the stack */ - fp = frame->ra; - pc = regs->ra; - } else { - fp = READ_ONCE_TASK_STACK(task, frame->fp); - pc = READ_ONCE_TASK_STACK(task, frame->ra); - pc = ftrace_graph_ret_addr(task, &graph_idx, pc, - &frame->ra); - if (pc >= (unsigned long)handle_exception && - pc < (unsigned long)&ret_from_exception_end) { - if (unlikely(!fn(arg, pc))) - break; - - pc = ((struct pt_regs *)sp)->epc; - fp = ((struct pt_regs *)sp)->s0; - } - } - - } -} - -#else /* !CONFIG_FRAME_POINTER */ 
+#ifndef CONFIG_FRAME_POINTER void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, bool (*fn)(void *, unsigned long), void *arg) @@ -133,7 +51,12 @@ void notrace walk_stackframe(struct task_struct *task, } } -#endif /* CONFIG_FRAME_POINTER */ +#endif /* !CONFIG_FRAME_POINTER */ + +/* + * Common trace helpers. + * These are used by both the FP (kunwind) and non-FP (walk_stackframe) paths. + */ static bool print_trace_address(void *arg, unsigned long pc) { @@ -146,12 +69,12 @@ static bool print_trace_address(void *arg, unsigned long pc) noinline void dump_backtrace(struct pt_regs *regs, struct task_struct *task, const char *loglvl) { - walk_stackframe(task, regs, print_trace_address, (void *)loglvl); + printk("%sCall Trace:\n", loglvl); + arch_stack_walk(print_trace_address, (void *)loglvl, task, regs); } void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) { - pr_cont("%sCall Trace:\n", loglvl); dump_backtrace(NULL, task, loglvl); } @@ -171,17 +94,468 @@ unsigned long __get_wchan(struct task_struct *task) if (!try_get_task_stack(task)) return 0; - walk_stackframe(task, NULL, save_wchan, &pc); + arch_stack_walk(save_wchan, &pc, task, NULL); put_task_stack(task); return pc; } -noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, - struct task_struct *task, struct pt_regs *regs) +/* + * Frame-pointer-based kernel unwind infrastructure. + * Only compiled when CONFIG_FRAME_POINTER is enabled. + * + * See: arch/arm64/kernel/stacktrace.c for the reference implementation. + */ +#ifdef CONFIG_FRAME_POINTER + +/* + * Per-cpu stacks are only accessible when unwinding the current task in a + * non-preemptible context. + */ +#define STACKINFO_CPU(task, name) \ + ({ \ + (((task) == current) && !preemptible()) \ + ? 
stackinfo_get_##name() \ + : stackinfo_get_unknown(); \ + }) + +enum kunwind_source { + KUNWIND_SOURCE_UNKNOWN, + KUNWIND_SOURCE_FRAME, + KUNWIND_SOURCE_CALLER, + KUNWIND_SOURCE_TASK, + KUNWIND_SOURCE_REGS_PC, +}; + +union unwind_flags { + unsigned long all; + struct { + unsigned long fgraph : 1, + kretprobe : 1; + }; +}; + +/* + * Kernel unwind state + * + * @common: Common unwind state. + * @task: The task being unwound. + * @graph_idx: Used by ftrace_graph_ret_addr() for optimized stack unwinding. + * @kr_cur: When KRETPROBES is selected, holds the kretprobe instance + * associated with the most recently encountered replacement ra + * value. + */ +struct kunwind_state { + struct unwind_state common; + struct task_struct *task; + int graph_idx; +#ifdef CONFIG_KRETPROBES + struct llist_node *kr_cur; +#endif + enum kunwind_source source; + union unwind_flags flags; + struct pt_regs *regs; +}; + +static __always_inline void +kunwind_init(struct kunwind_state *state, + struct task_struct *task) +{ + unwind_init_common(&state->common); + state->task = task; + state->source = KUNWIND_SOURCE_UNKNOWN; + state->flags.all = 0; + state->regs = NULL; +} + +/* + * Start an unwind from a pt_regs. + * + * The unwind will begin at the PC within the regs. + * + * The regs must be on a stack currently owned by the calling task. + */ +static __always_inline void +kunwind_init_from_regs(struct kunwind_state *state, + struct pt_regs *regs) +{ + kunwind_init(state, current); + + state->regs = regs; + state->common.fp = frame_pointer(regs); + state->common.pc = instruction_pointer(regs); + state->source = KUNWIND_SOURCE_REGS_PC; +} + +/* + * Start an unwind from a caller. + * + * The unwind will begin at the caller of whichever function this is inlined + * into. + * + * The function which invokes this must be noinline. 
+ */ +static __always_inline void +kunwind_init_from_caller(struct kunwind_state *state) +{ + unsigned long fp = (unsigned long)__builtin_frame_address(0); + struct frame_record *record = (struct frame_record *)fp - 1; + + kunwind_init(state, current); + + state->common.fp = READ_ONCE(record->fp); + state->common.pc = READ_ONCE(record->ra); + state->source = KUNWIND_SOURCE_CALLER; +} + +/* + * Start an unwind from a blocked task. + * + * The unwind will begin at the blocked task's saved PC (i.e. the caller of + * __switch_to). + * + * The caller should ensure the task is blocked in __switch_to for the + * duration of the unwind, or the unwind will be bogus. It is never valid to + * call this for the current task. + */ +static __always_inline void +kunwind_init_from_task(struct kunwind_state *state, + struct task_struct *task) +{ + kunwind_init(state, task); + + state->common.fp = task->thread.s[0]; + state->common.pc = task->thread.ra; + state->source = KUNWIND_SOURCE_TASK; +} + +static __always_inline int +kunwind_recover_return_address(struct kunwind_state *state) +{ +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + if (state->task->ret_stack && + state->common.pc == (unsigned long)return_to_handler) { + unsigned long orig_pc; + + orig_pc = ftrace_graph_ret_addr(state->task, &state->graph_idx, + state->common.pc, + (void *)state->common.fp); + if (state->common.pc == orig_pc) { + WARN_ON_ONCE(state->task == current); + return -EINVAL; + } + state->common.pc = orig_pc; + state->flags.fgraph = 1; + } +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +#ifdef CONFIG_KRETPROBES + if (is_kretprobe_trampoline(state->common.pc)) { + unsigned long orig_pc; + + orig_pc = kretprobe_find_ret_addr(state->task, + (void *)state->common.fp, + &state->kr_cur); + if (!orig_pc) + return -EINVAL; + state->common.pc = orig_pc; + state->flags.kretprobe = 1; + } +#endif /* CONFIG_KRETPROBES */ + + return 0; +} + +/* + * When we reach an exception boundary marked by a metadata frame record, + * extract 
pt_regs from the stack and continue unwinding from the saved + * context (epc and s0/fp). + * + * On RISC-V, fp points above the metadata record, so the record's + * frame_record portion is at fp - sizeof(struct frame_record). + */ +static __always_inline int +kunwind_next_regs_pc(struct kunwind_state *state) +{ + struct stack_info *info; + unsigned long fp = state->common.fp; + struct pt_regs *regs; + + regs = container_of((unsigned long *)(fp - sizeof(struct frame_record)), + struct pt_regs, stackframe.record.fp); + + info = unwind_find_stack(&state->common, (unsigned long)regs, + sizeof(*regs)); + if (!info) + return -EINVAL; + + unwind_consume_stack(&state->common, info, (unsigned long)regs, + sizeof(*regs)); + + state->regs = regs; + state->common.pc = regs->epc; + state->common.fp = frame_pointer(regs); + state->regs = NULL; + state->source = KUNWIND_SOURCE_REGS_PC; + return 0; +} + +/* + * Handle a metadata frame record embedded in pt_regs. + * + * On RISC-V, fp points above the record (fp = metadata + 16), so the + * frame_record_meta starts at fp - sizeof(struct frame_record). + * + * FRAME_META_TYPE_FINAL: This is the outermost exception entry + * (user -> kernel). Unwinding terminates successfully. + * FRAME_META_TYPE_PT_REGS: This is a nested exception entry + * (kernel -> kernel). Continue unwinding from the saved context. 
+ */ +static __always_inline int +kunwind_next_frame_record_meta(struct kunwind_state *state) +{ + struct task_struct *tsk = state->task; + unsigned long fp = state->common.fp; + unsigned long meta_base = fp - sizeof(struct frame_record); + struct frame_record_meta *meta; + struct stack_info *info; + + info = unwind_find_stack(&state->common, meta_base, sizeof(*meta)); + if (!info) + return -EINVAL; + + meta = (struct frame_record_meta *)meta_base; + switch (READ_ONCE(meta->type)) { + case FRAME_META_TYPE_FINAL: + if (meta == &task_pt_regs(tsk)->stackframe) + return -ENOENT; + WARN_ON_ONCE(tsk == current); + return -EINVAL; + case FRAME_META_TYPE_PT_REGS: + return kunwind_next_regs_pc(state); + default: + WARN_ON_ONCE(tsk == current); + return -EINVAL; + } +} + +/* + * Unwind from one frame record to the next. + * + * On RISC-V, the frame record sits at fp - sizeof(struct frame_record), + * immediately below the address pointed to by fp/s0. This applies to both + * normal frame records and metadata frame records (embedded in pt_regs). + * + * A metadata record is identified by both fp and ra being zero in the + * frame_record portion, with a type value following it (at fp itself).
+ */ +static __always_inline int +kunwind_next_frame_record(struct kunwind_state *state) +{ + unsigned long fp = state->common.fp; + struct frame_record *record; + struct stack_info *info; + unsigned long new_fp, new_pc; + unsigned long record_base; + + if (fp & 0x7) + return -EINVAL; + + record_base = fp - sizeof(*record); + + info = unwind_find_stack(&state->common, record_base, sizeof(*record)); + if (!info) + return -EINVAL; + + record = (struct frame_record *)record_base; + new_fp = READ_ONCE(record->fp); + new_pc = READ_ONCE(record->ra); + + if (!new_fp && !new_pc) + return kunwind_next_frame_record_meta(state); + + unwind_consume_stack(&state->common, info, record_base, + sizeof(*record)); + + state->common.fp = new_fp; + state->common.pc = new_pc; + state->source = KUNWIND_SOURCE_FRAME; + + return 0; +} + +/* + * Unwind from one frame record (A) to the next frame record (B). + * + * We terminate early if the location of B indicates a malformed chain of frame + * records (e.g. a cycle), determined based on the location and fp value of A + * and the location (but not the fp value) of B. 
+ */ +static __always_inline int +kunwind_next(struct kunwind_state *state) +{ + int err; + + state->flags.all = 0; + + switch (state->source) { + case KUNWIND_SOURCE_FRAME: + case KUNWIND_SOURCE_CALLER: + case KUNWIND_SOURCE_TASK: + case KUNWIND_SOURCE_REGS_PC: + err = kunwind_next_frame_record(state); + break; + default: + err = -EINVAL; + } + + if (err) + return err; + + return kunwind_recover_return_address(state); +} + +typedef bool (*kunwind_consume_fn)(const struct kunwind_state *state, void *cookie); + +static __always_inline int +do_kunwind(struct kunwind_state *state, kunwind_consume_fn consume_state, + void *cookie) +{ + int ret; + + ret = kunwind_recover_return_address(state); + if (ret) + return ret; + + while (1) { + if (!consume_state(state, cookie)) + return -EINVAL; + ret = kunwind_next(state); + if (ret == -ENOENT) + return 0; + if (ret < 0) + return ret; + } +} + +static __always_inline int +kunwind_stack_walk(kunwind_consume_fn consume_state, + void *cookie, struct task_struct *task, + struct pt_regs *regs) +{ + struct task_struct *tsk = task ?: current; + struct stack_info stacks[] = { + stackinfo_get_task(tsk), + STACKINFO_CPU(tsk, irq), +#ifdef CONFIG_VMAP_STACK + STACKINFO_CPU(tsk, overflow), +#endif + }; + struct kunwind_state state = { + .common = { + .stacks = stacks, + .nr_stacks = ARRAY_SIZE(stacks), + }, + }; + + if (regs) { + if (tsk != current) + return -EINVAL; + kunwind_init_from_regs(&state, regs); + } else if (tsk == current) { + kunwind_init_from_caller(&state); + } else { + kunwind_init_from_task(&state, tsk); + } + + return do_kunwind(&state, consume_state, cookie); +} + +struct kunwind_consume_entry_data { + stack_trace_consume_fn consume_entry; + void *cookie; +}; + +static __always_inline bool +arch_kunwind_consume_entry(const struct kunwind_state *state, void *cookie) +{ + struct kunwind_consume_entry_data *data = cookie; + + return data->consume_entry(data->cookie, state->common.pc); +} + +static __always_inline bool 
+arch_reliable_kunwind_consume_entry(const struct kunwind_state *state, void *cookie) +{ + /* + * At an exception boundary we can reliably consume the saved PC. We do + * not know whether ra was live when the exception was taken, and + * so we cannot perform the next unwind step reliably. + * + * All that matters is whether the *entire* unwind is reliable, so give + * up as soon as we hit an exception boundary. + */ + if (state->source == KUNWIND_SOURCE_REGS_PC) + return false; + + return arch_kunwind_consume_entry(state, cookie); +} + +#endif /* CONFIG_FRAME_POINTER */ + +/* + * arch_stack_walk - dual implementation. + * + * When CONFIG_FRAME_POINTER is enabled, uses the kunwind infrastructure for + * robust frame-pointer-based unwinding, consistent with arch_stack_walk_reliable. + * + * When CONFIG_FRAME_POINTER is disabled, falls back to the simple stack scan + * in walk_stackframe(). + */ +#ifdef CONFIG_FRAME_POINTER + +noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry, + void *cookie, struct task_struct *task, + struct pt_regs *regs) +{ + struct kunwind_consume_entry_data data = { + .consume_entry = consume_entry, + .cookie = cookie, + }; + + kunwind_stack_walk(arch_kunwind_consume_entry, &data, task, regs); +} + +#else + +noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry, + void *cookie, struct task_struct *task, + struct pt_regs *regs) { walk_stackframe(task, regs, consume_entry, cookie); } +#endif /* CONFIG_FRAME_POINTER */ + +/* + * Reliable stack walk for livepatch (CONFIG_FRAME_POINTER only).
+ */ +#ifdef CONFIG_FRAME_POINTER + +noinline noinstr int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, + void *cookie, + struct task_struct *task) +{ + struct kunwind_consume_entry_data data = { + .consume_entry = consume_entry, + .cookie = cookie, + }; + + return kunwind_stack_walk(arch_reliable_kunwind_consume_entry, &data, + task, NULL); +} + +#endif /* CONFIG_FRAME_POINTER */ + /* * Get the return address for a single stackframe and return a pointer to the * next frame tail. From 46cee0d74b6919ea22f77241739ce58cff18b482 Mon Sep 17 00:00:00 2001 From: Wang Han Date: Wed, 27 May 2026 20:35:29 +0800 Subject: [PATCH 7/8] riscv: Kconfig: enable HAVE_RELIABLE_STACKTRACE and HAVE_LIVEPATCH Now that the metadata frame records, the kunwind state machine and arch_stack_walk_reliable() are all in place, advertise the capability to the rest of the kernel: * select HAVE_RELIABLE_STACKTRACE under FRAME_POINTER && 64BIT, so only the configurations that actually have the metadata records and the FP-based reliable walker enable it. * select HAVE_LIVEPATCH under the same condition and source kernel/livepatch/Kconfig so the livepatch menu is reachable from the RISC-V configuration. This is split out from the unwinder change so the policy decision and the implementation can be reviewed and reverted independently. 
Signed-off-by: Wang Han Signed-off-by: Linux RISC-V bot --- arch/riscv/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 674044754378af..2921680d213229 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -185,6 +185,7 @@ config RISCV select HAVE_KRETPROBES # https://github.com/ClangBuiltLinux/linux/issues/1881 select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !LD_IS_LLD + select HAVE_LIVEPATCH if FRAME_POINTER && 64BIT select HAVE_MOVE_PMD select HAVE_MOVE_PUD select HAVE_PAGE_SIZE_4KB @@ -195,6 +196,7 @@ config RISCV select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_PREEMPT_DYNAMIC_KEY select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RELIABLE_STACKTRACE if FRAME_POINTER && 64BIT select HAVE_RETHOOK select HAVE_RSEQ select HAVE_RUST if RUSTC_SUPPORTS_RISCV && CC_IS_CLANG @@ -1394,3 +1396,5 @@ endmenu # "CPU Power Management" source "arch/riscv/kvm/Kconfig" source "drivers/acpi/Kconfig" + +source "kernel/livepatch/Kconfig" From e8f9a967ba2bd13283fbb6dbf3e5fc5e39634bff Mon Sep 17 00:00:00 2001 From: Wang Han Date: Wed, 27 May 2026 20:35:30 +0800 Subject: [PATCH 8/8] selftests/livepatch: Add RISC-V syscall wrapper prefix The syscall livepatch selftest resolves and patches a syscall wrapper symbol. To use that test for RISC-V livepatch validation, add the RISC-V FN_PREFIX definition for ARCH_HAS_SYSCALL_WRAPPER. Without this macro, the syscall livepatch selftest cannot resolve the RISC-V target symbol, and the syscall-related livepatch test fails on RISC-V. 
Signed-off-by: Wang Han Signed-off-by: Linux RISC-V bot --- .../testing/selftests/livepatch/test_modules/test_klp_syscall.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_syscall.c b/tools/testing/selftests/livepatch/test_modules/test_klp_syscall.c index dd802783ea849f..275e4b10cf5950 100644 --- a/tools/testing/selftests/livepatch/test_modules/test_klp_syscall.c +++ b/tools/testing/selftests/livepatch/test_modules/test_klp_syscall.c @@ -18,6 +18,8 @@ #define FN_PREFIX __s390x_ #elif defined(__aarch64__) #define FN_PREFIX __arm64_ +#elif defined(__riscv) +#define FN_PREFIX __riscv_ #else /* powerpc does not select ARCH_HAS_SYSCALL_WRAPPER */ #define FN_PREFIX