From 3cc0b64c9379515d65a7801bbdd69f4748387b19 Mon Sep 17 00:00:00 2001 From: Vivian Wang Date: Mon, 2 Mar 2026 10:21:30 +0800 Subject: [PATCH 1/3] riscv: mm: Rename new_vmalloc into new_valid_map_cpus In preparation for a future patch using this mechanism for non-vmalloc mappings, rename new_vmalloc into new_valid_map_cpus to avoid misleading readers. No functional change intended. Signed-off-by: Vivian Wang Signed-off-by: Linux RISC-V bot --- arch/riscv/include/asm/cacheflush.h | 6 ++--- arch/riscv/kernel/entry.S | 38 ++++++++++++++--------------- arch/riscv/mm/init.c | 2 +- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index 0092513c3376c56..b6d1a5eb7564edb 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -41,7 +41,7 @@ do { \ } while (0) #ifdef CONFIG_64BIT -extern u64 new_vmalloc[NR_CPUS / sizeof(u64) + 1]; +extern u64 new_valid_map_cpus[NR_CPUS / sizeof(u64) + 1]; extern char _end[]; #define flush_cache_vmap flush_cache_vmap static inline void flush_cache_vmap(unsigned long start, unsigned long end) @@ -54,8 +54,8 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end) * the only place this can happen is in handle_exception() where * an sfence.vma is emitted. 
*/ - for (i = 0; i < ARRAY_SIZE(new_vmalloc); ++i) - new_vmalloc[i] = -1ULL; + for (i = 0; i < ARRAY_SIZE(new_valid_map_cpus); ++i) + new_valid_map_cpus[i] = -1ULL; } } #define flush_cache_vmap_early(start, end) local_flush_tlb_kernel_range(start, end) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 60eb221296a6046..e57a0f550860605 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -20,44 +20,44 @@ .section .irqentry.text, "ax" -.macro new_vmalloc_check +.macro new_valid_map_cpus_check REG_S a0, TASK_TI_A0(tp) csrr a0, CSR_CAUSE /* Exclude IRQs */ - blt a0, zero, .Lnew_vmalloc_restore_context_a0 + blt a0, zero, .Lnew_valid_map_cpus_restore_context_a0 REG_S a1, TASK_TI_A1(tp) - /* Only check new_vmalloc if we are in page/protection fault */ + /* Only check new_valid_map_cpus if we are in page/protection fault */ li a1, EXC_LOAD_PAGE_FAULT - beq a0, a1, .Lnew_vmalloc_kernel_address + beq a0, a1, .Lnew_valid_map_cpus_kernel_address li a1, EXC_STORE_PAGE_FAULT - beq a0, a1, .Lnew_vmalloc_kernel_address + beq a0, a1, .Lnew_valid_map_cpus_kernel_address li a1, EXC_INST_PAGE_FAULT - bne a0, a1, .Lnew_vmalloc_restore_context_a1 + bne a0, a1, .Lnew_valid_map_cpus_restore_context_a1 -.Lnew_vmalloc_kernel_address: +.Lnew_valid_map_cpus_kernel_address: /* Is it a kernel address? 
*/ csrr a0, CSR_TVAL - bge a0, zero, .Lnew_vmalloc_restore_context_a1 + bge a0, zero, .Lnew_valid_map_cpus_restore_context_a1 /* Check if a new vmalloc mapping appeared that could explain the trap */ REG_S a2, TASK_TI_A2(tp) /* * Computes: - * a0 = &new_vmalloc[BIT_WORD(cpu)] + * a0 = &new_valid_map_cpus[BIT_WORD(cpu)] * a1 = BIT_MASK(cpu) */ lw a2, TASK_TI_CPU(tp) /* - * Compute the new_vmalloc element position: + * Compute the new_valid_map_cpus element position: * (cpu / 64) * 8 = (cpu >> 6) << 3 */ srli a1, a2, 6 slli a1, a1, 3 - la a0, new_vmalloc + la a0, new_valid_map_cpus add a0, a0, a1 /* - * Compute the bit position in the new_vmalloc element: + * Compute the bit position in the new_valid_map_cpus element: * bit_pos = cpu % 64 = cpu - (cpu / 64) * 64 = cpu - (cpu >> 6) << 6 * = cpu - ((cpu >> 6) << 3) << 3 */ @@ -67,12 +67,12 @@ li a2, 1 sll a1, a2, a1 - /* Check the value of new_vmalloc for this cpu */ + /* Check the value of new_valid_map_cpus for this cpu */ REG_L a2, 0(a0) and a2, a2, a1 - beq a2, zero, .Lnew_vmalloc_restore_context + beq a2, zero, .Lnew_valid_map_cpus_restore_context - /* Atomically reset the current cpu bit in new_vmalloc */ + /* Atomically reset the current cpu bit in new_valid_map_cpus */ amoxor.d a0, a1, (a0) /* Only emit a sfence.vma if the uarch caches invalid entries */ @@ -84,11 +84,11 @@ csrw CSR_SCRATCH, x0 sret -.Lnew_vmalloc_restore_context: +.Lnew_valid_map_cpus_restore_context: REG_L a2, TASK_TI_A2(tp) -.Lnew_vmalloc_restore_context_a1: +.Lnew_valid_map_cpus_restore_context_a1: REG_L a1, TASK_TI_A1(tp) -.Lnew_vmalloc_restore_context_a0: +.Lnew_valid_map_cpus_restore_context_a0: REG_L a0, TASK_TI_A0(tp) .endm @@ -144,7 +144,7 @@ SYM_CODE_START(handle_exception) * could "miss" the new mapping and traps: in that case, we only need * to retry the access, no sfence.vma is required. 
*/ - new_vmalloc_check + new_valid_map_cpus_check #endif REG_S sp, TASK_TI_KERNEL_SP(tp) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 257df6bd258fa61..f306e8688e37b43 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -37,7 +37,7 @@ #include "../kernel/head.h" -u64 new_vmalloc[NR_CPUS / sizeof(u64) + 1]; +u64 new_valid_map_cpus[NR_CPUS / sizeof(u64) + 1]; struct kernel_mapping kernel_map __ro_after_init; EXPORT_SYMBOL(kernel_map); From d6247d2494ba42ce08a9b8f6ef7ee4e671a87793 Mon Sep 17 00:00:00 2001 From: Vivian Wang Date: Mon, 2 Mar 2026 10:21:31 +0800 Subject: [PATCH 2/3] riscv: mm: Extract helper mark_new_valid_map() In preparation for a future patch using the same mechanism for non-vmalloc addresses, extract the mark_new_valid_map() helper from flush_cache_vmap(). No functional change intended. Signed-off-by: Vivian Wang Signed-off-by: Linux RISC-V bot --- arch/riscv/include/asm/cacheflush.h | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index b6d1a5eb7564edb..8c7a0ef2635adf1 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -43,20 +43,23 @@ do { \ #ifdef CONFIG_64BIT extern u64 new_valid_map_cpus[NR_CPUS / sizeof(u64) + 1]; extern char _end[]; +static inline void mark_new_valid_map(void) +{ + int i; + + /* + * We don't care if concurrently a cpu resets this value since + * the only place this can happen is in handle_exception() where + * an sfence.vma is emitted. 
+ */ + for (i = 0; i < ARRAY_SIZE(new_valid_map_cpus); ++i) + new_valid_map_cpus[i] = -1ULL; +} #define flush_cache_vmap flush_cache_vmap static inline void flush_cache_vmap(unsigned long start, unsigned long end) { - if (is_vmalloc_or_module_addr((void *)start)) { - int i; - - /* - * We don't care if concurrently a cpu resets this value since - * the only place this can happen is in handle_exception() where - * an sfence.vma is emitted. - */ - for (i = 0; i < ARRAY_SIZE(new_valid_map_cpus); ++i) - new_valid_map_cpus[i] = -1ULL; - } + if (is_vmalloc_or_module_addr((void *)start)) + mark_new_valid_map(); } #define flush_cache_vmap_early(start, end) local_flush_tlb_kernel_range(start, end) #endif From b74ba04bc4f328bc9a2e86f0da7375ad584dc12e Mon Sep 17 00:00:00 2001 From: Vivian Wang Date: Mon, 2 Mar 2026 10:21:32 +0800 Subject: [PATCH 3/3] riscv: kfence: Call mark_new_valid_map() for kfence_unprotect() In kfence_protect_page(), which kfence_unprotect() calls, we cannot send IPIs to other CPUs to ask them to flush their TLBs. This may lead to those CPUs spuriously faulting on a recently allocated kfence object despite it being valid, leading to false-positive use-after-free reports. Fix this by calling mark_new_valid_map() so that the page fault handling code path notices the spurious fault, flushes the TLB and then retries the access. Update the comment in handle_exception to indicate that new_valid_map_cpus_check also handles kfence_unprotect() spurious faults. Note that kfence_protect() has the same stale TLB entries problem, but that leads to false negatives, which is fine with kfence. 
Cc: Reported-by: Yanko Kaneti Fixes: b3431a8bb336 ("riscv: Fix IPIs usage in kfence_protect_page()") Signed-off-by: Vivian Wang Signed-off-by: Linux RISC-V bot --- arch/riscv/include/asm/kfence.h | 7 +++++-- arch/riscv/kernel/entry.S | 6 ++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/riscv/include/asm/kfence.h b/arch/riscv/include/asm/kfence.h index d08bf7fb3aee610..29cb3a6ee113dde 100644 --- a/arch/riscv/include/asm/kfence.h +++ b/arch/riscv/include/asm/kfence.h @@ -6,6 +6,7 @@ #include #include #include +#include #include static inline bool arch_kfence_init_pool(void) @@ -17,10 +18,12 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect) { pte_t *pte = virt_to_kpte(addr); - if (protect) + if (protect) { set_pte(pte, __pte(pte_val(ptep_get(pte)) & ~_PAGE_PRESENT)); - else + } else { set_pte(pte, __pte(pte_val(ptep_get(pte)) | _PAGE_PRESENT)); + mark_new_valid_map(); + } preempt_disable(); local_flush_tlb_kernel_range(addr, addr + PAGE_SIZE); diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index e57a0f550860605..9c6acfd091416fe 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -136,8 +136,10 @@ SYM_CODE_START(handle_exception) #ifdef CONFIG_64BIT /* - * The RISC-V kernel does not eagerly emit a sfence.vma after each - * new vmalloc mapping, which may result in exceptions: + * The RISC-V kernel does not flush TLBs on all CPUs after each new + * vmalloc mapping or kfence_unprotect(), which may result in + * exceptions: + * * - if the uarch caches invalid entries, the new mapping would not be * observed by the page table walker and an invalidation is needed. * - if the uarch does not cache invalid entries, a reordered access