From 62b7c59ce9ded8d0ce9259c81bbeb716c05dcacb Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 1 Jun 2026 17:47:43 +0800 Subject: [PATCH 01/23] riscv: kexec_file: Fix crashk_low_res not exclude bug As done in commit 944a45abfabc ("arm64: kdump: Reimplement crashkernel=X") and commit 4831be702b95 ("arm64/kexec: Fix missing extra range for crashkres_low.") for arm64, while implementing crashkernel=X,[high,low], riscv should have excluded the "crashk_low_res" reserved ranges from the crash kernel memory to prevent them from being exported through /proc/vmcore, and the exclusion would need an extra crash_mem range. Just simply tested on qemu with crashkernel=4G with kexec in [1] mentioned in [2]. And the second kernel can be started normally. # dmesg | grep crash [ 0.000000] crashkernel low memory reserved: 0xf8000000 - 0x100000000 (128 MB) [ 0.000000] crashkernel reserved: 0x000000017fe00000 - 0x000000027fe00000 (4096 MB) Cc: Guo Ren Cc: Baoquan He [1]: https://github.com/chenjh005/kexec-tools/tree/build-test-riscv-v2 [2]: https://lore.kernel.org/all/20230726175000.2536220-1-chenjiahao16@huawei.com/ Fixes: 5882e5acf18d ("riscv: kdump: Implement crashkernel=X,[high,low]") Reviewed-by: Guo Ren Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/riscv/kernel/machine_kexec_file.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c index 59d4bbc848a896..fa2946aa9b8f40 100644 --- a/arch/riscv/kernel/machine_kexec_file.c +++ b/arch/riscv/kernel/machine_kexec_file.c @@ -62,7 +62,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) unsigned int nr_ranges; int ret; - nr_ranges = 1; /* For exclusion of crashkernel region */ + nr_ranges = 2; /* For exclusion of crashkernel region */ walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); cmem = kmalloc_flex(*cmem, ranges, nr_ranges); @@ -77,8 +77,16 @@ static int 
prepare_elf_headers(void **addr, unsigned long *sz) /* Exclude crashkernel region */ ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); - if (!ret) - ret = crash_prepare_elf64_headers(cmem, true, addr, sz); + if (ret) + goto out; + + if (crashk_low_res.end) { + ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); + if (ret) + goto out; + } + + ret = crash_prepare_elf64_headers(cmem, true, addr, sz); out: kfree(cmem); From 6fd19857ef31522b0f7836f6c7bb063205c9a351 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 1 Jun 2026 17:47:44 +0800 Subject: [PATCH 02/23] powerpc/crash: Fix possible memory leak in update_crash_elfcorehdr() In get_crash_memory_ranges(), if crash_exclude_mem_range() fails after realloc_mem_ranges() has successfully allocated the cmem memory, it just returns an error but leaves cmem pointing to the allocated memory. The caller update_crash_elfcorehdr() does not free it either, which causes a memory leak. Use goto out to free cmem. 
Cc: Sourabh Jain Cc: Hari Bathini Cc: Michael Ellerman Fixes: 849599b702ef ("powerpc/crash: add crash memory hotplug support") Reviewed-by: Sourabh Jain Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/powerpc/kexec/crash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c index e6539f213b3d14..a520f851c3a6bb 100644 --- a/arch/powerpc/kexec/crash.c +++ b/arch/powerpc/kexec/crash.c @@ -502,7 +502,7 @@ static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify * ret = get_crash_memory_ranges(&cmem); if (ret) { pr_err("Failed to get crash mem range\n"); - return; + goto out; } /* From 36930f60457014164782658a1bd26f4e1b399333 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 1 Jun 2026 17:47:45 +0800 Subject: [PATCH 03/23] powerpc/kexec_file: Fix NULL pointer dereference in kexec_extra_fdt_size_ppc64() A static Sashiko AI review identified a potential NULL pointer dereference in kexec_extra_fdt_size_ppc64(). When get_reserved_memory_ranges() successfully returns 0 on platforms without any reserved memory regions, the allocated 'rmem' pointer remains NULL. Passing this unallocated pointer directly to kexec_extra_fdt_size_ppc64() leads to a kernel panic when evaluating 'rmem->nr_ranges'. Fix this by adding a defensive NULL pointer check at the beginning of kexec_extra_fdt_size_ppc64(), returning 0 extra space immediately if no reserved memory structure exists. 
Cc: Sourabh Jain Cc: Hari Bathini Cc: Michael Ellerman Cc: stable@vger.kernel.org Fixes: 0d3ff067331e ("powerpc/kexec_file: fix extra size calculation for kexec FDT") Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/powerpc/kexec/file_load_64.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index 8c72e12ea44e5a..fdeedf102c38e3 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -649,6 +649,9 @@ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image, struct crash_mem * struct device_node *dn; unsigned int cpu_nodes = 0, extra_size = 0; + if (!rmem) + return 0; + // Budget some space for the password blob. There's already extra space // for the key name if (plpks_is_available()) From 0d5cdf380cb27199a244afaf2e212af6acb896d8 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 1 Jun 2026 17:47:46 +0800 Subject: [PATCH 04/23] powerpc/kexec_file: Fix memory range truncation in __merge_memory_ranges() Sashiko AI review pointed out the following issue. The __merge_memory_ranges() function incorrectly handles overlapping memory ranges when merging them. Although sort_memory_ranges() sorts all ranges by their start address in ascending order beforehand, the merge logic remains defective in two ways: 1. It compares the current range's start against the previous element (i-1) instead of the running target index (idx) 2. It unconditionally overwrites 'ranges[idx].end' with 'ranges[i].end'. This logic flaw leads to critical memory truncation when a larger memory range completely subsumes subsequent smaller ranges. For example, consider a sorted input array with three ranges: Range A (idx=0): [0x1000 - 0x9000] Range B (i=1): [0x2000 - 0x5000] (completely inside Range A) Range C (i=2): [0x6000 - 0x8000] (completely inside Range A) 1. When i=1 (Range B): ranges[1].start (0x2000) <= ranges[0].end + 1 (0x9001) is TRUE. 
The code executes: ranges[0].end = ranges[1].end, which erroneously shrinks Range A's end from 0x9000 down to 0x5000. 2. When i=2 (Range C): ranges[2].start (0x6000) <= ranges[1].end + 1 (0x5001) is FALSE. The code falls into the else block, creating a broken new range. As a result, valid memory fragments [0x5001 - 0x5fff] and [0x8001 - 0x9000] are completely lost from the kexec exclude lists, potentially allowing the crash kernel to overwrite active memory, causing data corruption or crashes. Fix this by ensuring the start of the current range is compared against the end of the active merged range (idx), and use max() to safely prevent the outer boundary from being truncated. Cc: Sourabh Jain Cc: Hari Bathini Cc: Michael Ellerman Cc: stable@vger.kernel.org Fixes: 180adfc532a8 ("powerpc/kexec_file: Add helper functions for getting memory ranges") Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/powerpc/kexec/ranges.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c index 867135560e5c8b..eb45e89502caee 100644 --- a/arch/powerpc/kexec/ranges.c +++ b/arch/powerpc/kexec/ranges.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -105,19 +106,16 @@ static void __merge_memory_ranges(struct crash_mem *mem_rngs) struct range *ranges; int i, idx; - if (!mem_rngs) + if (!mem_rngs || mem_rngs->nr_ranges <= 1) return; idx = 0; - ranges = &(mem_rngs->ranges[0]); + ranges = mem_rngs->ranges; for (i = 1; i < mem_rngs->nr_ranges; i++) { - if (ranges[i].start <= (ranges[i-1].end + 1)) - ranges[idx].end = ranges[i].end; + if (ranges[i].start <= (ranges[idx].end + 1)) + ranges[idx].end = max(ranges[idx].end, ranges[i].end); else { idx++; - if (i == idx) - continue; - ranges[idx] = ranges[i]; } } From b3714a31e9e42c8a3bb702b18953739663ef696b Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Mon, 1 Jun 2026 17:47:47 +0800 Subject: [PATCH 05/23] 
powerpc/crash: sort crash memory ranges before preparing elfcorehdr During a memory hot-remove event, the elfcorehdr is rebuilt to exclude the removed memory. While updating the crash memory ranges for this operation, the crash memory ranges array can become unsorted. This happens because remove_mem_range() may split a memory range into two parts and append the higher-address part as a separate range at the end of the array. So far, no issues have been observed due to the unsorted crash memory ranges. However, this could lead to problems once crash memory range removal is handled by generic code, as introduced in the upcoming patches in this series. Currently, powerpc uses a platform-specific function, remove_mem_range(), to exclude hot-removed memory from the crash memory ranges. This function performs the same task as the generic crash_exclude_mem_range() in crash_core.c. The generic helper also ensures that the crash memory ranges remain sorted. So remove the redundant powerpc-specific implementation and instead call crash_exclude_mem_range_guarded() (which internally calls crash_exclude_mem_range()) to exclude the hot-removed memory ranges. 
Cc: Andrew Morton Cc: Baoquan he Cc: Jinjie Ruan Cc: Hari Bathini Cc: Madhavan Srinivasan Cc: Mahesh Salgaonkar Cc: Michael Ellerman Cc: Ritesh Harjani (IBM) Cc: Shivang Upadhyay Cc: linux-kernel@vger.kernel.org Acked-by: Baoquan He Reviewed-by: Ritesh Harjani (IBM) Acked-by: Mike Rapoport (Microsoft) Signed-off-by: Sourabh Jain Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/powerpc/include/asm/kexec_ranges.h | 4 +- arch/powerpc/kexec/crash.c | 5 +- arch/powerpc/kexec/ranges.c | 87 +------------------------ 3 files changed, 7 insertions(+), 89 deletions(-) diff --git a/arch/powerpc/include/asm/kexec_ranges.h b/arch/powerpc/include/asm/kexec_ranges.h index 14055896cbcbcd..ad95e3792d10cc 100644 --- a/arch/powerpc/include/asm/kexec_ranges.h +++ b/arch/powerpc/include/asm/kexec_ranges.h @@ -7,7 +7,9 @@ void sort_memory_ranges(struct crash_mem *mrngs, bool merge); struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges); int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size); -int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size); +int crash_exclude_mem_range_guarded(struct crash_mem **mem_ranges, + unsigned long long mstart, + unsigned long long mend); int get_exclude_memory_ranges(struct crash_mem **mem_ranges); int get_reserved_memory_ranges(struct crash_mem **mem_ranges); int get_crash_memory_ranges(struct crash_mem **mem_ranges); diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c index a520f851c3a6bb..d634db67becc6e 100644 --- a/arch/powerpc/kexec/crash.c +++ b/arch/powerpc/kexec/crash.c @@ -493,7 +493,7 @@ static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify * struct crash_mem *cmem = NULL; struct kexec_segment *ksegment; void *ptr, *mem, *elfbuf = NULL; - unsigned long elfsz, memsz, base_addr, size; + unsigned long elfsz, memsz, base_addr, size, end; ksegment = &image->segment[image->elfcorehdr_index]; mem = (void *) ksegment->mem; @@ -512,7 +512,8 @@ 
static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify * if (image->hp_action == KEXEC_CRASH_HP_REMOVE_MEMORY) { base_addr = PFN_PHYS(mn->start_pfn); size = mn->nr_pages * PAGE_SIZE; - ret = remove_mem_range(&cmem, base_addr, size); + end = base_addr + size - 1; + ret = crash_exclude_mem_range_guarded(&cmem, base_addr, end); if (ret) { pr_err("Failed to remove hot-unplugged memory from crash memory ranges\n"); goto out; diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c index eb45e89502caee..b2fb78562cdc3b 100644 --- a/arch/powerpc/kexec/ranges.c +++ b/arch/powerpc/kexec/ranges.c @@ -551,7 +551,7 @@ int get_usable_memory_ranges(struct crash_mem **mem_ranges) #endif /* CONFIG_KEXEC_FILE */ #ifdef CONFIG_CRASH_DUMP -static int crash_exclude_mem_range_guarded(struct crash_mem **mem_ranges, +int crash_exclude_mem_range_guarded(struct crash_mem **mem_ranges, unsigned long long mstart, unsigned long long mend) { @@ -639,89 +639,4 @@ int get_crash_memory_ranges(struct crash_mem **mem_ranges) pr_err("Failed to setup crash memory ranges\n"); return ret; } - -/** - * remove_mem_range - Removes the given memory range from the range list. - * @mem_ranges: Range list to remove the memory range to. - * @base: Base address of the range to remove. - * @size: Size of the memory range to remove. - * - * (Re)allocates memory, if needed. - * - * Returns 0 on success, negative errno on error. - */ -int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size) -{ - u64 end; - int ret = 0; - unsigned int i; - u64 mstart, mend; - struct crash_mem *mem_rngs = *mem_ranges; - - if (!size) - return 0; - - /* - * Memory range are stored as start and end address, use - * the same format to do remove operation. 
- */ - end = base + size - 1; - - for (i = 0; i < mem_rngs->nr_ranges; i++) { - mstart = mem_rngs->ranges[i].start; - mend = mem_rngs->ranges[i].end; - - /* - * Memory range to remove is not part of this range entry - * in the memory range list - */ - if (!(base >= mstart && end <= mend)) - continue; - - /* - * Memory range to remove is equivalent to this entry in the - * memory range list. Remove the range entry from the list. - */ - if (base == mstart && end == mend) { - for (; i < mem_rngs->nr_ranges - 1; i++) { - mem_rngs->ranges[i].start = mem_rngs->ranges[i+1].start; - mem_rngs->ranges[i].end = mem_rngs->ranges[i+1].end; - } - mem_rngs->nr_ranges--; - goto out; - } - /* - * Start address of the memory range to remove and the - * current memory range entry in the list is same. Just - * move the start address of the current memory range - * entry in the list to end + 1. - */ - else if (base == mstart) { - mem_rngs->ranges[i].start = end + 1; - goto out; - } - /* - * End address of the memory range to remove and the - * current memory range entry in the list is same. - * Just move the end address of the current memory - * range entry in the list to base - 1. - */ - else if (end == mend) { - mem_rngs->ranges[i].end = base - 1; - goto out; - } - /* - * Memory range to remove is not at the edge of current - * memory range entry. Split the current memory entry into - * two half. - */ - else { - size = mem_rngs->ranges[i].end - end + 1; - mem_rngs->ranges[i].end = base - 1; - ret = add_mem_range(mem_ranges, end + 1, size); - } - } -out: - return ret; -} #endif /* CONFIG_CRASH_DUMP */ From 8d19495fb2d48741a9553f9a30976e88af247047 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 1 Jun 2026 17:47:48 +0800 Subject: [PATCH 06/23] kexec: Extract kexec_free_segment_cma() from kimage_free_cma() The generic kimage_free_cma() relies on `image->nr_segments` to iterate and free allocated CMA pages. 
However, during architecture-specific segment placement retry loops (e.g., arm64's image_load()), a mid-way failure will truncate `image->nr_segments` back to its initial value. This truncation permanently hides any CMA pages allocated outside the new boundary from global cleanup, causing silent background memory leaks. To allow architecture-specific loaders to execute fine-grained memory reclamation before truncation occurs, extract the single-pass CMA release logic into a dedicated and exported helper: void kexec_free_segment_cma(struct kimage *image, unsigned long idx); Refactor the main kimage_free_cma() to invoke this helper sequentially to maintain backward compatibility while expanding single-slot flexibility. Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- include/linux/kexec.h | 2 ++ kernel/kexec_core.c | 25 ++++++++++++++----------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 8a22bc9b8c6c85..6f1eabda03006e 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -532,6 +532,7 @@ extern bool kexec_file_dbg_print; extern void *kimage_map_segment(struct kimage *image, int idx); extern void kimage_unmap_segment(void *buffer); +extern void kexec_free_segment_cma(struct kimage *image, unsigned long idx); #else /* !CONFIG_KEXEC_CORE */ struct pt_regs; struct task_struct; @@ -543,6 +544,7 @@ static inline int kexec_crash_loaded(void) { return 0; } static inline void *kimage_map_segment(struct kimage *image, int idx) { return NULL; } static inline void kimage_unmap_segment(void *buffer) { } +static inline void kexec_free_segment_cma(struct kimage *image, unsigned long idx) { } #define kexec_in_progress false #endif /* CONFIG_KEXEC_CORE */ diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index a43d2da0fe3e70..9195f81e53c48b 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -554,22 +554,25 @@ static void kimage_free_entry(kimage_entry_t entry) 
kimage_free_pages(page); } -static void kimage_free_cma(struct kimage *image) +void kexec_free_segment_cma(struct kimage *image, unsigned long idx) { - unsigned long i; + u32 nr_pages = image->segment[idx].memsz >> PAGE_SHIFT; + struct page *cma = image->segment_cma[idx]; - for (i = 0; i < image->nr_segments; i++) { - struct page *cma = image->segment_cma[i]; - u32 nr_pages = image->segment[i].memsz >> PAGE_SHIFT; + if (!cma) + return; - if (!cma) - continue; + arch_kexec_pre_free_pages(page_address(cma), nr_pages); + dma_release_from_contiguous(NULL, cma, nr_pages); + image->segment_cma[idx] = NULL; +} - arch_kexec_pre_free_pages(page_address(cma), nr_pages); - dma_release_from_contiguous(NULL, cma, nr_pages); - image->segment_cma[i] = NULL; - } +static void kimage_free_cma(struct kimage *image) +{ + unsigned long i; + for (i = 0; i < image->nr_segments; i++) + kexec_free_segment_cma(image, i); } void kimage_free(struct kimage *image) From 58c6241d2ef2ba46a55473fe180c667d9f808d8b Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 1 Jun 2026 17:47:49 +0800 Subject: [PATCH 07/23] arm64: kexec_file: Fix CMA page leaks during segment placement retry loops Sashiko AI code review pointed out, during arm64 kexec image placement retry loops in image_load(), the loader repeatedly attempts to find a suitable memory hole for the kernel and its associated segments (initrd, dtb, etc.). When a placement attempt fails midway, the core framework rolls back `image->nr_segments` to its initial state to purge the failed segments logically. However, this truncation causes a severe background memory leak. Any CMA pages successfully allocated via kexec_add_buffer() during the failed attempt are recorded in the `image->segment_cma` array. Since the subsequent global kimage_free_cma() cleanup only iterates up to the truncated (smaller) `nr_segments` boundary, these allocated CMA pages outside the new boundary become completely orphaned and permanently leaked. 
Fix this by leveraging the newly introduced generic kexec_free_segment_cma() helper to execute fine-grained memory reclamation before any truncation occurs: 1. In image_load(), explicitly invoke kexec_free_segment_cma() to release the CMA buffer allocated for the current failed kernel segment before decrementing `image->nr_segments`. 2. In the error path of load_other_segments(), iterate backward from the failed segment index down to `orig_segments`, sequentially freeing each orphan CMA segment allocation before restoring the initial segment count. This guarantees that all temporary CMA pages allocated during placement failures are cleanly returned to the contiguous memory allocator, eliminating silent background memory leaks across all retry paths. Cc: Catalin Marinas Cc: Will Deacon Cc: Breno Leitao Cc: Pratyush Yadav Cc: Andrew Morton Cc: Yeoreum Yun Cc: Kees Cook Cc: "Rob Herring (Arm)" Cc: Baoquan He Cc: Coiby Xu Cc: Alexander Graf Cc: Pasha Tatashin Cc: stable@vger.kernel.org Fixes: 07d24902977e4 ("kexec: enable CMA based contiguous allocation") Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/arm64/kernel/kexec_image.c | 1 + arch/arm64/kernel/machine_kexec_file.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c index b70f4df15a1ae5..ffcb7f9075e625 100644 --- a/arch/arm64/kernel/kexec_image.c +++ b/arch/arm64/kernel/kexec_image.c @@ -107,6 +107,7 @@ static void *image_load(struct kimage *image, * We couldn't find space for the other segments; erase the * kernel segment and try the next available hole. 
*/ + kexec_free_segment_cma(image, kernel_segment_number); image->nr_segments -= 1; kbuf.buf_min = kernel_segment->mem + kernel_segment->memsz; kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index e31fabed378a59..13c247c2886602 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -195,7 +195,10 @@ int load_other_segments(struct kimage *image, return 0; out_err: - image->nr_segments = orig_segments; + while (image->nr_segments > orig_segments) { + kexec_free_segment_cma(image, image->nr_segments - 1); + image->nr_segments--; + } kvfree(dtb); return ret; } From 1173243d5741262c7a766fafc18f3719cdc4d1b3 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 1 Jun 2026 17:47:50 +0800 Subject: [PATCH 08/23] arm64: kexec_file: Fix image->elf_headers memory leak during retry loop Sashiko AI code review pointed out a potential memory leak of image->elf_headers when load_other_segments() fails on error paths. In the arm64 kexec_file file-load path, kexec_image.c runs a retry loop calling kexec_add_buffer() to find a suitable location for the kernel segment. On each iteration, load_other_segments() is invoked to allocate and populate alternative segments such as initrd, DTB, and ELF headers. However, if a placement or allocation failure occurs later in load_other_segments() (e.g., when adding initrd or dtb), the execution jumps to the out_err label. While this path restores image->nr_segments via orig_segments, it returns an error back to the caller without freeing the previously allocated image->elf_headers vmalloc buffer. As a result, the retry loop in image_load() unconditionally allocates new ELF headers on the next iteration and overwrites image->elf_headers, permanently leaking the memory blocks allocated in previous iterations. To fix this, decouple the ELF header allocation from the target-seeking retry loop. 
Since the contents and size of ELF headers only depend on the host memory layout and do not change with the kernel's physical placement, move prepare_elf_headers() completely outside and prior to the while retry loop in image_load(). If kexec_add_buffer() for the ELF headers fails, there is no need to vfree the headers, because the error path will vfree `image->elf_headers` by calling arch_kimage_file_post_load_cleanup(). This optimization eliminates redundant memory allocation/deallocation overhead during kexec placement retries and eradicates the Use-After-Free and memory leak risk. Concurrently, remove the prepare_elf_headers() call from inside load_other_segments() and have it directly reuse the single, pre-allocated image->elf_headers. Cc: Catalin Marinas Cc: Will Deacon Cc: Thomas Huth Cc: Breno Leitao Cc: Andrew Morton Cc: Yeoreum Yun Cc: Coiby Xu Cc: Baoquan He Cc: Kees Cook Cc: Benjamin Gwin Cc: stable@vger.kernel.org Fixes: 108aa503657e ("arm64: kexec_file: try more regions if loading segments fails") Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/arm64/include/asm/kexec.h | 1 + arch/arm64/kernel/kexec_image.c | 16 ++++++++++++++++ arch/arm64/kernel/machine_kexec_file.c | 23 +++++------------------ 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 892e5bebda957b..7ffa2ff5fcfd4e 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -128,6 +128,7 @@ extern int load_other_segments(struct kimage *image, unsigned long kernel_load_addr, unsigned long kernel_size, char *initrd, unsigned long initrd_len, char *cmdline); +extern int prepare_elf_headers(void **addr, unsigned long *sz); #endif #endif /* __ASSEMBLER__ */ diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c index ffcb7f9075e625..424b9527db0908 100644 --- a/arch/arm64/kernel/kexec_image.c +++ b/arch/arm64/kernel/kexec_image.c @@ -89,6 +89,22 @@ static void 
*image_load(struct kimage *image, kernel_segment_number = image->nr_segments; +#ifdef CONFIG_CRASH_DUMP + if (image->type == KEXEC_TYPE_CRASH) { + /* load elf core header */ + unsigned long headers_sz; + void *headers; + + ret = prepare_elf_headers(&headers, &headers_sz); + if (ret) { + pr_err("Preparing elf core header failed\n"); + return ERR_PTR(ret); + } + image->elf_headers = headers; + image->elf_headers_sz = headers_sz; + } +#endif + /* * The location of the kernel segment may make it impossible to satisfy * the other segment requirements, so we try repeatedly to find a diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 13c247c2886602..4cbb71e1f8ed56 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -40,7 +40,7 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) } #ifdef CONFIG_CRASH_DUMP -static int prepare_elf_headers(void **addr, unsigned long *sz) +int prepare_elf_headers(void **addr, unsigned long *sz) { struct crash_mem *cmem; unsigned int nr_ranges; @@ -105,32 +105,19 @@ int load_other_segments(struct kimage *image, kbuf.buf_min = kernel_load_addr + kernel_size; #ifdef CONFIG_CRASH_DUMP - /* load elf core header */ - void *headers; - unsigned long headers_sz; if (image->type == KEXEC_TYPE_CRASH) { - ret = prepare_elf_headers(&headers, &headers_sz); - if (ret) { - pr_err("Preparing elf core header failed\n"); - goto out_err; - } - - kbuf.buffer = headers; - kbuf.bufsz = headers_sz; + kbuf.buffer = image->elf_headers; + kbuf.bufsz = image->elf_headers_sz; kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; - kbuf.memsz = headers_sz; + kbuf.memsz = image->elf_headers_sz; kbuf.buf_align = SZ_64K; /* largest supported page size */ kbuf.buf_max = ULONG_MAX; kbuf.top_down = true; ret = kexec_add_buffer(&kbuf); - if (ret) { - vfree(headers); + if (ret) goto out_err; - } - image->elf_headers = headers; image->elf_load_addr = kbuf.mem; - image->elf_headers_sz = 
headers_sz; kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n", image->elf_load_addr, kbuf.bufsz, kbuf.memsz); From 8bab94533729ad77f9862d3c485ac57cf5782206 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 1 Jun 2026 17:47:51 +0800 Subject: [PATCH 09/23] kexec: Fix UAF and Double Free in crash_load_dm_crypt_keys() A static memory safety review by Sashiko AI identified a high-severity Use-After-Free (UAF) and Double Free vulnerability in the dm-crypt keys handling path during arm64 kexec image placement retry loops. In crash_load_dm_crypt_keys(), when the segment allocation fails via kexec_add_buffer(), the error path invokes `kvfree((void *)kbuf.buffer)` to reclaim the keys buffer. However, the global pointer `keys_header` is left dangling with a stale address, creating an insecure memory trap. When the top-level loader image_load() retries the next available placement hole, crash_load_dm_crypt_keys() is re-entered. Since `is_dm_key_reused` is a read-only global configuration managed by user-space configfs, it cannot be mutated by the kernel. If it remains true, the loader skips build_keys_header() and blindly reuses the stale `keys_header` pointer for kbuf.buffer, triggering a severe Use-After-Free or a Null pointer dereference during kexec_add_buffer(). Alternatively, a new headers build can trigger a recursive Double Free inside build_keys_header(). Fix this by setting the global `keys_header` to NULL immediately after it is freed in the failure path. Concurrently, upgrade the header regeneration check to a composite condition: `if (!is_dm_key_reused || !keys_header)` This ensures that if a previous retry attempt wiped the buffer, the kernel will automatically and safely trigger a fresh header regeneration internally without modifying the user-configured `is_dm_key_reused` state flag, achieving absolute data consistency and memory safety across all retry paths. 
Cc: Andrew Morton Cc: Baoquan He Cc: Mike Rapoport Cc: Pasha Tatashin Cc: Pratyush Yadav Cc: Dave Young Cc: stable@vger.kernel.org Fixes: e3a84be1ec2f ("arm64,ppc64le/kdump: pass dm-crypt keys to kdump kernel") Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- kernel/crash_dump_dm_crypt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/crash_dump_dm_crypt.c b/kernel/crash_dump_dm_crypt.c index cb875ddb6ba68b..2c54628763377a 100644 --- a/kernel/crash_dump_dm_crypt.c +++ b/kernel/crash_dump_dm_crypt.c @@ -412,13 +412,12 @@ int crash_load_dm_crypt_keys(struct kimage *image) }; int r; - if (key_count <= 0) { kexec_dprintk("No dm-crypt keys\n"); return 0; } - if (!is_dm_key_reused) { + if (!is_dm_key_reused || unlikely(!keys_header)) { image->dm_crypt_keys_addr = 0; r = build_keys_header(); if (r) { @@ -437,6 +436,7 @@ int crash_load_dm_crypt_keys(struct kimage *image) if (r) { pr_err("Failed to call kexec_add_buffer, ret=%d\n", r); kvfree((void *)kbuf.buffer); + keys_header = NULL; return r; } image->dm_crypt_keys_addr = kbuf.mem; From 00305c5bf52fd39407860bc10e7c93c8882037bf Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 1 Jun 2026 17:47:52 +0800 Subject: [PATCH 10/23] crash_core: Introduce CRASH_HOTPLUG_SAFETY_PADDING for memory hotplug safety Introduce CRASH_HOTPLUG_SAFETY_PADDING to allocate extra slots for the crash memory ranges array, mitigating potential TOCTOU races caused by concurrent memory hotplug events. When CONFIG_MEMORY_HOTPLUG is disabled, the padding safely defaults to 0 as the memory layout remains static. 
Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- include/linux/crash_core.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index c1dee3f971a918..d4762e000098be 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -14,6 +14,12 @@ struct crash_mem { struct range ranges[] __counted_by(max_nr_ranges); }; +#ifdef CONFIG_MEMORY_HOTPLUG +#define CRASH_HOTPLUG_SAFETY_PADDING 128 +#else +#define CRASH_HOTPLUG_SAFETY_PADDING 0 +#endif + #ifdef CONFIG_CRASH_DUMP int crash_shrink_memory(unsigned long new_size); From 42ea56d1cecab039eeacd933609229119f4d38e5 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 1 Jun 2026 17:47:53 +0800 Subject: [PATCH 11/23] x86: kexec_file: Fix TOCTOU buffer overflow via memory region padding Sashiko AI code review pointed out there is a TOCTOU (Time-of-Check to Time-of-Use) race condition in prepare_elf_headers() between the initial pass that counts System RAM ranges and the second pass that populates them. If a memory hotplug event occurs between these two steps, the number of memory regions may increase, causing an out-of-bounds write to the cmem->ranges[] array. Fix this fundamentally by using `CRASH_HOTPLUG_SAFETY_PADDING`(128 slots) to expand the flexible array allocation ceiling upfront. This safely absorbs any concurrent memory region expansion. Concurrently, add a defensive boundary check inside the callback to return -EAGAIN on unexpected overrun, fully eradicating the overflow window and ensuring system stability. Cc: AKASHI Takahiro Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. 
Peter Anvin" Cc: Andrew Morton Cc: Baoquan He Cc: Mike Rapoport Cc: stable@vger.kernel.org Fixes: 8d5f894a3108 ("x86: kexec_file: lift CRASH_MAX_RANGES limit on crash_mem buffer") Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/x86/kernel/crash.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index cd796818d94d9e..a1089907728da9 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -177,7 +177,7 @@ static struct crash_mem *fill_up_crash_elf_data(void) * But in order to lest the low 1M could be changed in the future, * (e.g. [start, 1M]), add a extra slot. */ - nr_ranges += 3 + crashk_cma_cnt; + nr_ranges += 3 + crashk_cma_cnt + CRASH_HOTPLUG_SAFETY_PADDING; cmem = vzalloc(struct_size(cmem, ranges, nr_ranges)); if (!cmem) return NULL; @@ -226,6 +226,9 @@ static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) { struct crash_mem *cmem = arg; + if (unlikely(cmem->nr_ranges >= cmem->max_nr_ranges)) + return -EAGAIN; + cmem->ranges[cmem->nr_ranges].start = res->start; cmem->ranges[cmem->nr_ranges].end = res->end; cmem->nr_ranges++; From 756f0e14f43afe7c446acab75badc9249058b107 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 1 Jun 2026 17:47:54 +0800 Subject: [PATCH 12/23] arm64: kexec_file: Fix TOCTOU buffer overflow via memory region padding Sashiko AI code review pointed out there is a TOCTOU (Time-of-Check to Time-of-Use) race condition in prepare_elf_headers() between the initial pass that counts System RAM ranges and the second pass that populates them. If a memory hotplug event occurs between these two steps, the number of memory regions may increase, causing an out-of-bounds write to the cmem->ranges[] array. Fix this fundamentally by using `CRASH_HOTPLUG_SAFETY_PADDING` (128 slots) to expand the flexible array allocation ceiling upfront. This safely absorbs any concurrent memory region expansion. 
Concurrently, add a defensive boundary check to return -EAGAIN on unexpected overrun, fully eradicating the overflow window and ensuring system stability. Cc: Catalin Marinas Cc: Will Deacon Cc: Andrew Morton Cc: Baoquan He Cc: Breno Leitao Cc: stable@vger.kernel.org Fixes: 3751e728cef2 ("arm64: kexec_file: add crash dump support") Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/arm64/kernel/machine_kexec_file.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 4cbb71e1f8ed56..8a96fb68b88d6e 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -48,7 +48,8 @@ int prepare_elf_headers(void **addr, unsigned long *sz) u64 i; phys_addr_t start, end; - nr_ranges = 2; /* for exclusion of crashkernel region */ + /* for exclusion of crashkernel region */ + nr_ranges = 2 + CRASH_HOTPLUG_SAFETY_PADDING; for_each_mem_range(i, &start, &end) nr_ranges++; @@ -59,6 +60,11 @@ int prepare_elf_headers(void **addr, unsigned long *sz) cmem->max_nr_ranges = nr_ranges; cmem->nr_ranges = 0; for_each_mem_range(i, &start, &end) { + if (unlikely(cmem->nr_ranges >= cmem->max_nr_ranges)) { + ret = -EAGAIN; + goto out; + } + cmem->ranges[cmem->nr_ranges].start = start; cmem->ranges[cmem->nr_ranges].end = end - 1; cmem->nr_ranges++; From b71a7cb19a71542e419e5b5d9b3ab6e6d40b0f6c Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 1 Jun 2026 17:47:55 +0800 Subject: [PATCH 13/23] riscv: kexec_file: Fix TOCTOU buffer overflow via memory region padding Sashiko AI code review pointed out there is a TOCTOU (Time-of-Check to Time-of-Use) race condition in prepare_elf_headers() between the initial pass that counts System RAM ranges and the second pass that populates them. If a memory hotplug event occurs between these two steps, the number of memory regions may increase, causing an out-of-bounds write to the cmem->ranges[] array. 
Fix this fundamentally by using `CRASH_HOTPLUG_SAFETY_PADDING` (128 slots) to expand the flexible array allocation ceiling upfront. This safely absorbs any concurrent memory region expansion. Concurrently, add a defensive boundary check inside the callback to return -EAGAIN on unexpected overrun, fully eradicating the overflow window and ensuring system stability. Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Alexandre Ghiti Cc: songshuaishuai@tinylab.org Cc: bjorn@rivosinc.com Cc: leitao@debian.org Fixes: 8acea455fafa ("RISC-V: Support for kexec_file on panic") Reviewed-by: Guo Ren Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/riscv/kernel/machine_kexec_file.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c index fa2946aa9b8f40..c6dd36dd4218d9 100644 --- a/arch/riscv/kernel/machine_kexec_file.c +++ b/arch/riscv/kernel/machine_kexec_file.c @@ -49,6 +49,9 @@ static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) { struct crash_mem *cmem = arg; + if (unlikely(cmem->nr_ranges >= cmem->max_nr_ranges)) + return -EAGAIN; + cmem->ranges[cmem->nr_ranges].start = res->start; cmem->ranges[cmem->nr_ranges].end = res->end; cmem->nr_ranges++; @@ -62,7 +65,8 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) unsigned int nr_ranges; int ret; - nr_ranges = 2; /* For exclusion of crashkernel region */ + /* For exclusion of crashkernel region */ + nr_ranges = 2 + CRASH_HOTPLUG_SAFETY_PADDING; walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); cmem = kmalloc_flex(*cmem, ranges, nr_ranges); From ae2f832c27b135d9f3ac0a3f8845383145859213 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 1 Jun 2026 17:47:56 +0800 Subject: [PATCH 14/23] LoongArch: kexec_file: Fix TOCTOU buffer overflow via memory region padding Sashiko AI code review pointed out there is a TOCTOU (Time-of-Check to Time-of-Use) race 
condition in prepare_elf_headers() between the initial pass that counts System RAM ranges and the second pass that populates them. If a memory hotplug event occurs between these two steps, the number of memory regions may increase, causing an out-of-bounds write to the cmem->ranges[] array. Fix this fundamentally by using `CRASH_HOTPLUG_SAFETY_PADDING` (128 slots) to expand the flexible array allocation ceiling upfront. This safely absorbs any concurrent memory region expansion. Concurrently, add a defensive boundary check to return -EAGAIN on unexpected overrun, fully eradicating the overflow window and ensuring system stability. Cc: Youling Tang Cc: Huacai Chen Cc: WANG Xuerui Cc: stable@vger.kernel.org Fixes: 1bcca8620a91 ("LoongArch: Add crash dump support for kexec_file") Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/loongarch/kernel/machine_kexec_file.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/machine_kexec_file.c b/arch/loongarch/kernel/machine_kexec_file.c index 5584b798ba4645..3c369124586e13 100644 --- a/arch/loongarch/kernel/machine_kexec_file.c +++ b/arch/loongarch/kernel/machine_kexec_file.c @@ -64,7 +64,8 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) phys_addr_t start, end; struct crash_mem *cmem; - nr_ranges = 2; /* for exclusion of crashkernel region */ + /* for exclusion of crashkernel region */ + nr_ranges = 2 + CRASH_HOTPLUG_SAFETY_PADDING; for_each_mem_range(i, &start, &end) nr_ranges++; @@ -75,6 +76,11 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) cmem->max_nr_ranges = nr_ranges; cmem->nr_ranges = 0; for_each_mem_range(i, &start, &end) { + if (unlikely(cmem->nr_ranges >= cmem->max_nr_ranges)) { + ret = -EAGAIN; + goto out; + } + cmem->ranges[cmem->nr_ranges].start = start; cmem->ranges[cmem->nr_ranges].end = end - 1; cmem->nr_ranges++; From c18c99f2d618690c502b6e4dd3fec4b463945c5a Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: 
Mon, 1 Jun 2026 17:47:57 +0800 Subject: [PATCH 15/23] crash: Add crash_prepare_headers() to exclude crash kernel memory The allocation of the crash memory ranges and the exclusion of the crashk_res, crashk_low_res and crashk_cma memory are almost identical across different architectures. Handling them in the crash core would eliminate a lot of duplication, so add a crash_prepare_headers() helper to handle them in the common code. To achieve the above goal, three architecture-specific functions are introduced: - arch_get_system_nr_ranges(). Pre-counts the max number of memory ranges. - arch_crash_populate_cmem(). Collects the memory ranges and fills them into cmem. - arch_crash_exclude_ranges(). Excludes additional architecture-specific crash memory ranges; the default implementation excludes nothing. Reviewed-by: Sourabh Jain Acked-by: Baoquan He Acked-by: Mike Rapoport (Microsoft) Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- include/linux/crash_core.h | 5 +++ kernel/crash_core.c | 82 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 84 insertions(+), 3 deletions(-) diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index d4762e000098be..43baf9c87355e3 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -65,6 +65,8 @@ extern int crash_exclude_mem_range(struct crash_mem *mem, unsigned long long mend); extern int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, void **addr, unsigned long *sz); +extern int crash_prepare_headers(int need_kernel_map, void **addr, + unsigned long *sz, unsigned long *nr_mem_ranges); struct kimage; struct kexec_segment; @@ -82,6 +84,9 @@ int kexec_should_crash(struct task_struct *p); int kexec_crash_loaded(void); void crash_save_cpu(struct pt_regs *regs, int cpu); extern int kimage_crash_copy_vmcoreinfo(struct kimage *image); +extern unsigned int arch_get_system_nr_ranges(void); +extern int arch_crash_populate_cmem(struct crash_mem *cmem); +extern int arch_crash_exclude_ranges(struct crash_mem *cmem); #else
/* !CONFIG_CRASH_DUMP*/ struct pt_regs; diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 4f21fc3b108b83..481babc2913109 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -168,9 +168,6 @@ static inline resource_size_t crash_resource_size(const struct resource *res) return !res->end ? 0 : resource_size(res); } - - - int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, void **addr, unsigned long *sz) { @@ -272,6 +269,85 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, return 0; } +static struct crash_mem *alloc_cmem(unsigned int nr_ranges) +{ + struct crash_mem *cmem; + + cmem = kvzalloc_flex(*cmem, ranges, nr_ranges); + if (!cmem) + return NULL; + + cmem->max_nr_ranges = nr_ranges; + return cmem; +} + +unsigned int __weak arch_get_system_nr_ranges(void) { return 0; } +int __weak arch_crash_populate_cmem(struct crash_mem *cmem) { return -1; } +int __weak arch_crash_exclude_ranges(struct crash_mem *cmem) { return 0; } + +static int crash_exclude_core_ranges(struct crash_mem *cmem) +{ + int ret, i; + + /* Exclude crashkernel region */ + ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); + if (ret) + return ret; + + if (crashk_low_res.end) { + ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); + if (ret) + return ret; + } + + for (i = 0; i < crashk_cma_cnt; ++i) { + ret = crash_exclude_mem_range(cmem, crashk_cma_ranges[i].start, + crashk_cma_ranges[i].end); + if (ret) + return ret; + } + + return 0; +} + +int crash_prepare_headers(int need_kernel_map, void **addr, unsigned long *sz, + unsigned long *nr_mem_ranges) +{ + unsigned int max_nr_ranges; + struct crash_mem *cmem; + int ret; + + max_nr_ranges = arch_get_system_nr_ranges(); + if (!max_nr_ranges) + return -ENOMEM; + + cmem = alloc_cmem(max_nr_ranges); + if (!cmem) + return -ENOMEM; + + ret = arch_crash_populate_cmem(cmem); + if (ret) + goto out; + + ret = crash_exclude_core_ranges(cmem); + if 
(ret) + goto out; + + ret = arch_crash_exclude_ranges(cmem); + if (ret) + goto out; + + /* Return the computed number of memory ranges, for hotplug usage */ + if (nr_mem_ranges) + *nr_mem_ranges = cmem->nr_ranges; + + ret = crash_prepare_elf64_headers(cmem, need_kernel_map, addr, sz); + +out: + kvfree(cmem); + return ret; +} + /** * crash_exclude_mem_range - exclude a mem range for existing ranges * @mem: mem->range contains an array of ranges sorted in ascending order From 977dfc203c0426ceaa56f3c572d0518bcbc11349 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 1 Jun 2026 17:47:58 +0800 Subject: [PATCH 16/23] arm64: kexec_file: Use crash_prepare_headers() helper to simplify code Use the newly introduced crash_prepare_headers() function to replace the existing prepare_elf_headers(), so that cmem is allocated and the crash kernel memory is excluded in the crash core, which reduces code duplication. Only the following two architecture functions need to be implemented: - arch_get_system_nr_ranges(). Use for_each_mem_range() to traverse and pre-count the max number of memory ranges. - arch_crash_populate_cmem(). Use for_each_mem_range() to traverse and collect the memory ranges and fill them into cmem.
Acked-by: Catalin Marinas Reviewed-by: Sourabh Jain Acked-by: Baoquan He Acked-by: Mike Rapoport (Microsoft) Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/arm64/include/asm/kexec.h | 1 - arch/arm64/kernel/kexec_image.c | 2 +- arch/arm64/kernel/machine_kexec_file.c | 46 ++++++++------------------ 3 files changed, 15 insertions(+), 34 deletions(-) diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 7ffa2ff5fcfd4e..892e5bebda957b 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -128,7 +128,6 @@ extern int load_other_segments(struct kimage *image, unsigned long kernel_load_addr, unsigned long kernel_size, char *initrd, unsigned long initrd_len, char *cmdline); -extern int prepare_elf_headers(void **addr, unsigned long *sz); #endif #endif /* __ASSEMBLER__ */ diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c index 424b9527db0908..93c36a3aa618ef 100644 --- a/arch/arm64/kernel/kexec_image.c +++ b/arch/arm64/kernel/kexec_image.c @@ -95,7 +95,7 @@ static void *image_load(struct kimage *image, unsigned long headers_sz; void *headers; - ret = prepare_elf_headers(&headers, &headers_sz); + ret = crash_prepare_headers(true, &headers, &headers_sz, NULL); if (ret) { pr_err("Preparing elf core header failed\n"); return ERR_PTR(ret); diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 8a96fb68b88d6e..14e65351133ea8 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -40,52 +40,34 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) } #ifdef CONFIG_CRASH_DUMP -int prepare_elf_headers(void **addr, unsigned long *sz) +unsigned int arch_get_system_nr_ranges(void) { - struct crash_mem *cmem; - unsigned int nr_ranges; - int ret; - u64 i; + /* for exclusion of crashkernel region */ + unsigned int nr_ranges = 2 + CRASH_HOTPLUG_SAFETY_PADDING; phys_addr_t start, end; + u64 
i; - /* for exclusion of crashkernel region */ - nr_ranges = 2 + CRASH_HOTPLUG_SAFETY_PADDING; for_each_mem_range(i, &start, &end) nr_ranges++; - cmem = kmalloc_flex(*cmem, ranges, nr_ranges); - if (!cmem) - return -ENOMEM; + return nr_ranges; +} + +int arch_crash_populate_cmem(struct crash_mem *cmem) +{ + phys_addr_t start, end; + u64 i; - cmem->max_nr_ranges = nr_ranges; - cmem->nr_ranges = 0; for_each_mem_range(i, &start, &end) { - if (unlikely(cmem->nr_ranges >= cmem->max_nr_ranges)) { - ret = -EAGAIN; - goto out; - } + if (unlikely(cmem->nr_ranges >= cmem->max_nr_ranges)) + return -EAGAIN; cmem->ranges[cmem->nr_ranges].start = start; cmem->ranges[cmem->nr_ranges].end = end - 1; cmem->nr_ranges++; } - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); - if (ret) - goto out; - - if (crashk_low_res.end) { - ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); - if (ret) - goto out; - } - - ret = crash_prepare_elf64_headers(cmem, true, addr, sz); - -out: - kfree(cmem); - return ret; + return 0; } #endif From a8fd74ee7ae6404a2fe851ef03efff7edef3ff9c Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 1 Jun 2026 17:47:59 +0800 Subject: [PATCH 17/23] x86: kexec_file: Use crash_prepare_headers() helper to simplify code Use the newly introduced crash_prepare_headers() function to replace the existing prepare_elf_headers(), so that cmem is allocated and the crash kernel memory is excluded in the crash core, which reduces code duplication. Only the following three architecture functions need to be implemented: - arch_get_system_nr_ranges(). Call get_nr_ram_ranges_callback() to pre-count the max number of memory ranges. - arch_crash_populate_cmem(). Use prepare_elf64_ram_headers_callback() to collect the memory ranges and fill them into cmem. - arch_crash_exclude_ranges(). Exclude the low 1M for x86. Also remove the now-unused "nr_mem_ranges" local variable in arch_crash_handle_hotplug_event().
Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: Andrew Morton Cc: Vivek Goyal Reviewed-by: Sourabh Jain Acked-by: Baoquan He Acked-by: Mike Rapoport (Microsoft) Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/x86/kernel/crash.c | 89 +++++------------------------------------ 1 file changed, 11 insertions(+), 78 deletions(-) diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index a1089907728da9..7145b00da4ee1c 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -153,16 +153,8 @@ static int get_nr_ram_ranges_callback(struct resource *res, void *arg) return 0; } -/* Gather all the required information to prepare elf headers for ram regions */ -static struct crash_mem *fill_up_crash_elf_data(void) +unsigned int arch_get_system_nr_ranges(void) { - unsigned int nr_ranges = 0; - struct crash_mem *cmem; - - walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); - if (!nr_ranges) - return NULL; - /* * Exclusion of crash region, crashk_low_res and/or crashk_cma_ranges * may cause range splits. So add extra slots here. @@ -177,49 +169,16 @@ static struct crash_mem *fill_up_crash_elf_data(void) * But in order to lest the low 1M could be changed in the future, * (e.g. [start, 1M]), add a extra slot. */ - nr_ranges += 3 + crashk_cma_cnt + CRASH_HOTPLUG_SAFETY_PADDING; - cmem = vzalloc(struct_size(cmem, ranges, nr_ranges)); - if (!cmem) - return NULL; - - cmem->max_nr_ranges = nr_ranges; + unsigned int nr_ranges = 3 + crashk_cma_cnt + CRASH_HOTPLUG_SAFETY_PADDING; - return cmem; + walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); + return nr_ranges; } -/* - * Look for any unwanted ranges between mstart, mend and remove them. 
This - * might lead to split and split ranges are put in cmem->ranges[] array - */ -static int elf_header_exclude_ranges(struct crash_mem *cmem) +int arch_crash_exclude_ranges(struct crash_mem *cmem) { - int ret = 0; - int i; - /* Exclude the low 1M because it is always reserved */ - ret = crash_exclude_mem_range(cmem, 0, SZ_1M - 1); - if (ret) - return ret; - - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); - if (ret) - return ret; - - if (crashk_low_res.end) - ret = crash_exclude_mem_range(cmem, crashk_low_res.start, - crashk_low_res.end); - if (ret) - return ret; - - for (i = 0; i < crashk_cma_cnt; ++i) { - ret = crash_exclude_mem_range(cmem, crashk_cma_ranges[i].start, - crashk_cma_ranges[i].end); - if (ret) - return ret; - } - - return 0; + return crash_exclude_mem_range(cmem, 0, SZ_1M - 1); } static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) @@ -236,35 +195,9 @@ static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) return 0; } -/* Prepare elf headers. 
Return addr and size */ -static int prepare_elf_headers(void **addr, unsigned long *sz, - unsigned long *nr_mem_ranges) +int arch_crash_populate_cmem(struct crash_mem *cmem) { - struct crash_mem *cmem; - int ret; - - cmem = fill_up_crash_elf_data(); - if (!cmem) - return -ENOMEM; - - ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); - if (ret) - goto out; - - /* Exclude unwanted mem ranges */ - ret = elf_header_exclude_ranges(cmem); - if (ret) - goto out; - - /* Return the computed number of memory ranges, for hotplug usage */ - *nr_mem_ranges = cmem->nr_ranges; - - /* By default prepare 64bit headers */ - ret = crash_prepare_elf64_headers(cmem, IS_ENABLED(CONFIG_X86_64), addr, sz); - -out: - vfree(cmem); - return ret; + return walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); } #endif @@ -422,7 +355,8 @@ int crash_load_segments(struct kimage *image) .buf_max = ULONG_MAX, .top_down = false }; /* Prepare elf headers and add a segment */ - ret = prepare_elf_headers(&kbuf.buffer, &kbuf.bufsz, &pnum); + ret = crash_prepare_headers(IS_ENABLED(CONFIG_X86_64), &kbuf.buffer, + &kbuf.bufsz, &pnum); if (ret) return ret; @@ -515,7 +449,6 @@ unsigned int arch_crash_get_elfcorehdr_size(void) void arch_crash_handle_hotplug_event(struct kimage *image, void *arg) { void *elfbuf = NULL, *old_elfcorehdr; - unsigned long nr_mem_ranges; unsigned long mem, memsz; unsigned long elfsz = 0; @@ -533,7 +466,7 @@ void arch_crash_handle_hotplug_event(struct kimage *image, void *arg) * Create the new elfcorehdr reflecting the changes to CPU and/or * memory resources. 
*/ - if (prepare_elf_headers(&elfbuf, &elfsz, &nr_mem_ranges)) { + if (crash_prepare_headers(IS_ENABLED(CONFIG_X86_64), &elfbuf, &elfsz, NULL)) { pr_err("unable to create new elfcorehdr"); goto out; } From 0f55b20e6e303a4482051efc66d4f49c38f2e65b Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 1 Jun 2026 17:48:00 +0800 Subject: [PATCH 18/23] riscv: kexec_file: Use crash_prepare_headers() helper to simplify code Use the newly introduced crash_prepare_headers() function to replace the existing prepare_elf_headers(), so that cmem is allocated and the crash kernel memory is excluded in the crash core, which reduces code duplication. Only the following two architecture functions need to be implemented: - arch_get_system_nr_ranges(). Call get_nr_ram_ranges_callback() to pre-count the max number of memory ranges. - arch_crash_populate_cmem(). Use prepare_elf64_ram_headers_callback() to collect the memory ranges and fill them into cmem. Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Alexandre Ghiti Cc: Guo Ren Reviewed-by: Sourabh Jain Acked-by: Baoquan He Acked-by: Mike Rapoport (Microsoft) Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/riscv/kernel/machine_kexec_file.c | 49 +++++++------------------- 1 file changed, 13 insertions(+), 36 deletions(-) diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c index c6dd36dd4218d9..a33390dde3165b 100644 --- a/arch/riscv/kernel/machine_kexec_file.c +++ b/arch/riscv/kernel/machine_kexec_file.c @@ -45,6 +45,16 @@ static int get_nr_ram_ranges_callback(struct resource *res, void *arg) return 0; } +unsigned int arch_get_system_nr_ranges(void) +{ + /* For exclusion of crashkernel region */ + unsigned int nr_ranges = 2 + CRASH_HOTPLUG_SAFETY_PADDING; + + walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); + + return nr_ranges; +} + static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) { struct crash_mem *cmem = arg; @@ -59,42 +69,9 @@ static
int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) return 0; } -static int prepare_elf_headers(void **addr, unsigned long *sz) +int arch_crash_populate_cmem(struct crash_mem *cmem) { - struct crash_mem *cmem; - unsigned int nr_ranges; - int ret; - - /* For exclusion of crashkernel region */ - nr_ranges = 2 + CRASH_HOTPLUG_SAFETY_PADDING; - walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); - - cmem = kmalloc_flex(*cmem, ranges, nr_ranges); - if (!cmem) - return -ENOMEM; - - cmem->max_nr_ranges = nr_ranges; - cmem->nr_ranges = 0; - ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); - if (ret) - goto out; - - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); - if (ret) - goto out; - - if (crashk_low_res.end) { - ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); - if (ret) - goto out; - } - - ret = crash_prepare_elf64_headers(cmem, true, addr, sz); - -out: - kfree(cmem); - return ret; + return walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); } static char *setup_kdump_cmdline(struct kimage *image, char *cmdline, @@ -286,7 +263,7 @@ int load_extra_segments(struct kimage *image, unsigned long kernel_start, if (image->type == KEXEC_TYPE_CRASH) { void *headers; unsigned long headers_sz; - ret = prepare_elf_headers(&headers, &headers_sz); + ret = crash_prepare_headers(true, &headers, &headers_sz, NULL); if (ret) { pr_err("Preparing elf core header failed\n"); goto out; From 19623b4a0b292ca5bf0b695c7de6d41daed9ef51 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 1 Jun 2026 17:48:01 +0800 Subject: [PATCH 19/23] LoongArch: kexec_file: Use crash_prepare_headers() helper to simplify code Use the newly introduced crash_prepare_headers() function to replace the existing prepare_elf_headers(), allocate cmem and exclude crash kernel memory in the crash core, which reduce code duplication. 
Only the following two architecture functions need to be implemented: - arch_get_system_nr_ranges(). Use for_each_mem_range() to traverse and pre-count the max number of memory ranges. - arch_crash_populate_cmem(). Use for_each_mem_range() to traverse and collect the memory ranges and fill them into cmem. Cc: Huacai Chen Cc: WANG Xuerui Cc: Youling Tang Cc: Baoquan He Reviewed-by: Sourabh Jain Acked-by: Baoquan He Acked-by: Mike Rapoport (Microsoft) Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/loongarch/kernel/machine_kexec_file.c | 48 +++++++--------------- 1 file changed, 15 insertions(+), 33 deletions(-) diff --git a/arch/loongarch/kernel/machine_kexec_file.c b/arch/loongarch/kernel/machine_kexec_file.c index 3c369124586e13..f3101bea9e45c2 100644 --- a/arch/loongarch/kernel/machine_kexec_file.c +++ b/arch/loongarch/kernel/machine_kexec_file.c @@ -56,52 +56,34 @@ static void cmdline_add_initrd(struct kimage *image, unsigned long *cmdline_tmpl } #ifdef CONFIG_CRASH_DUMP - -static int prepare_elf_headers(void **addr, unsigned long *sz) +unsigned int arch_get_system_nr_ranges(void) { - int ret, nr_ranges; - uint64_t i; + /* for exclusion of crashkernel region */ + int nr_ranges = 2 + CRASH_HOTPLUG_SAFETY_PADDING; phys_addr_t start, end; - struct crash_mem *cmem; + uint64_t i; - /* for exclusion of crashkernel region */ - nr_ranges = 2 + CRASH_HOTPLUG_SAFETY_PADDING; for_each_mem_range(i, &start, &end) nr_ranges++; - cmem = kmalloc_flex(*cmem, ranges, nr_ranges); - if (!cmem) - return -ENOMEM; + return nr_ranges; +} + +int arch_crash_populate_cmem(struct crash_mem *cmem) +{ + phys_addr_t start, end; + uint64_t i; - cmem->max_nr_ranges = nr_ranges; - cmem->nr_ranges = 0; for_each_mem_range(i, &start, &end) { - if (unlikely(cmem->nr_ranges >= cmem->max_nr_ranges)) { - ret = -EAGAIN; - goto out; - } + if (unlikely(cmem->nr_ranges >= cmem->max_nr_ranges)) + return -EAGAIN; cmem->ranges[cmem->nr_ranges].start = start; cmem->ranges[cmem->nr_ranges].end
= end - 1; cmem->nr_ranges++; } - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); - if (ret < 0) - goto out; - - if (crashk_low_res.end) { - ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); - if (ret < 0) - goto out; - } - - ret = crash_prepare_elf64_headers(cmem, true, addr, sz); - -out: - kfree(cmem); - return ret; + return 0; } /* @@ -169,7 +151,7 @@ int load_other_segments(struct kimage *image, void *headers; unsigned long headers_sz; - ret = prepare_elf_headers(&headers, &headers_sz); + ret = crash_prepare_headers(true, &headers, &headers_sz, NULL); if (ret < 0) { pr_err("Preparing elf core header failed\n"); goto out_err; From c23eb90ca25ef68e731791430be503ff80b3a14f Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 1 Jun 2026 17:48:02 +0800 Subject: [PATCH 20/23] powerpc/kexec_file: Use crash_exclude_core_ranges() helper The exclusion of the crashk_res and crashk_cma memory from the crash memory ranges on powerpc is almost identical to what the generic crash_exclude_core_ranges() does. By introducing the architecture-specific arch_crash_exclude_mem_range() function, whose default implementation calls crash_exclude_mem_range(), and using crash_exclude_mem_range_guarded() as powerpc's own implementation, the generic crash_exclude_core_ranges() helper function can be reused.
Cc: Andrew Morton Cc: Hari Bathini Cc: Madhavan Srinivasan Cc: Mahesh Salgaonkar Cc: Michael Ellerman Cc: Ritesh Harjani (IBM) Cc: Shivang Upadhyay Acked-by: Baoquan He Reviewed-by: Sourabh Jain Acked-by: Mike Rapoport (Microsoft) Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/powerpc/include/asm/kexec_ranges.h | 3 --- arch/powerpc/kexec/crash.c | 2 +- arch/powerpc/kexec/ranges.c | 16 ++++------------ include/linux/crash_core.h | 4 ++++ kernel/crash_core.c | 19 +++++++++++++------ 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/include/asm/kexec_ranges.h b/arch/powerpc/include/asm/kexec_ranges.h index ad95e3792d10cc..8489e844b44759 100644 --- a/arch/powerpc/include/asm/kexec_ranges.h +++ b/arch/powerpc/include/asm/kexec_ranges.h @@ -7,9 +7,6 @@ void sort_memory_ranges(struct crash_mem *mrngs, bool merge); struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges); int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size); -int crash_exclude_mem_range_guarded(struct crash_mem **mem_ranges, - unsigned long long mstart, - unsigned long long mend); int get_exclude_memory_ranges(struct crash_mem **mem_ranges); int get_reserved_memory_ranges(struct crash_mem **mem_ranges); int get_crash_memory_ranges(struct crash_mem **mem_ranges); diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c index d634db67becc6e..775895f3103793 100644 --- a/arch/powerpc/kexec/crash.c +++ b/arch/powerpc/kexec/crash.c @@ -513,7 +513,7 @@ static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify * base_addr = PFN_PHYS(mn->start_pfn); size = mn->nr_pages * PAGE_SIZE; end = base_addr + size - 1; - ret = crash_exclude_mem_range_guarded(&cmem, base_addr, end); + ret = arch_crash_exclude_mem_range(&cmem, base_addr, end); if (ret) { pr_err("Failed to remove hot-unplugged memory from crash memory ranges\n"); goto out; diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c index 
b2fb78562cdc3b..539061d14a7718 100644 --- a/arch/powerpc/kexec/ranges.c +++ b/arch/powerpc/kexec/ranges.c @@ -551,9 +551,9 @@ int get_usable_memory_ranges(struct crash_mem **mem_ranges) #endif /* CONFIG_KEXEC_FILE */ #ifdef CONFIG_CRASH_DUMP -int crash_exclude_mem_range_guarded(struct crash_mem **mem_ranges, - unsigned long long mstart, - unsigned long long mend) +int arch_crash_exclude_mem_range(struct crash_mem **mem_ranges, + unsigned long long mstart, + unsigned long long mend) { struct crash_mem *tmem = *mem_ranges; @@ -602,18 +602,10 @@ int get_crash_memory_ranges(struct crash_mem **mem_ranges) sort_memory_ranges(*mem_ranges, true); } - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range_guarded(mem_ranges, crashk_res.start, crashk_res.end); + ret = crash_exclude_core_ranges(mem_ranges); if (ret) goto out; - for (i = 0; i < crashk_cma_cnt; ++i) { - ret = crash_exclude_mem_range_guarded(mem_ranges, crashk_cma_ranges[i].start, - crashk_cma_ranges[i].end); - if (ret) - goto out; - } - /* * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL * regions are exported to save their context at the time of diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 43baf9c87355e3..1ae2c0eb2eb352 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -67,6 +67,7 @@ extern int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_ma void **addr, unsigned long *sz); extern int crash_prepare_headers(int need_kernel_map, void **addr, unsigned long *sz, unsigned long *nr_mem_ranges); +extern int crash_exclude_core_ranges(struct crash_mem **cmem); struct kimage; struct kexec_segment; @@ -87,6 +88,9 @@ extern int kimage_crash_copy_vmcoreinfo(struct kimage *image); extern unsigned int arch_get_system_nr_ranges(void); extern int arch_crash_populate_cmem(struct crash_mem *cmem); extern int arch_crash_exclude_ranges(struct crash_mem *cmem); +extern int arch_crash_exclude_mem_range(struct crash_mem **mem, + 
unsigned long long mstart, + unsigned long long mend); #else /* !CONFIG_CRASH_DUMP*/ struct pt_regs; diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 481babc2913109..2b36aa9fade064 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -285,24 +285,31 @@ unsigned int __weak arch_get_system_nr_ranges(void) { return 0; } int __weak arch_crash_populate_cmem(struct crash_mem *cmem) { return -1; } int __weak arch_crash_exclude_ranges(struct crash_mem *cmem) { return 0; } -static int crash_exclude_core_ranges(struct crash_mem *cmem) +int __weak arch_crash_exclude_mem_range(struct crash_mem **mem, + unsigned long long mstart, + unsigned long long mend) +{ + return crash_exclude_mem_range(*mem, mstart, mend); +} + +int crash_exclude_core_ranges(struct crash_mem **cmem) { int ret, i; /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); + ret = arch_crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); if (ret) return ret; if (crashk_low_res.end) { - ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); + ret = arch_crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); if (ret) return ret; } for (i = 0; i < crashk_cma_cnt; ++i) { - ret = crash_exclude_mem_range(cmem, crashk_cma_ranges[i].start, - crashk_cma_ranges[i].end); + ret = arch_crash_exclude_mem_range(cmem, crashk_cma_ranges[i].start, + crashk_cma_ranges[i].end); if (ret) return ret; } @@ -329,7 +336,7 @@ int crash_prepare_headers(int need_kernel_map, void **addr, unsigned long *sz, if (ret) goto out; - ret = crash_exclude_core_ranges(cmem); + ret = crash_exclude_core_ranges(&cmem); if (ret) goto out; From a681b2ad1df5064acec185003a0e884aa4d92fee Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 1 Jun 2026 17:48:03 +0800 Subject: [PATCH 21/23] arm64: kexec_file: Add support for crashkernel CMA reservation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit Commit 35c18f2933c5 ("Add a new optional ",cma" suffix to the crashkernel= command line option") and commit ab475510e042 ("kdump: implement reserve_crashkernel_cma") added CMA support for kdump crashkernel reservation. Crash kernel memory reservation wastes production resources if too large, risks kdump failure if too small, and faces allocation difficulties on fragmented systems due to contiguous block constraints. The new CMA-based crashkernel reservation scheme splits the "large fixed reservation" into a "small fixed region + large CMA dynamic region": the CMA memory is available to userspace during normal operation to avoid waste, and is reclaimed for kdump upon crash—saving memory while improving reliability. So extend crashkernel CMA reservation support to arm64. The following changes are made to enable CMA reservation: - Parse and obtain the CMA reservation size along with other crashkernel parameters. - Call reserve_crashkernel_cma() to allocate the CMA region for kdump. - Include the CMA-reserved ranges for kdump kernel to use. - Exclude the CMA-reserved ranges from the crash kernel memory to prevent them from being exported through /proc/vmcore, which is already done in the crash core. Update kernel-parameters.txt to document CMA support for crashkernel on arm64 architecture. 
Tested-by: Breno Leitao Acked-by: Catalin Marinas Acked-by: Rob Herring (Arm) Acked-by: Baoquan He Acked-by: Mike Rapoport (Microsoft) Acked-by: Ard Biesheuvel Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- Documentation/admin-guide/kernel-parameters.txt | 2 +- arch/arm64/kernel/machine_kexec_file.c | 2 +- arch/arm64/mm/init.c | 5 +++-- drivers/of/fdt.c | 9 +++++---- drivers/of/kexec.c | 9 +++++++++ include/linux/crash_reserve.h | 4 +++- 6 files changed, 22 insertions(+), 9 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 4d0f545fb3ec5a..52742fab49a9a3 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1119,7 +1119,7 @@ Kernel parameters It will be ignored when crashkernel=X,high is not used or memory reserved is below 4G. crashkernel=size[KMG],cma - [KNL, X86, ppc] Reserve additional crash kernel memory from + [KNL, X86, ARM64, PPC] Reserve additional crash kernel memory from CMA. This reservation is usable by the first system's userspace memory and kernel movable allocations (memory balloon, zswap). 
Pages allocated from this memory range diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 14e65351133ea8..d0f73eb3f856f5 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -43,7 +43,7 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) unsigned int arch_get_system_nr_ranges(void) { /* for exclusion of crashkernel region */ - unsigned int nr_ranges = 2 + CRASH_HOTPLUG_SAFETY_PADDING; + unsigned int nr_ranges = 2 + crashk_cma_cnt + CRASH_HOTPLUG_SAFETY_PADDING; phys_addr_t start, end; u64 i; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 97987f850a33c3..227f58522dad29 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -96,8 +96,8 @@ phys_addr_t __ro_after_init arm64_dma_phys_limit; static void __init arch_reserve_crashkernel(void) { + unsigned long long crash_base, crash_size, cma_size = 0; unsigned long long low_size = 0; - unsigned long long crash_base, crash_size; bool high = false; int ret; @@ -106,11 +106,12 @@ static void __init arch_reserve_crashkernel(void) ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base, - &low_size, NULL, &high); + &low_size, &cma_size, &high); if (ret) return; reserve_crashkernel_generic(crash_size, crash_base, low_size, high); + reserve_crashkernel_cma(cma_size); } static phys_addr_t __init max_zone_phys(phys_addr_t zone_limit) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 82f7327c59ea90..0470acbd1fcf0f 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -880,11 +880,12 @@ static unsigned long chosen_node_offset = -FDT_ERR_NOTFOUND; /* * The main usage of linux,usable-memory-range is for crash dump kernel. * Originally, the number of usable-memory regions is one. Now there may - * be two regions, low region and high region. 
- * To make compatibility with existing user-space and older kdump, the low - * region is always the last range of linux,usable-memory-range if exist. + * be 2 + CRASHK_CMA_RANGES_MAX regions, low region, high region and cma + * regions. To make compatibility with existing user-space and older kdump, + * the high and low region are always the first two ranges of + * linux,usable-memory-range if exist. */ -#define MAX_USABLE_RANGES 2 +#define MAX_USABLE_RANGES (2 + CRASHK_CMA_RANGES_MAX) /** * early_init_dt_check_for_usable_mem_range - Decode usable memory range diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c index b6837e299e7fe5..029903b986cbd3 100644 --- a/drivers/of/kexec.c +++ b/drivers/of/kexec.c @@ -458,6 +458,15 @@ void *of_kexec_alloc_and_setup_fdt(const struct kimage *image, if (ret) goto out; } + + for (int i = 0; i < crashk_cma_cnt; i++) { + ret = fdt_appendprop_addrrange(fdt, 0, chosen_node, + "linux,usable-memory-range", + crashk_cma_ranges[i].start, + crashk_cma_ranges[i].end - crashk_cma_ranges[i].start + 1); + if (ret) + goto out; + } #endif } diff --git a/include/linux/crash_reserve.h b/include/linux/crash_reserve.h index f0dc03d94ca2cd..30864d90d7f509 100644 --- a/include/linux/crash_reserve.h +++ b/include/linux/crash_reserve.h @@ -14,9 +14,11 @@ extern struct resource crashk_res; extern struct resource crashk_low_res; extern struct range crashk_cma_ranges[]; + +#define CRASHK_CMA_RANGES_MAX 4 #if defined(CONFIG_CMA) && defined(CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION) #define CRASHKERNEL_CMA -#define CRASHKERNEL_CMA_RANGES_MAX 4 +#define CRASHKERNEL_CMA_RANGES_MAX (CRASHK_CMA_RANGES_MAX) extern int crashk_cma_cnt; #else #define crashk_cma_cnt 0 From c3993fe6c81e5904d6bdf8b56eed0eb498b7e676 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 1 Jun 2026 17:48:04 +0800 Subject: [PATCH 22/23] riscv: kexec_file: Add support for crashkernel CMA reservation Commit 35c18f2933c5 ("Add a new optional ",cma" suffix to the crashkernel= command 
line option") and commit ab475510e042 ("kdump: implement reserve_crashkernel_cma") added CMA support for kdump crashkernel reservation. This allows the kernel to dynamically allocate contiguous memory for crash dumping when needed, rather than permanently reserving a fixed region at boot time. So extend crashkernel CMA reservation support to riscv. The following changes are made to enable CMA reservation: - Parse and obtain the CMA reservation size along with other crashkernel parameters. - Call reserve_crashkernel_cma() to allocate the CMA region for kdump. - Include the CMA-reserved ranges for kdump kernel to use, which was already done in of_kexec_alloc_and_setup_fdt(). - Exclude the CMA-reserved ranges from the crash kernel memory to prevent them from being exported through /proc/vmcore, which was already done in the crash core. Update kernel-parameters.txt to document CMA support for crashkernel on riscv architecture. Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Alexandre Ghiti Acked-by: Baoquan He Acked-by: Mike Rapoport (Microsoft) Acked-by: Paul Walmsley # arch/riscv Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- Documentation/admin-guide/kernel-parameters.txt | 16 ++++++++-------- arch/riscv/kernel/machine_kexec_file.c | 2 +- arch/riscv/mm/init.c | 5 +++-- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 52742fab49a9a3..3ff3ddd516cf45 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1119,14 +1119,14 @@ Kernel parameters It will be ignored when crashkernel=X,high is not used or memory reserved is below 4G. crashkernel=size[KMG],cma - [KNL, X86, ARM64, PPC] Reserve additional crash kernel memory from - CMA. This reservation is usable by the first system's - userspace memory and kernel movable allocations (memory - balloon, zswap). 
Pages allocated from this memory range - will not be included in the vmcore so this should not - be used if dumping of userspace memory is intended and - it has to be expected that some movable kernel pages - may be missing from the dump. + [KNL, X86, ARM64, RISCV, PPC] Reserve additional crash + kernel memory from CMA. This reservation is usable by + the first system's userspace memory and kernel movable + allocations (memory balloon, zswap). Pages allocated + from this memory range will not be included in the vmcore + so this should not be used if dumping of userspace memory + is intended and it has to be expected that some movable + kernel pages may be missing from the dump. A standard crashkernel reservation, as described above, is still needed to hold the crash kernel and initrd. diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c index a33390dde3165b..158b152ea0a0b7 100644 --- a/arch/riscv/kernel/machine_kexec_file.c +++ b/arch/riscv/kernel/machine_kexec_file.c @@ -48,7 +48,7 @@ static int get_nr_ram_ranges_callback(struct resource *res, void *arg) unsigned int arch_get_system_nr_ranges(void) { /* For exclusion of crashkernel region */ - unsigned int nr_ranges = 2 + CRASH_HOTPLUG_SAFETY_PADDING; + unsigned int nr_ranges = 2 + crashk_cma_cnt + CRASH_HOTPLUG_SAFETY_PADDING; walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index fa8d2f6f554b57..9dd0ffe85d6aaf 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -1320,7 +1320,7 @@ static inline void setup_vm_final(void) */ static void __init arch_reserve_crashkernel(void) { - unsigned long long low_size = 0; + unsigned long long low_size = 0, cma_size = 0; unsigned long long crash_base, crash_size; bool high = false; int ret; @@ -1330,11 +1330,12 @@ static void __init arch_reserve_crashkernel(void) ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, 
&crash_base, - &low_size, NULL, &high); + &low_size, &cma_size, &high); if (ret) return; reserve_crashkernel_generic(crash_size, crash_base, low_size, high); + reserve_crashkernel_cma(cma_size); } void __init paging_init(void) From 38114acbd0c695d3b8eb1c0acddbe44600de7a84 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Mon, 1 Jun 2026 17:48:05 +0800 Subject: [PATCH 23/23] arm64: crash: Add crash hotplug support Due to CPU/Memory hotplug or online/offline events, the elfcorehdr (which describes the CPUs and memory of the crashed kernel) of kdump image becomes outdated. Consequently, attempting dump collection with an outdated elfcorehdr can lead to inaccurate dump collection. The current solution to address the above issue involves monitoring the CPU/Memory add/remove events in userspace using udev rules and whenever there are changes in CPU and memory resources, the entire kdump image is loaded again. The kdump image includes kernel, initrd, elfcorehdr, FDT, purgatory. Given that only elfcorehdr gets outdated due to CPU/Memory add/remove events, reloading the entire kdump image is inefficient. More importantly, kdump remains inactive for a substantial amount of time until the kdump reload completes. To address the aforementioned issue, commit 247262756121 ("crash: add generic infrastructure for crash hotplug support") added a generic infrastructure that allows architectures to selectively update the kdump image component during CPU or memory add/remove events within the kernel itself. In the event of a CPU or memory add/remove events, the generic crash hotplug event handler, crash_handle_hotplug_event(), is triggered. It then acquires the necessary locks to update the kdump image and invokes the architecture-specific crash hotplug handler, arch_crash_handle_hotplug_event(), to update the required kdump image components. [1] has supported virtual CPU hotplug in virtual machines for ARM64, allowing vCPUs to be added or removed at runtime to meet Kubernetes demands. 
On ARM64, only memory add/remove events are handled. Here's why: 1. Physical CPU hotplug: Not supported on ARM64 hardware. 2. ACPI vCPU hotplug (KVM virtual machine): - vCPU hotplug is implemented as a static firmware policy where all possible vCPUs are pre-described in the MADT table at boot. - The vCPU status will be automatically updated after vCPU hotplug. - No FDT or elfcorehdr update needed. 3. Device tree booted Virtual Machine vCPU hotplug: - The elfcorehdr is built using for_each_possible_cpu(), so it already includes all possible CPUs and doesn't need updates. For memory add/remove events, the elfcorehdr is updated to reflect the current memory layout. This patch adds the ARCH_SUPPORTS_CRASH_HOTPLUG config option and implements: - arch_crash_hotplug_support(): Check if hotplug update is supported - arch_crash_get_elfcorehdr_size(): Return elfcorehdr buffer size - arch_crash_handle_hotplug_event(): Handle memory hotplug events This follows the same approach as x86 commit ea53ad9cf73b ("x86/crash: add x86 crash hotplug support") and powerpc commit b741092d5976 ("powerpc/crash: add crash CPU hotplug support") and commit 849599b702ef ("powerpc/crash: add crash memory hotplug support"). 
The test is based on the following QEMU version: https://github.com/salil-mehta/qemu.git virt-cpuhp-armv8/rfc-v2 Replace your '-smp' argument with something like: | -smp cpus=1,maxcpus=3,cores=3,threads=1,sockets=1 then feed the following to the QEMU monitor to hotplug a vCPU: | (qemu) device_add driver=host-arm-cpu,core-id=1,id=cpu1 | (qemu) device_del cpu1 feed the following to the QEMU monitor to hotplug memory: | (qemu) object_add memory-backend-ram,id=mem1,size=256M | (qemu) device_add pc-dimm,id=dimm1,memdev=mem1 | (qemu) device_del dimm1 The QEMU startup configuration is as follows: qemu-system-aarch64 \ -M virt,gic-version=3,acpi=on,highmem=on \ -enable-kvm \ -cpu host \ -kernel Image \ -smp cpus=1,maxcpus=3,cores=3,threads=1,sockets=1 \ -bios /usr/share/edk2/aarch64/QEMU_EFI.fd \ -m 2G,slots=64,maxmem=16G \ -nographic \ -no-reboot \ -device virtio-rng-pci \ -append "root=/dev/vda rw console=ttyAMA0 kgdboc=ttyAMA0,115200 \ earlycon acpi=on crashkernel=512M" \ -drive if=none,file=images/rootfs.ext4,format=raw,id=hd0 \ -device virtio-blk-device,drive=hd0 \ There are two system calls, `kexec_file_load` and `kexec_load`, used to load the kdump image. Only the `kexec_file_load` path has been tested so far.
Cc: Catalin Marinas Cc: Will Deacon Cc: Baoquan He Cc: "Mike Rapoport (Microsoft)" Cc: Andrew Morton Cc: Breno Leitao Cc: Kees Cook [1]: https://lore.kernel.org/all/20240529133446.28446-1-Jonathan.Cameron@huawei.com/ Signed-off-by: Jinjie Ruan Signed-off-by: Linux RISC-V bot --- arch/arm64/Kconfig | 3 + arch/arm64/include/asm/kexec.h | 13 +++ arch/arm64/kernel/Makefile | 2 +- arch/arm64/kernel/crash.c | 152 +++++++++++++++++++++++++ arch/arm64/kernel/kexec_image.c | 21 +++- arch/arm64/kernel/machine_kexec_file.c | 40 ++----- 6 files changed, 195 insertions(+), 36 deletions(-) create mode 100644 arch/arm64/kernel/crash.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fe60738e5943ba..9091c67e1cc289 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1609,6 +1609,9 @@ config ARCH_DEFAULT_CRASH_DUMP config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION def_bool CRASH_RESERVE +config ARCH_SUPPORTS_CRASH_HOTPLUG + def_bool y + config TRANS_TABLE def_bool y depends on HIBERNATION || KEXEC_CORE diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 892e5bebda957b..4f3d4fc2807efb 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -130,6 +130,19 @@ extern int load_other_segments(struct kimage *image, char *cmdline); #endif +#ifdef CONFIG_CRASH_HOTPLUG +#define pnum_hdr_sz(pnum) ((pnum) * sizeof(Elf64_Phdr) + sizeof(Elf64_Ehdr)) + +void arch_crash_handle_hotplug_event(struct kimage *image, void *arg); +#define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event + +int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags); +#define arch_crash_hotplug_support arch_crash_hotplug_support + +unsigned int arch_crash_get_elfcorehdr_size(void); +#define crash_get_elfcorehdr_size arch_crash_get_elfcorehdr_size +#endif + #endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 74b76bb7045231..0625422fc528c9 100644 --- 
a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -64,7 +64,7 @@ obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o \ obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o -obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o crash.o obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o diff --git a/arch/arm64/kernel/crash.c b/arch/arm64/kernel/crash.c new file mode 100644 index 00000000000000..5882b9b5a90e80 --- /dev/null +++ b/arch/arm64/kernel/crash.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Architecture specific functions for kexec based crash dumps. + */ + +#define pr_fmt(fmt) "crash hp: " fmt + +#include +#include +#include +#include +#include +#include + +#include + +#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_HOTPLUG) +unsigned int arch_get_system_nr_ranges(void) +{ + /* for exclusion of crashkernel region */ + unsigned int nr_ranges = 2 + crashk_cma_cnt + CRASH_HOTPLUG_SAFETY_PADDING; + phys_addr_t start, end; + u64 i; + + for_each_mem_range(i, &start, &end) + nr_ranges++; + + return nr_ranges; +} + +int arch_crash_populate_cmem(struct crash_mem *cmem) +{ + phys_addr_t start, end; + u64 i; + + for_each_mem_range(i, &start, &end) { + if (unlikely(cmem->nr_ranges >= cmem->max_nr_ranges)) + return -EAGAIN; + + cmem->ranges[cmem->nr_ranges].start = start; + cmem->ranges[cmem->nr_ranges].end = end - 1; + cmem->nr_ranges++; + } + + return 0; +} +#endif + +#ifdef CONFIG_CRASH_HOTPLUG +int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags) +{ +#ifdef CONFIG_KEXEC_FILE + if (image->file_mode) + return 1; +#endif + /* + * For kexec_load syscall, crash hotplug support requires + * KEXEC_CRASH_HOTPLUG_SUPPORT flag to be passed by userspace. 
+ */ + return kexec_flags & KEXEC_CRASH_HOTPLUG_SUPPORT; +} + +unsigned int arch_crash_get_elfcorehdr_size(void) +{ + unsigned int phdr_cnt; + + /* A program header for possible CPUs, vmcoreinfo and kernel_map */ + phdr_cnt = 2 + num_possible_cpus(); + if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) + phdr_cnt += CONFIG_CRASH_MAX_MEMORY_RANGES; + + return pnum_hdr_sz(phdr_cnt); +} + +/** + * update_crash_elfcorehdr() - Recreate the elfcorehdr and replace it with old + * elfcorehdr in the kexec segment array. + * @image: the active struct kimage + */ +static void update_crash_elfcorehdr(struct kimage *image) +{ + void *elfbuf = NULL, *old_elfcorehdr; + unsigned long mem, memsz; + unsigned long elfsz = 0; + + /* + * Create the new elfcorehdr reflecting the changes to CPU and/or + * memory resources. + */ + if (crash_prepare_headers(true, &elfbuf, &elfsz, NULL)) { + pr_err("unable to create new elfcorehdr"); + goto out; + } + + /* + * Obtain address and size of the elfcorehdr segment, and + * check it against the new elfcorehdr buffer. + */ + mem = image->segment[image->elfcorehdr_index].mem; + memsz = image->segment[image->elfcorehdr_index].memsz; + if (elfsz > memsz) { + pr_err("update elfcorehdr elfsz %lu > memsz %lu", + elfsz, memsz); + goto out; + } + + /* + * Copy new elfcorehdr over the old elfcorehdr at destination. + */ + old_elfcorehdr = (void *)__va(mem); + if (!old_elfcorehdr) { + pr_err("mapping elfcorehdr segment failed\n"); + goto out; + } + + /* + * Temporarily invalidate the crash image while the + * elfcorehdr is updated. 
+ */ + xchg(&kexec_crash_image, NULL); + memcpy((void *)old_elfcorehdr, elfbuf, elfsz); + dcache_clean_inval_poc((unsigned long)old_elfcorehdr, + (unsigned long)old_elfcorehdr + elfsz); + xchg(&kexec_crash_image, image); + pr_debug("updated elfcorehdr\n"); + +out: + vfree(elfbuf); +} + +/** + * arch_crash_handle_hotplug_event() - Handle hotplug elfcorehdr changes + * @image: a pointer to kexec_crash_image + * @arg: struct memory_notify handler for memory hotplug case and + * NULL for CPU hotplug case. + * + * Update the kdump image based on the type of hotplug event: + * - CPU add and remove: No action is needed. + * - Memory add/remove: Update the elfcorehdr to reflect the current memory layout. + * + * Prepare the new elfcorehdr and replace the existing elfcorehdr. + */ +void arch_crash_handle_hotplug_event(struct kimage *image, void *arg) +{ + if ((image->file_mode || image->elfcorehdr_updated) && + ((image->hp_action == KEXEC_CRASH_HP_ADD_CPU) || + (image->hp_action == KEXEC_CRASH_HP_REMOVE_CPU))) + return; + + update_crash_elfcorehdr(image); +} +#endif /* CONFIG_CRASH_HOTPLUG */ diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c index 93c36a3aa618ef..21f38de7a8b6ae 100644 --- a/arch/arm64/kernel/kexec_image.c +++ b/arch/arm64/kernel/kexec_image.c @@ -8,6 +8,7 @@ #define pr_fmt(fmt) "kexec_file(Image): " fmt +#include #include #include #include @@ -92,16 +93,32 @@ static void *image_load(struct kimage *image, #ifdef CONFIG_CRASH_DUMP if (image->type == KEXEC_TYPE_CRASH) { /* load elf core header */ - unsigned long headers_sz; + unsigned long headers_sz, pnum = 0; void *headers; - ret = crash_prepare_headers(true, &headers, &headers_sz, NULL); + ret = crash_prepare_headers(true, &headers, &headers_sz, &pnum); if (ret) { pr_err("Preparing elf core header failed\n"); return ERR_PTR(ret); } image->elf_headers = headers; image->elf_headers_sz = headers_sz; + +#ifdef CONFIG_CRASH_HOTPLUG + /* + * The elfcorehdr segment size accounts for 
VMCOREINFO, kernel_map + * maximum CPUs and maximum memory ranges. + */ + if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) + pnum = 2 + num_possible_cpus() + CONFIG_CRASH_MAX_MEMORY_RANGES; + else + pnum += 2 + num_possible_cpus(); + + if (pnum < (unsigned long)PN_XNUM) + image->elf_headers_sz = max(pnum_hdr_sz(pnum), headers_sz); + else + pr_err("number of Phdrs %lu exceeds max\n", pnum); +#endif } #endif diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index d0f73eb3f856f5..0016001f4d0051 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -10,11 +10,11 @@ #define pr_fmt(fmt) "kexec_file: " fmt +#include #include #include #include #include -#include #include #include #include @@ -39,38 +39,6 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) return kexec_image_post_load_cleanup_default(image); } -#ifdef CONFIG_CRASH_DUMP -unsigned int arch_get_system_nr_ranges(void) -{ - /* for exclusion of crashkernel region */ - unsigned int nr_ranges = 2 + crashk_cma_cnt + CRASH_HOTPLUG_SAFETY_PADDING; - phys_addr_t start, end; - u64 i; - - for_each_mem_range(i, &start, &end) - nr_ranges++; - - return nr_ranges; -} - -int arch_crash_populate_cmem(struct crash_mem *cmem) -{ - phys_addr_t start, end; - u64 i; - - for_each_mem_range(i, &start, &end) { - if (unlikely(cmem->nr_ranges >= cmem->max_nr_ranges)) - return -EAGAIN; - - cmem->ranges[cmem->nr_ranges].start = start; - cmem->ranges[cmem->nr_ranges].end = end - 1; - cmem->nr_ranges++; - } - - return 0; -} -#endif - /* * Tries to add the initrd and DTB to the image. 
If it is not possible to find * valid locations, this function will undo changes to the image and return non @@ -98,6 +66,12 @@ int load_other_segments(struct kimage *image, kbuf.bufsz = image->elf_headers_sz; kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; kbuf.memsz = image->elf_headers_sz; + +#ifdef CONFIG_CRASH_HOTPLUG + if (image->elf_headers_sz < pnum_hdr_sz(PN_XNUM)) + image->elfcorehdr_index = image->nr_segments; +#endif + kbuf.buf_align = SZ_64K; /* largest supported page size */ kbuf.buf_max = ULONG_MAX; kbuf.top_down = true;