diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 4d0f545fb3ec5a..3ff3ddd516cf45 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1119,14 +1119,14 @@ Kernel parameters It will be ignored when crashkernel=X,high is not used or memory reserved is below 4G. crashkernel=size[KMG],cma - [KNL, X86, ppc] Reserve additional crash kernel memory from - CMA. This reservation is usable by the first system's - userspace memory and kernel movable allocations (memory - balloon, zswap). Pages allocated from this memory range - will not be included in the vmcore so this should not - be used if dumping of userspace memory is intended and - it has to be expected that some movable kernel pages - may be missing from the dump. + [KNL, X86, ARM64, RISCV, PPC] Reserve additional crash + kernel memory from CMA. This reservation is usable by + the first system's userspace memory and kernel movable + allocations (memory balloon, zswap). Pages allocated + from this memory range will not be included in the vmcore + so this should not be used if dumping of userspace memory + is intended and it has to be expected that some movable + kernel pages may be missing from the dump. A standard crashkernel reservation, as described above, is still needed to hold the crash kernel and initrd. 
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fe60738e5943ba..9091c67e1cc289 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1609,6 +1609,9 @@ config ARCH_DEFAULT_CRASH_DUMP config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION def_bool CRASH_RESERVE +config ARCH_SUPPORTS_CRASH_HOTPLUG + def_bool y + config TRANS_TABLE def_bool y depends on HIBERNATION || KEXEC_CORE diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 892e5bebda957b..f338d162dec1a3 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -127,7 +127,18 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image); extern int load_other_segments(struct kimage *image, unsigned long kernel_load_addr, unsigned long kernel_size, char *initrd, unsigned long initrd_len, - char *cmdline); + char *cmdline, void *headers, unsigned long headers_sz); +#endif + +#ifdef CONFIG_CRASH_HOTPLUG +void arch_crash_handle_hotplug_event(struct kimage *image, void *arg); +#define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event + +int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags); +#define arch_crash_hotplug_support arch_crash_hotplug_support + +unsigned int arch_crash_get_elfcorehdr_size(void); +#define crash_get_elfcorehdr_size arch_crash_get_elfcorehdr_size #endif #endif /* __ASSEMBLER__ */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 74b76bb7045231..629e962813abca 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -65,6 +65,7 @@ obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_CRASH_HOTPLUG) += crash.o obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o 
obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o diff --git a/arch/arm64/kernel/crash.c b/arch/arm64/kernel/crash.c new file mode 100644 index 00000000000000..2114375820da9f --- /dev/null +++ b/arch/arm64/kernel/crash.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Architecture specific functions for kexec based crash dumps. + */ + +#define pr_fmt(fmt) "crash hp: " fmt + +#include +#include +#include +#include + +#include + +#ifdef CONFIG_CRASH_HOTPLUG + +int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags) +{ +#ifdef CONFIG_KEXEC_FILE + if (image->file_mode) + return 1; +#endif + /* + * For kexec_load syscall, crash hotplug support requires + * KEXEC_CRASH_HOTPLUG_SUPPORT flag to be passed by userspace. + */ + return kexec_flags & KEXEC_CRASH_HOTPLUG_SUPPORT; +} + +unsigned int arch_crash_get_elfcorehdr_size(void) +{ + unsigned int phdr_cnt; + + /* A program header for possible CPUs, vmcoreinfo and kernel_map */ + phdr_cnt = 2 + num_possible_cpus(); + if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) + phdr_cnt += CONFIG_CRASH_MAX_MEMORY_RANGES; + + return sizeof(Elf64_Ehdr) + phdr_cnt * sizeof(Elf64_Phdr); +} + +/** + * update_crash_elfcorehdr() - Recreate the elfcorehdr and replace the old + * elfcorehdr in the kexec segment array with it. + * @image: the active struct kimage + */ +static void update_crash_elfcorehdr(struct kimage *image) +{ + void *elfbuf = NULL, *old_elfcorehdr; + unsigned long mem, memsz; + unsigned long elfsz = 0; + + /* + * Create the new elfcorehdr reflecting the changes to CPU and/or + * memory resources. + */ + if (crash_prepare_headers(true, &elfbuf, &elfsz, NULL)) { + pr_err("unable to create new elfcorehdr\n"); + goto out; + } + + /* + * Obtain address and size of the elfcorehdr segment, and + * check it against the new elfcorehdr buffer. 
+ */ + mem = image->segment[image->elfcorehdr_index].mem; + memsz = image->segment[image->elfcorehdr_index].memsz; + if (elfsz > memsz) { + pr_err("update elfcorehdr elfsz %lu > memsz %lu\n", + elfsz, memsz); + goto out; + } + + /* + * Copy new elfcorehdr over the old elfcorehdr at destination. + */ + old_elfcorehdr = (void *)__va(mem); + if (!old_elfcorehdr) { + pr_err("mapping elfcorehdr segment failed\n"); + goto out; + } + + /* + * Temporarily invalidate the crash image while the + * elfcorehdr is updated. + */ + xchg(&kexec_crash_image, NULL); + memcpy_flushcache(old_elfcorehdr, elfbuf, elfsz); + xchg(&kexec_crash_image, image); + pr_debug("updated elfcorehdr\n"); + +out: + vfree(elfbuf); +} + +/** + * arch_crash_handle_hotplug_event() - Handle hotplug elfcorehdr changes + * @image: a pointer to kexec_crash_image + * @arg: struct memory_notify handler for memory hotplug case and + * NULL for CPU hotplug case. + * + * Update the kdump image based on the type of hotplug event: + * - CPU add and remove: No action is needed. + * - Memory add/remove: Update the elfcorehdr to reflect the current memory layout. + * + * Prepare the new elfcorehdr and replace the existing elfcorehdr. 
+ */ +void arch_crash_handle_hotplug_event(struct kimage *image, void *arg) +{ + switch (image->hp_action) { + case KEXEC_CRASH_HP_ADD_CPU: + fallthrough; + case KEXEC_CRASH_HP_REMOVE_CPU: + if (image->file_mode || image->elfcorehdr_updated) + return; + fallthrough; + case KEXEC_CRASH_HP_ADD_MEMORY: + case KEXEC_CRASH_HP_REMOVE_MEMORY: + update_crash_elfcorehdr(image); + return; + default: + pr_warn_once("Unknown hotplug action\n"); + } +} +#endif /* CONFIG_CRASH_HOTPLUG */ diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c index b70f4df15a1ae5..770a4c7bf5737e 100644 --- a/arch/arm64/kernel/kexec_image.c +++ b/arch/arm64/kernel/kexec_image.c @@ -44,6 +44,11 @@ static void *image_load(struct kimage *image, struct kexec_buf kbuf = {}; unsigned long text_offset, kernel_segment_number; struct kexec_segment *kernel_segment; + /* + * Crash-only ELF core header; always passed to load_other_segments(). + */ + unsigned long headers_sz = 0; + void *headers = NULL; int ret; /* @@ -89,6 +94,18 @@ static void *image_load(struct kimage *image, kernel_segment_number = image->nr_segments; +#ifdef CONFIG_CRASH_DUMP + if (image->type == KEXEC_TYPE_CRASH) { + ret = crash_prepare_headers_locked(true, &headers, &headers_sz, NULL); + if (ret) { + pr_err("Preparing elf core header failed\n"); + return ERR_PTR(ret); + } + image->elf_headers = headers; + image->elf_headers_sz = headers_sz; + } +#endif + /* * The location of the kernel segment may make it impossible to satisfy * the other segment requirements, so we try repeatedly to find a @@ -99,7 +116,8 @@ static void *image_load(struct kimage *image, kernel_segment = &image->segment[kernel_segment_number]; ret = load_other_segments(image, kernel_segment->mem, kernel_segment->memsz, initrd, - initrd_len, cmdline); + initrd_len, cmdline, + headers, headers_sz); if (!ret) break; @@ -107,7 +125,7 @@ static void *image_load(struct kimage *image, * We couldn't find space for the other segments; erase the * kernel segment and try the next 
available hole. */ - image->nr_segments -= 1; + image->nr_segments = kernel_segment_number; kbuf.buf_min = kernel_segment->mem + kernel_segment->memsz; kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; } diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index e31fabed378a59..781febd0f6db10 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -40,46 +40,33 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) } #ifdef CONFIG_CRASH_DUMP -static int prepare_elf_headers(void **addr, unsigned long *sz) +unsigned int arch_get_system_nr_ranges(void) { - struct crash_mem *cmem; - unsigned int nr_ranges; - int ret; - u64 i; + unsigned int nr_ranges = 2 + crashk_cma_cnt; /* for exclusion of crashkernel region */ phys_addr_t start, end; + u64 i; - nr_ranges = 2; /* for exclusion of crashkernel region */ for_each_mem_range(i, &start, &end) nr_ranges++; - cmem = kmalloc_flex(*cmem, ranges, nr_ranges); - if (!cmem) - return -ENOMEM; + return nr_ranges; +} + +int arch_crash_populate_cmem(struct crash_mem *cmem) +{ + phys_addr_t start, end; + u64 i; - cmem->max_nr_ranges = nr_ranges; - cmem->nr_ranges = 0; for_each_mem_range(i, &start, &end) { + if (WARN_ON_ONCE(cmem->nr_ranges >= cmem->max_nr_ranges)) + return -EAGAIN; + cmem->ranges[cmem->nr_ranges].start = start; cmem->ranges[cmem->nr_ranges].end = end - 1; cmem->nr_ranges++; } - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); - if (ret) - goto out; - - if (crashk_low_res.end) { - ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); - if (ret) - goto out; - } - - ret = crash_prepare_elf64_headers(cmem, true, addr, sz); - -out: - kfree(cmem); - return ret; + return 0; } #endif @@ -92,12 +79,14 @@ int load_other_segments(struct kimage *image, unsigned long kernel_load_addr, unsigned long kernel_size, char *initrd, unsigned long initrd_len, - char *cmdline) + char 
*cmdline, void *headers, + unsigned long headers_sz) { - struct kexec_buf kbuf = {}; - void *dtb = NULL; unsigned long initrd_load_addr = 0, dtb_len, orig_segments = image->nr_segments; + struct kexec_buf kbuf = {}; + unsigned long pnum = 0; + void *dtb = NULL; int ret = 0; kbuf.image = image; @@ -105,20 +94,28 @@ int load_other_segments(struct kimage *image, kbuf.buf_min = kernel_load_addr + kernel_size; #ifdef CONFIG_CRASH_DUMP - /* load elf core header */ - void *headers; - unsigned long headers_sz; if (image->type == KEXEC_TYPE_CRASH) { - ret = prepare_elf_headers(&headers, &headers_sz); - if (ret) { - pr_err("Preparing elf core header failed\n"); - goto out_err; - } - kbuf.buffer = headers; kbuf.bufsz = headers_sz; kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; kbuf.memsz = headers_sz; + +#ifdef CONFIG_CRASH_HOTPLUG + /* + * The elfcorehdr segment size accounts for VMCOREINFO, kernel_map + * maximum CPUs and maximum memory ranges. + */ + if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) + pnum = 2 + num_possible_cpus() + CONFIG_CRASH_MAX_MEMORY_RANGES; + else + pnum = 2 + num_possible_cpus(); + + if (pnum < (unsigned long)PN_XNUM) + kbuf.memsz = pnum * sizeof(Elf64_Phdr) + sizeof(Elf64_Ehdr); + else + pr_err("number of Phdrs %lu exceeds max\n", pnum); +#endif + kbuf.buf_align = SZ_64K; /* largest supported page size */ kbuf.buf_max = ULONG_MAX; kbuf.top_down = true; @@ -128,9 +125,8 @@ int load_other_segments(struct kimage *image, - vfree(headers); + /* headers is owned by image->elf_headers (set in image_load()) */ goto out_err; } - image->elf_headers = headers; image->elf_load_addr = kbuf.mem; - image->elf_headers_sz = headers_sz; + image->elf_headers_sz = kbuf.memsz; kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n", image->elf_load_addr, kbuf.bufsz, kbuf.memsz); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 97987f850a33c3..227f58522dad29 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -96,8 +96,8 @@ phys_addr_t __ro_after_init arm64_dma_phys_limit; static void __init 
arch_reserve_crashkernel(void) { + unsigned long long crash_base, crash_size, cma_size = 0; unsigned long long low_size = 0; - unsigned long long crash_base, crash_size; bool high = false; int ret; @@ -106,11 +106,12 @@ static void __init arch_reserve_crashkernel(void) ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base, - &low_size, NULL, &high); + &low_size, &cma_size, &high); if (ret) return; reserve_crashkernel_generic(crash_size, crash_base, low_size, high); + reserve_crashkernel_cma(cma_size); } static phys_addr_t __init max_zone_phys(phys_addr_t zone_limit) diff --git a/arch/loongarch/kernel/machine_kexec_file.c b/arch/loongarch/kernel/machine_kexec_file.c index 5584b798ba4645..a845fbe8883d18 100644 --- a/arch/loongarch/kernel/machine_kexec_file.c +++ b/arch/loongarch/kernel/machine_kexec_file.c @@ -56,46 +56,33 @@ static void cmdline_add_initrd(struct kimage *image, unsigned long *cmdline_tmpl } #ifdef CONFIG_CRASH_DUMP - -static int prepare_elf_headers(void **addr, unsigned long *sz) +unsigned int arch_get_system_nr_ranges(void) { - int ret, nr_ranges; - uint64_t i; + int nr_ranges = 2; /* for exclusion of crashkernel region */ phys_addr_t start, end; - struct crash_mem *cmem; + uint64_t i; - nr_ranges = 2; /* for exclusion of crashkernel region */ for_each_mem_range(i, &start, &end) nr_ranges++; - cmem = kmalloc_flex(*cmem, ranges, nr_ranges); - if (!cmem) - return -ENOMEM; + return nr_ranges; +} + +int arch_crash_populate_cmem(struct crash_mem *cmem) +{ + phys_addr_t start, end; + uint64_t i; - cmem->max_nr_ranges = nr_ranges; - cmem->nr_ranges = 0; for_each_mem_range(i, &start, &end) { + if (WARN_ON_ONCE(cmem->nr_ranges >= cmem->max_nr_ranges)) + return -EAGAIN; + cmem->ranges[cmem->nr_ranges].start = start; cmem->ranges[cmem->nr_ranges].end = end - 1; cmem->nr_ranges++; } - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); - if (ret < 0) - goto out; - - if 
(crashk_low_res.end) { - ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); - if (ret < 0) - goto out; - } - - ret = crash_prepare_elf64_headers(cmem, true, addr, sz); - -out: - kfree(cmem); - return ret; + return 0; } /* @@ -163,7 +150,7 @@ int load_other_segments(struct kimage *image, void *headers; unsigned long headers_sz; - ret = prepare_elf_headers(&headers, &headers_sz); + ret = crash_prepare_headers_locked(true, &headers, &headers_sz, NULL); if (ret < 0) { pr_err("Preparing elf core header failed\n"); goto out_err; diff --git a/arch/powerpc/include/asm/kexec_ranges.h b/arch/powerpc/include/asm/kexec_ranges.h index 14055896cbcbcd..8489e844b44759 100644 --- a/arch/powerpc/include/asm/kexec_ranges.h +++ b/arch/powerpc/include/asm/kexec_ranges.h @@ -7,7 +7,6 @@ void sort_memory_ranges(struct crash_mem *mrngs, bool merge); struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges); int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size); -int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size); int get_exclude_memory_ranges(struct crash_mem **mem_ranges); int get_reserved_memory_ranges(struct crash_mem **mem_ranges); int get_crash_memory_ranges(struct crash_mem **mem_ranges); diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c index e6539f213b3d14..775895f3103793 100644 --- a/arch/powerpc/kexec/crash.c +++ b/arch/powerpc/kexec/crash.c @@ -493,7 +493,7 @@ static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify * struct crash_mem *cmem = NULL; struct kexec_segment *ksegment; void *ptr, *mem, *elfbuf = NULL; - unsigned long elfsz, memsz, base_addr, size; + unsigned long elfsz, memsz, base_addr, size, end; ksegment = &image->segment[image->elfcorehdr_index]; mem = (void *) ksegment->mem; @@ -502,7 +502,7 @@ static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify * ret = get_crash_memory_ranges(&cmem); if (ret) { pr_err("Failed to get crash 
mem range\n"); - return; + goto out; } /* @@ -512,7 +512,8 @@ static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify * if (image->hp_action == KEXEC_CRASH_HP_REMOVE_MEMORY) { base_addr = PFN_PHYS(mn->start_pfn); size = mn->nr_pages * PAGE_SIZE; - ret = remove_mem_range(&cmem, base_addr, size); + end = base_addr + size - 1; + ret = arch_crash_exclude_mem_range(&cmem, base_addr, end); if (ret) { pr_err("Failed to remove hot-unplugged memory from crash memory ranges\n"); goto out; diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c index 867135560e5c8b..e5fea23b191b6d 100644 --- a/arch/powerpc/kexec/ranges.c +++ b/arch/powerpc/kexec/ranges.c @@ -553,9 +553,9 @@ int get_usable_memory_ranges(struct crash_mem **mem_ranges) #endif /* CONFIG_KEXEC_FILE */ #ifdef CONFIG_CRASH_DUMP -static int crash_exclude_mem_range_guarded(struct crash_mem **mem_ranges, - unsigned long long mstart, - unsigned long long mend) +int arch_crash_exclude_mem_range(struct crash_mem **mem_ranges, + unsigned long long mstart, + unsigned long long mend) { struct crash_mem *tmem = *mem_ranges; @@ -604,18 +604,10 @@ int get_crash_memory_ranges(struct crash_mem **mem_ranges) sort_memory_ranges(*mem_ranges, true); } - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range_guarded(mem_ranges, crashk_res.start, crashk_res.end); + ret = crash_exclude_core_ranges(mem_ranges); if (ret) goto out; - for (i = 0; i < crashk_cma_cnt; ++i) { - ret = crash_exclude_mem_range_guarded(mem_ranges, crashk_cma_ranges[i].start, - crashk_cma_ranges[i].end); - if (ret) - goto out; - } - /* * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL * regions are exported to save their context at the time of @@ -641,89 +633,4 @@ int get_crash_memory_ranges(struct crash_mem **mem_ranges) pr_err("Failed to setup crash memory ranges\n"); return ret; } - -/** - * remove_mem_range - Removes the given memory range from the range list. 
- * @mem_ranges: Range list to remove the memory range to. - * @base: Base address of the range to remove. - * @size: Size of the memory range to remove. - * - * (Re)allocates memory, if needed. - * - * Returns 0 on success, negative errno on error. - */ -int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size) -{ - u64 end; - int ret = 0; - unsigned int i; - u64 mstart, mend; - struct crash_mem *mem_rngs = *mem_ranges; - - if (!size) - return 0; - - /* - * Memory range are stored as start and end address, use - * the same format to do remove operation. - */ - end = base + size - 1; - - for (i = 0; i < mem_rngs->nr_ranges; i++) { - mstart = mem_rngs->ranges[i].start; - mend = mem_rngs->ranges[i].end; - - /* - * Memory range to remove is not part of this range entry - * in the memory range list - */ - if (!(base >= mstart && end <= mend)) - continue; - - /* - * Memory range to remove is equivalent to this entry in the - * memory range list. Remove the range entry from the list. - */ - if (base == mstart && end == mend) { - for (; i < mem_rngs->nr_ranges - 1; i++) { - mem_rngs->ranges[i].start = mem_rngs->ranges[i+1].start; - mem_rngs->ranges[i].end = mem_rngs->ranges[i+1].end; - } - mem_rngs->nr_ranges--; - goto out; - } - /* - * Start address of the memory range to remove and the - * current memory range entry in the list is same. Just - * move the start address of the current memory range - * entry in the list to end + 1. - */ - else if (base == mstart) { - mem_rngs->ranges[i].start = end + 1; - goto out; - } - /* - * End address of the memory range to remove and the - * current memory range entry in the list is same. - * Just move the end address of the current memory - * range entry in the list to base - 1. - */ - else if (end == mend) { - mem_rngs->ranges[i].end = base - 1; - goto out; - } - /* - * Memory range to remove is not at the edge of current - * memory range entry. Split the current memory entry into - * two half. 
- */ - else { - size = mem_rngs->ranges[i].end - end + 1; - mem_rngs->ranges[i].end = base - 1; - ret = add_mem_range(mem_ranges, end + 1, size); - } - } -out: - return ret; -} #endif /* CONFIG_CRASH_DUMP */ diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c index 59d4bbc848a896..dfaa1f6b953114 100644 --- a/arch/riscv/kernel/machine_kexec_file.c +++ b/arch/riscv/kernel/machine_kexec_file.c @@ -45,10 +45,22 @@ static int get_nr_ram_ranges_callback(struct resource *res, void *arg) return 0; } +unsigned int arch_get_system_nr_ranges(void) +{ + unsigned int nr_ranges = 2 + crashk_cma_cnt; /* For exclusion of crashkernel region */ + + walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); + + return nr_ranges; +} + static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) { struct crash_mem *cmem = arg; + if (WARN_ON_ONCE(cmem->nr_ranges >= cmem->max_nr_ranges)) + return -EAGAIN; + cmem->ranges[cmem->nr_ranges].start = res->start; cmem->ranges[cmem->nr_ranges].end = res->end; cmem->nr_ranges++; @@ -56,33 +68,9 @@ static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) return 0; } -static int prepare_elf_headers(void **addr, unsigned long *sz) +int arch_crash_populate_cmem(struct crash_mem *cmem) { - struct crash_mem *cmem; - unsigned int nr_ranges; - int ret; - - nr_ranges = 1; /* For exclusion of crashkernel region */ - walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); - - cmem = kmalloc_flex(*cmem, ranges, nr_ranges); - if (!cmem) - return -ENOMEM; - - cmem->max_nr_ranges = nr_ranges; - cmem->nr_ranges = 0; - ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); - if (ret) - goto out; - - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); - if (!ret) - ret = crash_prepare_elf64_headers(cmem, true, addr, sz); - -out: - kfree(cmem); - return ret; + return walk_system_ram_res(0, 
-1, cmem, prepare_elf64_ram_headers_callback); } static char *setup_kdump_cmdline(struct kimage *image, char *cmdline, @@ -274,7 +262,7 @@ int load_extra_segments(struct kimage *image, unsigned long kernel_start, if (image->type == KEXEC_TYPE_CRASH) { void *headers; unsigned long headers_sz; - ret = prepare_elf_headers(&headers, &headers_sz); + ret = crash_prepare_headers_locked(true, &headers, &headers_sz, NULL); if (ret) { pr_err("Preparing elf core header failed\n"); goto out; diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index fa8d2f6f554b57..9dd0ffe85d6aaf 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -1320,7 +1320,7 @@ static inline void setup_vm_final(void) */ static void __init arch_reserve_crashkernel(void) { - unsigned long long low_size = 0; + unsigned long long low_size = 0, cma_size = 0; unsigned long long crash_base, crash_size; bool high = false; int ret; @@ -1330,11 +1330,12 @@ static void __init arch_reserve_crashkernel(void) ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base, - &low_size, NULL, &high); + &low_size, &cma_size, &high); if (ret) return; reserve_crashkernel_generic(crash_size, crash_base, low_size, high); + reserve_crashkernel_cma(cma_size); } void __init paging_init(void) diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index cd796818d94d9e..26f140dff3481f 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -153,16 +153,8 @@ static int get_nr_ram_ranges_callback(struct resource *res, void *arg) return 0; } -/* Gather all the required information to prepare elf headers for ram regions */ -static struct crash_mem *fill_up_crash_elf_data(void) +unsigned int arch_get_system_nr_ranges(void) { - unsigned int nr_ranges = 0; - struct crash_mem *cmem; - - walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); - if (!nr_ranges) - return NULL; - /* * Exclusion of crash region, crashk_low_res and/or crashk_cma_ranges * may cause 
range splits. So add extra slots here. @@ -177,55 +169,25 @@ static struct crash_mem *fill_up_crash_elf_data(void) * But in order to lest the low 1M could be changed in the future, * (e.g. [start, 1M]), add a extra slot. */ - nr_ranges += 3 + crashk_cma_cnt; - cmem = vzalloc(struct_size(cmem, ranges, nr_ranges)); - if (!cmem) - return NULL; - - cmem->max_nr_ranges = nr_ranges; + unsigned int nr_ranges = 3 + crashk_cma_cnt; - return cmem; + walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); + return nr_ranges; } -/* - * Look for any unwanted ranges between mstart, mend and remove them. This - * might lead to split and split ranges are put in cmem->ranges[] array - */ -static int elf_header_exclude_ranges(struct crash_mem *cmem) +int arch_crash_exclude_ranges(struct crash_mem *cmem) { - int ret = 0; - int i; - /* Exclude the low 1M because it is always reserved */ - ret = crash_exclude_mem_range(cmem, 0, SZ_1M - 1); - if (ret) - return ret; - - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); - if (ret) - return ret; - - if (crashk_low_res.end) - ret = crash_exclude_mem_range(cmem, crashk_low_res.start, - crashk_low_res.end); - if (ret) - return ret; - - for (i = 0; i < crashk_cma_cnt; ++i) { - ret = crash_exclude_mem_range(cmem, crashk_cma_ranges[i].start, - crashk_cma_ranges[i].end); - if (ret) - return ret; - } - - return 0; + return crash_exclude_mem_range(cmem, 0, SZ_1M - 1); } static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) { struct crash_mem *cmem = arg; + if (WARN_ON_ONCE(cmem->nr_ranges >= cmem->max_nr_ranges)) + return -EAGAIN; + cmem->ranges[cmem->nr_ranges].start = res->start; cmem->ranges[cmem->nr_ranges].end = res->end; cmem->nr_ranges++; @@ -233,35 +195,9 @@ static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) return 0; } -/* Prepare elf headers. 
Return addr and size */ -static int prepare_elf_headers(void **addr, unsigned long *sz, - unsigned long *nr_mem_ranges) +int arch_crash_populate_cmem(struct crash_mem *cmem) { - struct crash_mem *cmem; - int ret; - - cmem = fill_up_crash_elf_data(); - if (!cmem) - return -ENOMEM; - - ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); - if (ret) - goto out; - - /* Exclude unwanted mem ranges */ - ret = elf_header_exclude_ranges(cmem); - if (ret) - goto out; - - /* Return the computed number of memory ranges, for hotplug usage */ - *nr_mem_ranges = cmem->nr_ranges; - - /* By default prepare 64bit headers */ - ret = crash_prepare_elf64_headers(cmem, IS_ENABLED(CONFIG_X86_64), addr, sz); - -out: - vfree(cmem); - return ret; + return walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); } #endif @@ -419,7 +355,8 @@ int crash_load_segments(struct kimage *image) .buf_max = ULONG_MAX, .top_down = false }; /* Prepare elf headers and add a segment */ - ret = prepare_elf_headers(&kbuf.buffer, &kbuf.bufsz, &pnum); + ret = crash_prepare_headers_locked(IS_ENABLED(CONFIG_X86_64), &kbuf.buffer, + &kbuf.bufsz, &pnum); if (ret) return ret; @@ -512,7 +449,6 @@ unsigned int arch_crash_get_elfcorehdr_size(void) void arch_crash_handle_hotplug_event(struct kimage *image, void *arg) { void *elfbuf = NULL, *old_elfcorehdr; - unsigned long nr_mem_ranges; unsigned long mem, memsz; unsigned long elfsz = 0; @@ -530,7 +466,7 @@ void arch_crash_handle_hotplug_event(struct kimage *image, void *arg) * Create the new elfcorehdr reflecting the changes to CPU and/or * memory resources. 
*/ - if (prepare_elf_headers(&elfbuf, &elfsz, &nr_mem_ranges)) { + if (crash_prepare_headers(IS_ENABLED(CONFIG_X86_64), &elfbuf, &elfsz, NULL)) { pr_err("unable to create new elfcorehdr"); goto out; } diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 82f7327c59ea90..0470acbd1fcf0f 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -880,11 +880,12 @@ static unsigned long chosen_node_offset = -FDT_ERR_NOTFOUND; /* * The main usage of linux,usable-memory-range is for crash dump kernel. * Originally, the number of usable-memory regions is one. Now there may - * be two regions, low region and high region. - * To make compatibility with existing user-space and older kdump, the low - * region is always the last range of linux,usable-memory-range if exist. + * be up to 2 + CRASHK_CMA_RANGES_MAX regions: the low region, the high region + * and the CMA regions. To stay compatible with existing user space and older + * kdump, the high and low regions are always the first two ranges of + * linux,usable-memory-range if they exist. 
*/ -#define MAX_USABLE_RANGES 2 +#define MAX_USABLE_RANGES (2 + CRASHK_CMA_RANGES_MAX) /** * early_init_dt_check_for_usable_mem_range - Decode usable memory range diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c index b6837e299e7fe5..029903b986cbd3 100644 --- a/drivers/of/kexec.c +++ b/drivers/of/kexec.c @@ -458,6 +458,15 @@ void *of_kexec_alloc_and_setup_fdt(const struct kimage *image, if (ret) goto out; } + + for (int i = 0; i < crashk_cma_cnt; i++) { + ret = fdt_appendprop_addrrange(fdt, 0, chosen_node, + "linux,usable-memory-range", + crashk_cma_ranges[i].start, + crashk_cma_ranges[i].end - crashk_cma_ranges[i].start + 1); + if (ret) + goto out; + } #endif } diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index c1dee3f971a918..0f4ea7ffd06609 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -59,6 +59,11 @@ extern int crash_exclude_mem_range(struct crash_mem *mem, unsigned long long mend); extern int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, void **addr, unsigned long *sz); +extern int crash_prepare_headers(int need_kernel_map, void **addr, + unsigned long *sz, unsigned long *nr_mem_ranges); +extern int crash_prepare_headers_locked(int need_kernel_map, void **addr, + unsigned long *sz, unsigned long *nr_mem_ranges); +extern int crash_exclude_core_ranges(struct crash_mem **cmem); struct kimage; struct kexec_segment; @@ -76,6 +81,12 @@ int kexec_should_crash(struct task_struct *p); int kexec_crash_loaded(void); void crash_save_cpu(struct pt_regs *regs, int cpu); extern int kimage_crash_copy_vmcoreinfo(struct kimage *image); +extern unsigned int arch_get_system_nr_ranges(void); +extern int arch_crash_populate_cmem(struct crash_mem *cmem); +extern int arch_crash_exclude_ranges(struct crash_mem *cmem); +extern int arch_crash_exclude_mem_range(struct crash_mem **mem, + unsigned long long mstart, + unsigned long long mend); #else /* !CONFIG_CRASH_DUMP*/ struct pt_regs; diff --git 
a/include/linux/crash_reserve.h b/include/linux/crash_reserve.h index f0dc03d94ca2cd..30864d90d7f509 100644 --- a/include/linux/crash_reserve.h +++ b/include/linux/crash_reserve.h @@ -14,9 +14,11 @@ extern struct resource crashk_res; extern struct resource crashk_low_res; extern struct range crashk_cma_ranges[]; + +#define CRASHK_CMA_RANGES_MAX 4 #if defined(CONFIG_CMA) && defined(CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION) #define CRASHKERNEL_CMA -#define CRASHKERNEL_CMA_RANGES_MAX 4 +#define CRASHKERNEL_CMA_RANGES_MAX (CRASHK_CMA_RANGES_MAX) extern int crashk_cma_cnt; #else #define crashk_cma_cnt 0 diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 4f21fc3b108b83..33e945da677607 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -168,9 +169,6 @@ static inline resource_size_t crash_resource_size(const struct resource *res) return !res->end ? 0 : resource_size(res); } - - - int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, void **addr, unsigned long *sz) { @@ -272,6 +270,104 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, return 0; } +static struct crash_mem *alloc_cmem(unsigned int nr_ranges) +{ + struct crash_mem *cmem; + + cmem = kvzalloc_flex(*cmem, ranges, nr_ranges); + if (!cmem) + return NULL; + + cmem->max_nr_ranges = nr_ranges; + return cmem; +} + +unsigned int __weak arch_get_system_nr_ranges(void) { return 0; } +int __weak arch_crash_populate_cmem(struct crash_mem *cmem) { return -1; } +int __weak arch_crash_exclude_ranges(struct crash_mem *cmem) { return 0; } + +int __weak arch_crash_exclude_mem_range(struct crash_mem **mem, + unsigned long long mstart, + unsigned long long mend) +{ + return crash_exclude_mem_range(*mem, mstart, mend); +} + +int crash_exclude_core_ranges(struct crash_mem **cmem) +{ + int ret, i; + + /* Exclude crashkernel region */ + ret = arch_crash_exclude_mem_range(cmem, 
crashk_res.start, crashk_res.end); + if (ret) + return ret; + + if (crashk_low_res.end) { + ret = arch_crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); + if (ret) + return ret; + } + + for (i = 0; i < crashk_cma_cnt; ++i) { + ret = arch_crash_exclude_mem_range(cmem, crashk_cma_ranges[i].start, + crashk_cma_ranges[i].end); + if (ret) + return ret; + } + + return 0; +} + +int crash_prepare_headers(int need_kernel_map, void **addr, unsigned long *sz, + unsigned long *nr_mem_ranges) +{ + unsigned int max_nr_ranges; + struct crash_mem *cmem; + int ret; + + max_nr_ranges = arch_get_system_nr_ranges(); + if (!max_nr_ranges) + return -ENOMEM; + + cmem = alloc_cmem(max_nr_ranges); + if (!cmem) + return -ENOMEM; + + ret = arch_crash_populate_cmem(cmem); + if (ret) + goto out; + + ret = crash_exclude_core_ranges(&cmem); + if (ret) + goto out; + + ret = arch_crash_exclude_ranges(cmem); + if (ret) + goto out; + + /* Return the computed number of memory ranges, for hotplug usage */ + if (nr_mem_ranges) + *nr_mem_ranges = cmem->nr_ranges; + + ret = crash_prepare_elf64_headers(cmem, need_kernel_map, addr, sz); + +out: + kvfree(cmem); + return ret; +} + +int crash_prepare_headers_locked(int need_kernel_map, void **addr, unsigned long *sz, + unsigned long *nr_mem_ranges) +{ + int ret; + + get_online_mems(); + ret = crash_prepare_headers(need_kernel_map, addr, sz, nr_mem_ranges); + put_online_mems(); + + return ret; +} + /** * crash_exclude_mem_range - exclude a mem range for existing ranges * @mem: mem->range contains an array of ranges sorted in ascending order