diff --git a/arch/loongarch/mm/pageattr.c b/arch/loongarch/mm/pageattr.c
index 99165903908a4e..b701076605b358 100644
--- a/arch/loongarch/mm/pageattr.c
+++ b/arch/loongarch/mm/pageattr.c
@@ -118,7 +118,7 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask, pgp
 		return 0;
 
 	mmap_write_lock(&init_mm);
-	ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL, &masks);
+	ret = walk_page_range_kernel(start, end, &pageattr_ops, NULL, &masks);
 	mmap_write_unlock(&init_mm);
 
 	flush_tlb_kernel_range(start, end);
diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c
index 3a7b5baaa45066..bc9ce5cc2e2649 100644
--- a/arch/openrisc/kernel/dma.c
+++ b/arch/openrisc/kernel/dma.c
@@ -72,7 +72,7 @@ void *arch_dma_set_uncached(void *cpu_addr, size_t size)
 	 * them and setting the cache-inhibit bit.
 	 */
 	mmap_write_lock(&init_mm);
-	error = walk_page_range_novma(&init_mm, va, va + size,
+	error = walk_page_range_kernel(va, va + size,
 			&set_nocache_walk_ops, NULL, NULL);
 	mmap_write_unlock(&init_mm);
 
@@ -87,7 +87,7 @@ void arch_dma_clear_uncached(void *cpu_addr, size_t size)
 
 	mmap_write_lock(&init_mm);
 	/* walk_page_range shouldn't be able to fail here */
-	WARN_ON(walk_page_range_novma(&init_mm, va, va + size,
+	WARN_ON(walk_page_range_kernel(va, va + size,
 			&clear_nocache_walk_ops, NULL, NULL));
 	mmap_write_unlock(&init_mm);
 }
diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
index d815448758a19c..80e0cd08a4e55f 100644
--- a/arch/riscv/mm/pageattr.c
+++ b/arch/riscv/mm/pageattr.c
@@ -299,7 +299,7 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
 			if (ret)
 				goto unlock;
 
-			ret = walk_page_range_novma(&init_mm, lm_start, lm_end,
+			ret = walk_page_range_kernel(lm_start, lm_end,
 						    &pageattr_ops, NULL, &masks);
 			if (ret)
 				goto unlock;
@@ -317,13 +317,13 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
 		if (ret)
 			goto unlock;
 
-		ret = walk_page_range_novma(&init_mm, lm_start, lm_end,
+		ret = walk_page_range_kernel(lm_start, lm_end,
 					    &pageattr_ops, NULL, &masks);
 		if (ret)
 			goto unlock;
 	}
 
-	ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL,
+	ret = walk_page_range_kernel(start, end, &pageattr_ops, NULL,
 				     &masks);
 
 unlock:
@@ -335,7 +335,7 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
 	 */
 	flush_tlb_all();
 #else
-	ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL,
+	ret = walk_page_range_kernel(start, end, &pageattr_ops, NULL,
 				     &masks);
 
 	mmap_write_unlock(&init_mm);
diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h
index 9700a29f8afbc9..1c2dc3ae071916 100644
--- a/include/linux/pagewalk.h
+++ b/include/linux/pagewalk.h
@@ -129,6 +129,9 @@ struct mm_walk {
 int walk_page_range(struct mm_struct *mm, unsigned long start,
 		unsigned long end, const struct mm_walk_ops *ops,
 		void *private);
+int walk_page_range_kernel(unsigned long start,
+		unsigned long end, const struct mm_walk_ops *ops,
+		pgd_t *pgd, void *private);
 int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
 			  unsigned long end, const struct mm_walk_ops *ops,
 			  pgd_t *pgd,
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 27245e86df2500..724033d9d4a16b 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -166,7 +166,7 @@ static int vmemmap_remap_range(unsigned long start, unsigned long end,
 	VM_BUG_ON(!PAGE_ALIGNED(start | end));
 
 	mmap_read_lock(&init_mm);
-	ret = walk_page_range_novma(&init_mm, start, end, &vmemmap_remap_ops,
+	ret = walk_page_range_kernel(start, end, &vmemmap_remap_ops,
 				    NULL, walk);
 	mmap_read_unlock(&init_mm);
 	if (ret)
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index e478777c86e196..d27347ffcd6348 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -584,9 +584,28 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
 	return walk_page_range_mm(mm, start, end, ops, private);
 }
 
+static int __walk_page_range_novma(struct mm_struct *mm, unsigned long start,
+		unsigned long end, const struct mm_walk_ops *ops,
+		pgd_t *pgd, void *private)
+{
+	struct mm_walk walk = {
+		.ops		= ops,
+		.mm		= mm,
+		.pgd		= pgd,
+		.private	= private,
+		.no_vma		= true
+	};
+
+	if (start >= end || !walk.mm)
+		return -EINVAL;
+	if (!check_ops_valid(ops))
+		return -EINVAL;
+
+	return walk_pgd_range(start, end, &walk);
+}
+
 /**
- * walk_page_range_novma - walk a range of pagetables not backed by a vma
- * @mm:		mm_struct representing the target process of page table walk
+ * walk_page_range_kernel - walk a range of kernel pagetables.
  * @start:	start address of the virtual address range
  * @end:	end address of the virtual address range
  * @ops:	operation to call during the walk
@@ -596,56 +615,76 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
  * Similar to walk_page_range() but can walk any page tables even if they are
  * not backed by VMAs. Because 'unusual' entries may be walked this function
  * will also not lock the PTEs for the pte_entry() callback. This is useful for
- * walking the kernel pages tables or page tables for firmware.
+ * walking kernel page tables or page tables for firmware.
  *
  * Note: Be careful to walk the kernel pages tables, the caller may be need to
  * take other effective approaches (mmap lock may be insufficient) to prevent
  * the intermediate kernel page tables belonging to the specified address range
  * from being freed (e.g. memory hot-remove).
  */
+int walk_page_range_kernel(unsigned long start, unsigned long end,
+		const struct mm_walk_ops *ops, pgd_t *pgd, void *private)
+{
+	struct mm_struct *mm = &init_mm;
+
+	/*
+	 * Kernel intermediate page tables are usually not freed, so the mmap
+	 * read lock is sufficient. But there are some exceptions.
+	 * E.g. memory hot-remove. In which case, the mmap lock is insufficient
+	 * to prevent the intermediate kernel page tables belonging to the
+	 * specified address range from being freed. The caller should take
+	 * other actions to prevent this race.
+	 */
+	mmap_assert_locked(mm);
+
+	return __walk_page_range_novma(mm, start, end, ops, pgd, private);
+}
+
+/**
+ * walk_page_range_novma - walk a range of pagetables not backed by a vma
+ * @mm:		mm_struct representing the target process of page table walk
+ * @start:	start address of the virtual address range
+ * @end:	end address of the virtual address range
+ * @ops:	operation to call during the walk
+ * @pgd:	pgd to walk if different from mm->pgd
+ * @private:	private data for callbacks' usage
+ *
+ * Similar to walk_page_range() but can walk any page tables even if they are
+ * not backed by VMAs. Because 'unusual' entries may be walked this function
+ * will also not lock the PTEs for the pte_entry() callback.
+ *
+ * This is for debugging purposes ONLY.
+ */
 int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
 			  unsigned long end, const struct mm_walk_ops *ops,
 			  pgd_t *pgd,
 			  void *private)
 {
-	struct mm_walk walk = {
-		.ops		= ops,
-		.mm		= mm,
-		.pgd		= pgd,
-		.private	= private,
-		.no_vma		= true
-	};
-
-	if (start >= end || !walk.mm)
-		return -EINVAL;
-	if (!check_ops_valid(ops))
-		return -EINVAL;
+	/*
+	 * For convenience, we allow this function to also traverse kernel
+	 * mappings.
+	 */
+	if (mm == &init_mm)
+		return walk_page_range_kernel(start, end, ops, pgd, private);
 
+	/*
+	 * Reject a NULL mm here: the lock assertion below dereferences it,
+	 * and the -EINVAL check in __walk_page_range_novma() runs too late.
+	 */
+	if (!mm)
+		return -EINVAL;
+
 	/*
-	 * 1) For walking the user virtual address space:
-	 *
 	 * The mmap lock protects the page walker from changes to the page
 	 * tables during the walk. However a read lock is insufficient to
 	 * protect those areas which don't have a VMA as munmap() detaches
 	 * the VMAs before downgrading to a read lock and actually tearing
 	 * down PTEs/page tables. In which case, the mmap write lock should
-	 * be hold.
-	 *
-	 * 2) For walking the kernel virtual address space:
-	 *
-	 * The kernel intermediate page tables usually do not be freed, so
-	 * the mmap map read lock is sufficient. But there are some exceptions.
-	 * E.g. memory hot-remove. In which case, the mmap lock is insufficient
-	 * to prevent the intermediate kernel pages tables belonging to the
-	 * specified address range from being freed. The caller should take
-	 * other actions to prevent this race.
+	 * be held.
 	 */
-	if (mm == &init_mm)
-		mmap_assert_locked(walk.mm);
-	else
-		mmap_assert_write_locked(walk.mm);
+	mmap_assert_write_locked(mm);
 
-	return walk_pgd_range(start, end, &walk);
+	return __walk_page_range_novma(mm, start, end, ops, pgd, private);
 }
 
 int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start,