diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 032516783e9622..9d7f6958fa816e 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -8902,6 +8902,33 @@ helpful if user space wants to emulate instructions which are not This capability can be enabled dynamically even if VCPUs were already created and are running. +7.47 KVM_CAP_RISCV_SET_HGATP_MODE +--------------------------------- + +:Architectures: riscv +:Type: VM +:Parameters: args[0] contains the requested HGATP mode +:Returns: + - 0 on success. + - -EINVAL if args[0] is outside the range of HGATP modes supported by the + hardware. + - -EBUSY if vCPUs have already been created for the VM, or if the VM has + any non-empty memslots. + +This capability allows userspace to explicitly select the HGATP mode for +the VM. The selected mode must be supported by both KVM and hardware. This +capability must be enabled before creating any vCPUs or memslots. + +If this capability is not enabled, KVM will select the default HGATP mode +automatically. The default is the highest HGATP.MODE value supported by +hardware. + +``KVM_CHECK_EXTENSION(KVM_CAP_RISCV_SET_HGATP_MODE)`` returns a bitmask of +HGATP.MODE values supported by the host. A return value of 0 indicates that +the capability is not supported. The bitmask uses the HGATP.MODE encodings +defined by the RISC-V privileged specification; for example, Sv39x4 +corresponds to HGATP.MODE=8, so userspace should test bitmask & BIT(8). + 8. Other capabilities. 
====================== diff --git a/arch/riscv/include/asm/kvm_gstage.h b/arch/riscv/include/asm/kvm_gstage.h index 595e2183173ebd..bbf8f45c6563be 100644 --- a/arch/riscv/include/asm/kvm_gstage.h +++ b/arch/riscv/include/asm/kvm_gstage.h @@ -15,6 +15,7 @@ struct kvm_gstage { #define KVM_GSTAGE_FLAGS_LOCAL BIT(0) unsigned long vmid; pgd_t *pgd; + unsigned long pgd_levels; }; struct kvm_gstage_mapping { @@ -29,16 +30,23 @@ struct kvm_gstage_mapping { #define kvm_riscv_gstage_index_bits 10 #endif -extern unsigned long kvm_riscv_gstage_mode; -extern unsigned long kvm_riscv_gstage_pgd_levels; +extern unsigned long kvm_riscv_gstage_max_pgd_levels; +extern u32 kvm_riscv_gstage_supported_mode_mask; #define kvm_riscv_gstage_pgd_xbits 2 #define kvm_riscv_gstage_pgd_size (1UL << (HGATP_PAGE_SHIFT + kvm_riscv_gstage_pgd_xbits)) -#define kvm_riscv_gstage_gpa_bits (HGATP_PAGE_SHIFT + \ - (kvm_riscv_gstage_pgd_levels * \ - kvm_riscv_gstage_index_bits) + \ - kvm_riscv_gstage_pgd_xbits) -#define kvm_riscv_gstage_gpa_size ((gpa_t)(1ULL << kvm_riscv_gstage_gpa_bits)) + +static inline unsigned long kvm_riscv_gstage_gpa_bits(unsigned long pgd_levels) +{ + return (HGATP_PAGE_SHIFT + + pgd_levels * kvm_riscv_gstage_index_bits + + kvm_riscv_gstage_pgd_xbits); +} + +static inline gpa_t kvm_riscv_gstage_gpa_size(unsigned long pgd_levels) +{ + return BIT_ULL(kvm_riscv_gstage_gpa_bits(pgd_levels)); +} bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr, pte_t **ptepp, u32 *ptep_level); @@ -69,4 +77,44 @@ void kvm_riscv_gstage_wp_range(struct kvm_gstage *gstage, gpa_t start, gpa_t end void kvm_riscv_gstage_mode_detect(void); +static inline unsigned long kvm_riscv_gstage_mode(unsigned long pgd_levels) +{ + switch (pgd_levels) { + case 2: + return HGATP_MODE_SV32X4; + case 3: + return HGATP_MODE_SV39X4; + case 4: + return HGATP_MODE_SV48X4; + case 5: + return HGATP_MODE_SV57X4; + default: + WARN_ON_ONCE(1); + return HGATP_MODE_OFF; + } +} + +static inline void 
kvm_riscv_gstage_init(struct kvm_gstage *gstage, struct kvm *kvm) +{ + gstage->kvm = kvm; + gstage->flags = 0; + gstage->vmid = READ_ONCE(kvm->arch.vmid.vmid); + gstage->pgd = kvm->arch.pgd; + gstage->pgd_levels = kvm->arch.pgd_levels; +} + +static inline u32 kvm_riscv_get_hgatp_mode_mask(void) +{ + return kvm_riscv_gstage_supported_mode_mask; +} + +static inline bool kvm_riscv_hgatp_mode_is_valid(unsigned long mode) +{ + /* Userspace-supplied: never let BIT() shift by BITS_PER_LONG or more. */ + if (mode >= BITS_PER_LONG) + return false; + + return kvm_riscv_gstage_supported_mode_mask & BIT(mode); +} + #endif diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index 24585304c02b14..478f699e9decbd 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -94,6 +94,7 @@ struct kvm_arch { /* G-stage page table */ pgd_t *pgd; phys_addr_t pgd_phys; + unsigned long pgd_levels; /* Guest Timer */ struct kvm_guest_timer timer; diff --git a/arch/riscv/kvm/gstage.c b/arch/riscv/kvm/gstage.c index b67d60d722c2fb..9204e6427d2d3e 100644 --- a/arch/riscv/kvm/gstage.c +++ b/arch/riscv/kvm/gstage.c @@ -12,22 +12,23 @@ #include #ifdef CONFIG_64BIT -unsigned long kvm_riscv_gstage_mode __ro_after_init = HGATP_MODE_SV39X4; -unsigned long kvm_riscv_gstage_pgd_levels __ro_after_init = 3; +unsigned long kvm_riscv_gstage_max_pgd_levels __ro_after_init = 3; #else -unsigned long kvm_riscv_gstage_mode __ro_after_init = HGATP_MODE_SV32X4; -unsigned long kvm_riscv_gstage_pgd_levels __ro_after_init = 2; +unsigned long kvm_riscv_gstage_max_pgd_levels __ro_after_init = 2; #endif +/* Bitmask of supported HGATP.MODE encodings (BIT(HGATP_MODE_*)). 
*/ +u32 kvm_riscv_gstage_supported_mode_mask __ro_after_init; #define gstage_pte_leaf(__ptep) \ (pte_val(*(__ptep)) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)) -static inline unsigned long gstage_pte_index(gpa_t addr, u32 level) +static inline unsigned long gstage_pte_index(struct kvm_gstage *gstage, + gpa_t addr, u32 level) { unsigned long mask; unsigned long shift = HGATP_PAGE_SHIFT + (kvm_riscv_gstage_index_bits * level); - if (level == (kvm_riscv_gstage_pgd_levels - 1)) + if (level == gstage->pgd_levels - 1) mask = (PTRS_PER_PTE * (1UL << kvm_riscv_gstage_pgd_xbits)) - 1; else mask = PTRS_PER_PTE - 1; @@ -40,12 +41,13 @@ static inline unsigned long gstage_pte_page_vaddr(pte_t pte) return (unsigned long)pfn_to_virt(__page_val_to_pfn(pte_val(pte))); } -static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level) +static int gstage_page_size_to_level(struct kvm_gstage *gstage, unsigned long page_size, + u32 *out_level) { u32 i; unsigned long psz = 1UL << 12; - for (i = 0; i < kvm_riscv_gstage_pgd_levels; i++) { + for (i = 0; i < gstage->pgd_levels; i++) { if (page_size == (psz << (i * kvm_riscv_gstage_index_bits))) { *out_level = i; return 0; @@ -55,21 +57,23 @@ static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level) return -EINVAL; } -static int gstage_level_to_page_order(u32 level, unsigned long *out_pgorder) +static int gstage_level_to_page_order(struct kvm_gstage *gstage, u32 level, + unsigned long *out_pgorder) { - if (kvm_riscv_gstage_pgd_levels < level) + if (gstage->pgd_levels < level) return -EINVAL; *out_pgorder = 12 + (level * kvm_riscv_gstage_index_bits); return 0; } -static int gstage_level_to_page_size(u32 level, unsigned long *out_pgsize) +static int gstage_level_to_page_size(struct kvm_gstage *gstage, u32 level, + unsigned long *out_pgsize) { int rc; unsigned long page_order = PAGE_SHIFT; - rc = gstage_level_to_page_order(level, &page_order); + rc = gstage_level_to_page_order(gstage, level, &page_order); if 
(rc) return rc; @@ -81,11 +85,11 @@ bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr, pte_t **ptepp, u32 *ptep_level) { pte_t *ptep; - u32 current_level = kvm_riscv_gstage_pgd_levels - 1; + u32 current_level = gstage->pgd_levels - 1; *ptep_level = current_level; ptep = (pte_t *)gstage->pgd; - ptep = &ptep[gstage_pte_index(addr, current_level)]; + ptep = &ptep[gstage_pte_index(gstage, addr, current_level)]; while (ptep && pte_val(ptep_get(ptep))) { if (gstage_pte_leaf(ptep)) { *ptep_level = current_level; @@ -97,7 +101,7 @@ bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr, current_level--; *ptep_level = current_level; ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep)); - ptep = &ptep[gstage_pte_index(addr, current_level)]; + ptep = &ptep[gstage_pte_index(gstage, addr, current_level)]; } else { ptep = NULL; } @@ -110,7 +114,7 @@ static void gstage_tlb_flush(struct kvm_gstage *gstage, u32 level, gpa_t addr) { unsigned long order = PAGE_SHIFT; - if (gstage_level_to_page_order(level, &order)) + if (gstage_level_to_page_order(gstage, level, &order)) return; addr &= ~(BIT(order) - 1); @@ -125,9 +129,9 @@ int kvm_riscv_gstage_set_pte(struct kvm_gstage *gstage, struct kvm_mmu_memory_cache *pcache, const struct kvm_gstage_mapping *map) { - u32 current_level = kvm_riscv_gstage_pgd_levels - 1; + u32 current_level = gstage->pgd_levels - 1; pte_t *next_ptep = (pte_t *)gstage->pgd; - pte_t *ptep = &next_ptep[gstage_pte_index(map->addr, current_level)]; + pte_t *ptep = &next_ptep[gstage_pte_index(gstage, map->addr, current_level)]; if (current_level < map->level) return -EINVAL; @@ -151,7 +155,7 @@ int kvm_riscv_gstage_set_pte(struct kvm_gstage *gstage, } current_level--; - ptep = &next_ptep[gstage_pte_index(map->addr, current_level)]; + ptep = &next_ptep[gstage_pte_index(gstage, map->addr, current_level)]; } if (pte_val(*ptep) != pte_val(map->pte)) { @@ -175,7 +179,7 @@ int kvm_riscv_gstage_map_page(struct kvm_gstage *gstage, 
out_map->addr = gpa; out_map->level = 0; - ret = gstage_page_size_to_level(page_size, &out_map->level); + ret = gstage_page_size_to_level(gstage, page_size, &out_map->level); if (ret) return ret; @@ -217,7 +221,7 @@ void kvm_riscv_gstage_op_pte(struct kvm_gstage *gstage, gpa_t addr, u32 next_ptep_level; unsigned long next_page_size, page_size; - ret = gstage_level_to_page_size(ptep_level, &page_size); + ret = gstage_level_to_page_size(gstage, ptep_level, &page_size); if (ret) return; @@ -229,7 +233,7 @@ void kvm_riscv_gstage_op_pte(struct kvm_gstage *gstage, gpa_t addr, if (ptep_level && !gstage_pte_leaf(ptep)) { next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep)); next_ptep_level = ptep_level - 1; - ret = gstage_level_to_page_size(next_ptep_level, &next_page_size); + ret = gstage_level_to_page_size(gstage, next_ptep_level, &next_page_size); if (ret) return; @@ -263,7 +267,7 @@ void kvm_riscv_gstage_unmap_range(struct kvm_gstage *gstage, while (addr < end) { found_leaf = kvm_riscv_gstage_get_leaf(gstage, addr, &ptep, &ptep_level); - ret = gstage_level_to_page_size(ptep_level, &page_size); + ret = gstage_level_to_page_size(gstage, ptep_level, &page_size); if (ret) break; @@ -297,7 +301,7 @@ void kvm_riscv_gstage_wp_range(struct kvm_gstage *gstage, gpa_t start, gpa_t end while (addr < end) { found_leaf = kvm_riscv_gstage_get_leaf(gstage, addr, &ptep, &ptep_level); - ret = gstage_level_to_page_size(ptep_level, &page_size); + ret = gstage_level_to_page_size(gstage, ptep_level, &page_size); if (ret) break; @@ -315,44 +319,46 @@ void kvm_riscv_gstage_wp_range(struct kvm_gstage *gstage, gpa_t start, gpa_t end void __init kvm_riscv_gstage_mode_detect(void) { + kvm_riscv_gstage_supported_mode_mask = 0; + kvm_riscv_gstage_max_pgd_levels = 0; + #ifdef CONFIG_64BIT /* Try Sv57x4 G-stage mode */ csr_write(CSR_HGATP, HGATP_MODE_SV57X4 << HGATP_MODE_SHIFT); if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV57X4) { - kvm_riscv_gstage_mode = HGATP_MODE_SV57X4; - 
kvm_riscv_gstage_pgd_levels = 5; + kvm_riscv_gstage_max_pgd_levels = 5; + kvm_riscv_gstage_supported_mode_mask |= BIT(HGATP_MODE_SV57X4) | + BIT(HGATP_MODE_SV48X4) | + BIT(HGATP_MODE_SV39X4); goto done; } /* Try Sv48x4 G-stage mode */ csr_write(CSR_HGATP, HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT); if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV48X4) { - kvm_riscv_gstage_mode = HGATP_MODE_SV48X4; - kvm_riscv_gstage_pgd_levels = 4; + kvm_riscv_gstage_max_pgd_levels = 4; + kvm_riscv_gstage_supported_mode_mask |= BIT(HGATP_MODE_SV48X4) | + BIT(HGATP_MODE_SV39X4); goto done; } /* Try Sv39x4 G-stage mode */ csr_write(CSR_HGATP, HGATP_MODE_SV39X4 << HGATP_MODE_SHIFT); if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV39X4) { - kvm_riscv_gstage_mode = HGATP_MODE_SV39X4; - kvm_riscv_gstage_pgd_levels = 3; + kvm_riscv_gstage_max_pgd_levels = 3; + kvm_riscv_gstage_supported_mode_mask |= BIT(HGATP_MODE_SV39X4); goto done; } #else /* CONFIG_32BIT */ /* Try Sv32x4 G-stage mode */ csr_write(CSR_HGATP, HGATP_MODE_SV32X4 << HGATP_MODE_SHIFT); if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV32X4) { - kvm_riscv_gstage_mode = HGATP_MODE_SV32X4; - kvm_riscv_gstage_pgd_levels = 2; + kvm_riscv_gstage_max_pgd_levels = 2; + kvm_riscv_gstage_supported_mode_mask |= BIT(HGATP_MODE_SV32X4); goto done; } #endif - /* KVM depends on !HGATP_MODE_OFF */ - kvm_riscv_gstage_mode = HGATP_MODE_OFF; - kvm_riscv_gstage_pgd_levels = 0; - done: csr_write(CSR_HGATP, 0); kvm_riscv_local_hfence_gvma_all(); diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index 0f3fe3986fc02e..90ee0a032b9a3c 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -105,17 +105,17 @@ static int __init riscv_kvm_init(void) return rc; kvm_riscv_gstage_mode_detect(); - switch (kvm_riscv_gstage_mode) { - case HGATP_MODE_SV32X4: + switch (kvm_riscv_gstage_max_pgd_levels) { + case 2: str = "Sv32x4"; break; - case HGATP_MODE_SV39X4: + case 3: str = "Sv39x4"; break; - case 
HGATP_MODE_SV48X4: + case 4: str = "Sv48x4"; break; - case HGATP_MODE_SV57X4: + case 5: str = "Sv57x4"; break; default: @@ -164,7 +164,7 @@ static int __init riscv_kvm_init(void) (rc) ? slist : "no features"); } - kvm_info("using %s G-stage page table format\n", str); + kvm_info("highest G-stage page table mode is %s\n", str); kvm_info("VMID %ld bits available\n", kvm_riscv_gstage_vmid_bits()); diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index 088d33ba90edaf..2d3def024270c0 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -24,10 +24,7 @@ static void mmu_wp_memory_region(struct kvm *kvm, int slot) phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; struct kvm_gstage gstage; - gstage.kvm = kvm; - gstage.flags = 0; - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); - gstage.pgd = kvm->arch.pgd; + kvm_riscv_gstage_init(&gstage, kvm); spin_lock(&kvm->mmu_lock); kvm_riscv_gstage_wp_range(&gstage, start, end); @@ -49,10 +46,7 @@ int kvm_riscv_mmu_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa, struct kvm_gstage_mapping map; struct kvm_gstage gstage; - gstage.kvm = kvm; - gstage.flags = 0; - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); - gstage.pgd = kvm->arch.pgd; + kvm_riscv_gstage_init(&gstage, kvm); end = (gpa + size + PAGE_SIZE - 1) & PAGE_MASK; pfn = __phys_to_pfn(hpa); @@ -67,7 +61,7 @@ int kvm_riscv_mmu_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa, if (!writable) map.pte = pte_wrprotect(map.pte); - ret = kvm_mmu_topup_memory_cache(&pcache, kvm_riscv_gstage_pgd_levels); + ret = kvm_mmu_topup_memory_cache(&pcache, kvm->arch.pgd_levels); if (ret) goto out; @@ -89,10 +83,7 @@ void kvm_riscv_mmu_iounmap(struct kvm *kvm, gpa_t gpa, unsigned long size) { struct kvm_gstage gstage; - gstage.kvm = kvm; - gstage.flags = 0; - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); - gstage.pgd = kvm->arch.pgd; + kvm_riscv_gstage_init(&gstage, kvm); spin_lock(&kvm->mmu_lock); kvm_riscv_gstage_unmap_range(&gstage, gpa, size, false); 
@@ -109,10 +100,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT; struct kvm_gstage gstage; - gstage.kvm = kvm; - gstage.flags = 0; - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); - gstage.pgd = kvm->arch.pgd; + kvm_riscv_gstage_init(&gstage, kvm); kvm_riscv_gstage_wp_range(&gstage, start, end); } @@ -141,10 +129,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, phys_addr_t size = slot->npages << PAGE_SHIFT; struct kvm_gstage gstage; - gstage.kvm = kvm; - gstage.flags = 0; - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); - gstage.pgd = kvm->arch.pgd; + kvm_riscv_gstage_init(&gstage, kvm); spin_lock(&kvm->mmu_lock); kvm_riscv_gstage_unmap_range(&gstage, gpa, size, false); @@ -186,7 +171,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, * space addressable by the KVM guest GPA space. */ if ((new->base_gfn + new->npages) >= - (kvm_riscv_gstage_gpa_size >> PAGE_SHIFT)) + kvm_riscv_gstage_gpa_size(kvm->arch.pgd_levels) >> PAGE_SHIFT) return -EFAULT; hva = new->userspace_addr; @@ -250,10 +235,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) if (!kvm->arch.pgd) return false; - gstage.kvm = kvm; - gstage.flags = 0; - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); - gstage.pgd = kvm->arch.pgd; + kvm_riscv_gstage_init(&gstage, kvm); mmu_locked = spin_trylock(&kvm->mmu_lock); kvm_riscv_gstage_unmap_range(&gstage, range->start << PAGE_SHIFT, (range->end - range->start) << PAGE_SHIFT, @@ -275,10 +257,7 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE); - gstage.kvm = kvm; - gstage.flags = 0; - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); - gstage.pgd = kvm->arch.pgd; + kvm_riscv_gstage_init(&gstage, kvm); if (!kvm_riscv_gstage_get_leaf(&gstage, range->start << PAGE_SHIFT, &ptep, &ptep_level)) return false; @@ -298,10 +277,7 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct 
kvm_gfn_range *range) WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE); - gstage.kvm = kvm; - gstage.flags = 0; - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); - gstage.pgd = kvm->arch.pgd; + kvm_riscv_gstage_init(&gstage, kvm); if (!kvm_riscv_gstage_get_leaf(&gstage, range->start << PAGE_SHIFT, &ptep, &ptep_level)) return false; @@ -463,16 +439,13 @@ int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, struct kvm_gstage gstage; struct page *page; - gstage.kvm = kvm; - gstage.flags = 0; - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); - gstage.pgd = kvm->arch.pgd; + kvm_riscv_gstage_init(&gstage, kvm); /* Setup initial state of output mapping */ memset(out_map, 0, sizeof(*out_map)); /* We need minimum second+third level pages */ - ret = kvm_mmu_topup_memory_cache(pcache, kvm_riscv_gstage_pgd_levels); + ret = kvm_mmu_topup_memory_cache(pcache, kvm->arch.pgd_levels); if (ret) { kvm_err("Failed to topup G-stage cache\n"); return ret; @@ -575,6 +548,7 @@ int kvm_riscv_mmu_alloc_pgd(struct kvm *kvm) return -ENOMEM; kvm->arch.pgd = page_to_virt(pgd_page); kvm->arch.pgd_phys = page_to_phys(pgd_page); + kvm->arch.pgd_levels = kvm_riscv_gstage_max_pgd_levels; return 0; } @@ -586,14 +560,13 @@ void kvm_riscv_mmu_free_pgd(struct kvm *kvm) spin_lock(&kvm->mmu_lock); if (kvm->arch.pgd) { - gstage.kvm = kvm; - gstage.flags = 0; - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); - gstage.pgd = kvm->arch.pgd; - kvm_riscv_gstage_unmap_range(&gstage, 0UL, kvm_riscv_gstage_gpa_size, false); + kvm_riscv_gstage_init(&gstage, kvm); + kvm_riscv_gstage_unmap_range(&gstage, 0UL, + kvm_riscv_gstage_gpa_size(kvm->arch.pgd_levels), false); pgd = READ_ONCE(kvm->arch.pgd); kvm->arch.pgd = NULL; kvm->arch.pgd_phys = 0; + kvm->arch.pgd_levels = 0; } spin_unlock(&kvm->mmu_lock); @@ -603,11 +576,12 @@ void kvm_riscv_mmu_free_pgd(struct kvm *kvm) void kvm_riscv_mmu_update_hgatp(struct kvm_vcpu *vcpu) { - unsigned long hgatp = kvm_riscv_gstage_mode << 
HGATP_MODE_SHIFT; - struct kvm_arch *k = &vcpu->kvm->arch; + struct kvm_arch *ka = &vcpu->kvm->arch; + unsigned long hgatp = kvm_riscv_gstage_mode(ka->pgd_levels) + << HGATP_MODE_SHIFT; - hgatp |= (READ_ONCE(k->vmid.vmid) << HGATP_VMID_SHIFT) & HGATP_VMID; - hgatp |= (k->pgd_phys >> PAGE_SHIFT) & HGATP_PPN; + hgatp |= (READ_ONCE(ka->vmid.vmid) << HGATP_VMID_SHIFT) & HGATP_VMID; + hgatp |= (ka->pgd_phys >> PAGE_SHIFT) & HGATP_PPN; ncsr_write(CSR_HGATP, hgatp); diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c index 13c63ae1a78b25..5e82a3ad3ad015 100644 --- a/arch/riscv/kvm/vm.c +++ b/arch/riscv/kvm/vm.c @@ -199,7 +199,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = KVM_USER_MEM_SLOTS; break; case KVM_CAP_VM_GPA_BITS: - r = kvm_riscv_gstage_gpa_bits; + /* kvm is NULL on the /dev/kvm fd; report the host maximum. */ + r = kvm_riscv_gstage_gpa_bits(kvm ? kvm->arch.pgd_levels : + kvm_riscv_gstage_max_pgd_levels); + break; + case KVM_CAP_RISCV_SET_HGATP_MODE: + r = kvm_riscv_get_hgatp_mode_mask(); break; default: r = 0; @@ -211,12 +216,37 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) { + int r; + + if (cap->flags) + return -EINVAL; + switch (cap->cap) { case KVM_CAP_RISCV_MP_STATE_RESET: - if (cap->flags) - return -EINVAL; kvm->arch.mp_state_reset = true; return 0; + case KVM_CAP_RISCV_SET_HGATP_MODE: + if (!kvm_riscv_hgatp_mode_is_valid(cap->args[0])) + return -EINVAL; + + /* + * created_vcpus is protected by kvm->lock and the memslot check + * requires slots_lock; hold both so the check and the update + * cannot race with vCPU or memslot creation. + */ + mutex_lock(&kvm->lock); + mutex_lock(&kvm->slots_lock); + if (kvm->created_vcpus || !kvm_are_all_memslots_empty(kvm)) { + r = -EBUSY; + } else { +#ifdef CONFIG_64BIT + kvm->arch.pgd_levels = 3 + cap->args[0] - HGATP_MODE_SV39X4; +#endif + r = 0; + } + mutex_unlock(&kvm->slots_lock); + mutex_unlock(&kvm->lock); + return r; default: return -EINVAL; } diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c index cf34d448289d79..c15bdb1dd8bef0 100644 --- a/arch/riscv/kvm/vmid.c +++ b/arch/riscv/kvm/vmid.c @@ -26,7 +26,8 @@ static DEFINE_SPINLOCK(vmid_lock); void __init kvm_riscv_gstage_vmid_detect(void) { /* Figure-out number of VMID bits in HW */ - csr_write(CSR_HGATP, (kvm_riscv_gstage_mode << HGATP_MODE_SHIFT) | HGATP_VMID); + 
csr_write(CSR_HGATP, (kvm_riscv_gstage_mode(kvm_riscv_gstage_max_pgd_levels) << + HGATP_MODE_SHIFT) | HGATP_VMID); vmid_bits = csr_read(CSR_HGATP); vmid_bits = (vmid_bits & HGATP_VMID) >> HGATP_VMID_SHIFT; vmid_bits = fls_long(vmid_bits); diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 80364d4dbebb0c..a74a80fd40469c 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -989,6 +989,7 @@ struct kvm_enable_cap { #define KVM_CAP_ARM_SEA_TO_USER 245 #define KVM_CAP_S390_USER_OPEREXEC 246 #define KVM_CAP_S390_KEYOP 247 +#define KVM_CAP_RISCV_SET_HGATP_MODE 248 struct kvm_irq_routing_irqchip { __u32 irqchip;