Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions Documentation/virt/kvm/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8902,6 +8902,33 @@ helpful if user space wants to emulate instructions which are not
This capability can be enabled dynamically even if VCPUs were already
created and are running.

7.47 KVM_CAP_RISCV_SET_HGATP_MODE
---------------------------------

:Architectures: riscv
:Type: VM
:Parameters: args[0] contains the requested HGATP mode
:Returns:
- 0 on success.
- -EINVAL if args[0] is outside the range of HGATP modes supported by the
hardware.
- -EBUSY if vCPUs have already been created for the VM, or if the VM has any
non-empty memslots.

This capability allows userspace to explicitly select the HGATP mode for
the VM. The selected mode must be supported by both KVM and hardware. This
capability must be enabled before creating any vCPUs or memslots.

If this capability is not enabled, KVM will select the default HGATP mode
automatically. The default is the highest HGATP.MODE value supported by
hardware.

``KVM_CHECK_EXTENSION(KVM_CAP_RISCV_SET_HGATP_MODE)`` returns a bitmask of
HGATP.MODE values supported by the host. A return value of 0 indicates that
the capability is not supported. Bit positions in the bitmask correspond to
HGATP.MODE encodings as defined by the RISC-V privileged specification. For
example, Sv39x4 corresponds to HGATP.MODE=8, so userspace should test
``bitmask & BIT(8)`` to check for Sv39x4 support.

8. Other capabilities.
======================

Expand Down
58 changes: 51 additions & 7 deletions arch/riscv/include/asm/kvm_gstage.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ struct kvm_gstage {
#define KVM_GSTAGE_FLAGS_LOCAL BIT(0)
unsigned long vmid;
pgd_t *pgd;
unsigned long pgd_levels;
};

struct kvm_gstage_mapping {
Expand All @@ -29,16 +30,23 @@ struct kvm_gstage_mapping {
#define kvm_riscv_gstage_index_bits 10
#endif

extern unsigned long kvm_riscv_gstage_mode;
extern unsigned long kvm_riscv_gstage_pgd_levels;
extern unsigned long kvm_riscv_gstage_max_pgd_levels;
extern u32 kvm_riscv_gstage_supported_mode_mask;

#define kvm_riscv_gstage_pgd_xbits 2
#define kvm_riscv_gstage_pgd_size (1UL << (HGATP_PAGE_SHIFT + kvm_riscv_gstage_pgd_xbits))
#define kvm_riscv_gstage_gpa_bits (HGATP_PAGE_SHIFT + \
(kvm_riscv_gstage_pgd_levels * \
kvm_riscv_gstage_index_bits) + \
kvm_riscv_gstage_pgd_xbits)
#define kvm_riscv_gstage_gpa_size ((gpa_t)(1ULL << kvm_riscv_gstage_gpa_bits))

/*
 * Width, in bits, of the guest physical address space covered by a G-stage
 * page table with @pgd_levels levels: the page offset, one index field per
 * level, plus the extra index bits used at the root level.
 */
static inline unsigned long kvm_riscv_gstage_gpa_bits(unsigned long pgd_levels)
{
	unsigned long index_bits = pgd_levels * kvm_riscv_gstage_index_bits;

	return HGATP_PAGE_SHIFT + index_bits + kvm_riscv_gstage_pgd_xbits;
}

/*
 * Size of the guest physical address space for a G-stage page table with
 * @pgd_levels levels, i.e. 2^kvm_riscv_gstage_gpa_bits(@pgd_levels).
 */
static inline gpa_t kvm_riscv_gstage_gpa_size(unsigned long pgd_levels)
{
	unsigned long gpa_bits = kvm_riscv_gstage_gpa_bits(pgd_levels);

	return BIT_ULL(gpa_bits);
}

bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr,
pte_t **ptepp, u32 *ptep_level);
Expand Down Expand Up @@ -69,4 +77,40 @@ void kvm_riscv_gstage_wp_range(struct kvm_gstage *gstage, gpa_t start, gpa_t end

void kvm_riscv_gstage_mode_detect(void);

/*
 * Translate a G-stage page table depth into the matching HGATP.MODE value.
 *
 * Depths 2..5 map to Sv32x4, Sv39x4, Sv48x4 and Sv57x4 respectively. Any
 * other depth triggers a one-time warning and yields HGATP_MODE_OFF.
 */
static inline unsigned long kvm_riscv_gstage_mode(unsigned long pgd_levels)
{
	if (pgd_levels == 2)
		return HGATP_MODE_SV32X4;
	if (pgd_levels == 3)
		return HGATP_MODE_SV39X4;
	if (pgd_levels == 4)
		return HGATP_MODE_SV48X4;
	if (pgd_levels == 5)
		return HGATP_MODE_SV57X4;

	/* No HGATP mode corresponds to this number of levels. */
	WARN_ON_ONCE(1);
	return HGATP_MODE_OFF;
}

static inline void kvm_riscv_gstage_init(struct kvm_gstage *gstage, struct kvm *kvm)
{
gstage->kvm = kvm;
gstage->flags = 0;
gstage->vmid = READ_ONCE(kvm->arch.vmid.vmid);
gstage->pgd = kvm->arch.pgd;
gstage->pgd_levels = kvm->arch.pgd_levels;
}

/*
 * Return the bitmask of supported HGATP.MODE encodings, where bit N is set
 * when HGATP.MODE == N is supported (i.e. BIT(HGATP_MODE_*)).
 */
static inline u32 kvm_riscv_get_hgatp_mode_mask(void)
{
return kvm_riscv_gstage_supported_mode_mask;
}

static inline bool kvm_riscv_hgatp_mode_is_valid(unsigned long mode)
{
return kvm_riscv_gstage_supported_mode_mask & BIT(mode);
}

#endif
1 change: 1 addition & 0 deletions arch/riscv/include/asm/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ struct kvm_arch {
/* G-stage page table */
pgd_t *pgd;
phys_addr_t pgd_phys;
unsigned long pgd_levels;

/* Guest Timer */
struct kvm_guest_timer timer;
Expand Down
78 changes: 42 additions & 36 deletions arch/riscv/kvm/gstage.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,23 @@
#include <asm/kvm_gstage.h>

#ifdef CONFIG_64BIT
unsigned long kvm_riscv_gstage_mode __ro_after_init = HGATP_MODE_SV39X4;
unsigned long kvm_riscv_gstage_pgd_levels __ro_after_init = 3;
unsigned long kvm_riscv_gstage_max_pgd_levels __ro_after_init = 3;
#else
unsigned long kvm_riscv_gstage_mode __ro_after_init = HGATP_MODE_SV32X4;
unsigned long kvm_riscv_gstage_pgd_levels __ro_after_init = 2;
unsigned long kvm_riscv_gstage_max_pgd_levels __ro_after_init = 2;
#endif
/* Bitmask of supported HGATP.MODE encodings (BIT(HGATP_MODE_*)). */
u32 kvm_riscv_gstage_supported_mode_mask __ro_after_init;

#define gstage_pte_leaf(__ptep) \
(pte_val(*(__ptep)) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC))

static inline unsigned long gstage_pte_index(gpa_t addr, u32 level)
static inline unsigned long gstage_pte_index(struct kvm_gstage *gstage,
gpa_t addr, u32 level)
{
unsigned long mask;
unsigned long shift = HGATP_PAGE_SHIFT + (kvm_riscv_gstage_index_bits * level);

if (level == (kvm_riscv_gstage_pgd_levels - 1))
if (level == gstage->pgd_levels - 1)
mask = (PTRS_PER_PTE * (1UL << kvm_riscv_gstage_pgd_xbits)) - 1;
else
mask = PTRS_PER_PTE - 1;
Expand All @@ -40,12 +41,13 @@ static inline unsigned long gstage_pte_page_vaddr(pte_t pte)
return (unsigned long)pfn_to_virt(__page_val_to_pfn(pte_val(pte)));
}

static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level)
static int gstage_page_size_to_level(struct kvm_gstage *gstage, unsigned long page_size,
u32 *out_level)
{
u32 i;
unsigned long psz = 1UL << 12;

for (i = 0; i < kvm_riscv_gstage_pgd_levels; i++) {
for (i = 0; i < gstage->pgd_levels; i++) {
if (page_size == (psz << (i * kvm_riscv_gstage_index_bits))) {
*out_level = i;
return 0;
Expand All @@ -55,21 +57,23 @@ static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level)
return -EINVAL;
}

static int gstage_level_to_page_order(u32 level, unsigned long *out_pgorder)
static int gstage_level_to_page_order(struct kvm_gstage *gstage, u32 level,
unsigned long *out_pgorder)
{
if (kvm_riscv_gstage_pgd_levels < level)
if (gstage->pgd_levels < level)
return -EINVAL;

*out_pgorder = 12 + (level * kvm_riscv_gstage_index_bits);
return 0;
}

static int gstage_level_to_page_size(u32 level, unsigned long *out_pgsize)
static int gstage_level_to_page_size(struct kvm_gstage *gstage, u32 level,
unsigned long *out_pgsize)
{
int rc;
unsigned long page_order = PAGE_SHIFT;

rc = gstage_level_to_page_order(level, &page_order);
rc = gstage_level_to_page_order(gstage, level, &page_order);
if (rc)
return rc;

Expand All @@ -81,11 +85,11 @@ bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr,
pte_t **ptepp, u32 *ptep_level)
{
pte_t *ptep;
u32 current_level = kvm_riscv_gstage_pgd_levels - 1;
u32 current_level = gstage->pgd_levels - 1;

*ptep_level = current_level;
ptep = (pte_t *)gstage->pgd;
ptep = &ptep[gstage_pte_index(addr, current_level)];
ptep = &ptep[gstage_pte_index(gstage, addr, current_level)];
while (ptep && pte_val(ptep_get(ptep))) {
if (gstage_pte_leaf(ptep)) {
*ptep_level = current_level;
Expand All @@ -97,7 +101,7 @@ bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr,
current_level--;
*ptep_level = current_level;
ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
ptep = &ptep[gstage_pte_index(addr, current_level)];
ptep = &ptep[gstage_pte_index(gstage, addr, current_level)];
} else {
ptep = NULL;
}
Expand All @@ -110,7 +114,7 @@ static void gstage_tlb_flush(struct kvm_gstage *gstage, u32 level, gpa_t addr)
{
unsigned long order = PAGE_SHIFT;

if (gstage_level_to_page_order(level, &order))
if (gstage_level_to_page_order(gstage, level, &order))
return;
addr &= ~(BIT(order) - 1);

Expand All @@ -125,9 +129,9 @@ int kvm_riscv_gstage_set_pte(struct kvm_gstage *gstage,
struct kvm_mmu_memory_cache *pcache,
const struct kvm_gstage_mapping *map)
{
u32 current_level = kvm_riscv_gstage_pgd_levels - 1;
u32 current_level = gstage->pgd_levels - 1;
pte_t *next_ptep = (pte_t *)gstage->pgd;
pte_t *ptep = &next_ptep[gstage_pte_index(map->addr, current_level)];
pte_t *ptep = &next_ptep[gstage_pte_index(gstage, map->addr, current_level)];

if (current_level < map->level)
return -EINVAL;
Expand All @@ -151,7 +155,7 @@ int kvm_riscv_gstage_set_pte(struct kvm_gstage *gstage,
}

current_level--;
ptep = &next_ptep[gstage_pte_index(map->addr, current_level)];
ptep = &next_ptep[gstage_pte_index(gstage, map->addr, current_level)];
}

if (pte_val(*ptep) != pte_val(map->pte)) {
Expand All @@ -175,7 +179,7 @@ int kvm_riscv_gstage_map_page(struct kvm_gstage *gstage,
out_map->addr = gpa;
out_map->level = 0;

ret = gstage_page_size_to_level(page_size, &out_map->level);
ret = gstage_page_size_to_level(gstage, page_size, &out_map->level);
if (ret)
return ret;

Expand Down Expand Up @@ -217,7 +221,7 @@ void kvm_riscv_gstage_op_pte(struct kvm_gstage *gstage, gpa_t addr,
u32 next_ptep_level;
unsigned long next_page_size, page_size;

ret = gstage_level_to_page_size(ptep_level, &page_size);
ret = gstage_level_to_page_size(gstage, ptep_level, &page_size);
if (ret)
return;

Expand All @@ -229,7 +233,7 @@ void kvm_riscv_gstage_op_pte(struct kvm_gstage *gstage, gpa_t addr,
if (ptep_level && !gstage_pte_leaf(ptep)) {
next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
next_ptep_level = ptep_level - 1;
ret = gstage_level_to_page_size(next_ptep_level, &next_page_size);
ret = gstage_level_to_page_size(gstage, next_ptep_level, &next_page_size);
if (ret)
return;

Expand Down Expand Up @@ -263,7 +267,7 @@ void kvm_riscv_gstage_unmap_range(struct kvm_gstage *gstage,

while (addr < end) {
found_leaf = kvm_riscv_gstage_get_leaf(gstage, addr, &ptep, &ptep_level);
ret = gstage_level_to_page_size(ptep_level, &page_size);
ret = gstage_level_to_page_size(gstage, ptep_level, &page_size);
if (ret)
break;

Expand Down Expand Up @@ -297,7 +301,7 @@ void kvm_riscv_gstage_wp_range(struct kvm_gstage *gstage, gpa_t start, gpa_t end

while (addr < end) {
found_leaf = kvm_riscv_gstage_get_leaf(gstage, addr, &ptep, &ptep_level);
ret = gstage_level_to_page_size(ptep_level, &page_size);
ret = gstage_level_to_page_size(gstage, ptep_level, &page_size);
if (ret)
break;

Expand All @@ -315,44 +319,46 @@ void kvm_riscv_gstage_wp_range(struct kvm_gstage *gstage, gpa_t start, gpa_t end

void __init kvm_riscv_gstage_mode_detect(void)
{
kvm_riscv_gstage_supported_mode_mask = 0;
kvm_riscv_gstage_max_pgd_levels = 0;

#ifdef CONFIG_64BIT
/* Try Sv57x4 G-stage mode */
csr_write(CSR_HGATP, HGATP_MODE_SV57X4 << HGATP_MODE_SHIFT);
if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV57X4) {
kvm_riscv_gstage_mode = HGATP_MODE_SV57X4;
kvm_riscv_gstage_pgd_levels = 5;
kvm_riscv_gstage_max_pgd_levels = 5;
kvm_riscv_gstage_supported_mode_mask |= BIT(HGATP_MODE_SV57X4) |
BIT(HGATP_MODE_SV48X4) |
BIT(HGATP_MODE_SV39X4);
goto done;
}

/* Try Sv48x4 G-stage mode */
csr_write(CSR_HGATP, HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT);
if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV48X4) {
kvm_riscv_gstage_mode = HGATP_MODE_SV48X4;
kvm_riscv_gstage_pgd_levels = 4;
kvm_riscv_gstage_max_pgd_levels = 4;
kvm_riscv_gstage_supported_mode_mask |= BIT(HGATP_MODE_SV48X4) |
BIT(HGATP_MODE_SV39X4);
goto done;
}

/* Try Sv39x4 G-stage mode */
csr_write(CSR_HGATP, HGATP_MODE_SV39X4 << HGATP_MODE_SHIFT);
if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV39X4) {
kvm_riscv_gstage_mode = HGATP_MODE_SV39X4;
kvm_riscv_gstage_pgd_levels = 3;
kvm_riscv_gstage_max_pgd_levels = 3;
kvm_riscv_gstage_supported_mode_mask |= BIT(HGATP_MODE_SV39X4);
goto done;
}
#else /* CONFIG_32BIT */
/* Try Sv32x4 G-stage mode */
csr_write(CSR_HGATP, HGATP_MODE_SV32X4 << HGATP_MODE_SHIFT);
if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV32X4) {
kvm_riscv_gstage_mode = HGATP_MODE_SV32X4;
kvm_riscv_gstage_pgd_levels = 2;
kvm_riscv_gstage_max_pgd_levels = 2;
kvm_riscv_gstage_supported_mode_mask |= BIT(HGATP_MODE_SV32X4);
goto done;
}
#endif

/* KVM depends on !HGATP_MODE_OFF */
kvm_riscv_gstage_mode = HGATP_MODE_OFF;
kvm_riscv_gstage_pgd_levels = 0;

done:
csr_write(CSR_HGATP, 0);
kvm_riscv_local_hfence_gvma_all();
Expand Down
12 changes: 6 additions & 6 deletions arch/riscv/kvm/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -105,17 +105,17 @@ static int __init riscv_kvm_init(void)
return rc;

kvm_riscv_gstage_mode_detect();
switch (kvm_riscv_gstage_mode) {
case HGATP_MODE_SV32X4:
switch (kvm_riscv_gstage_max_pgd_levels) {
case 2:
str = "Sv32x4";
break;
case HGATP_MODE_SV39X4:
case 3:
str = "Sv39x4";
break;
case HGATP_MODE_SV48X4:
case 4:
str = "Sv48x4";
break;
case HGATP_MODE_SV57X4:
case 5:
str = "Sv57x4";
break;
default:
Expand Down Expand Up @@ -164,7 +164,7 @@ static int __init riscv_kvm_init(void)
(rc) ? slist : "no features");
}

kvm_info("using %s G-stage page table format\n", str);
kvm_info("highest G-stage page table mode is %s\n", str);

kvm_info("VMID %ld bits available\n", kvm_riscv_gstage_vmid_bits());

Expand Down
Loading
Loading