diff --git a/vmm_core/virt_kvm/src/arch/aarch64/mod.rs b/vmm_core/virt_kvm/src/arch/aarch64/mod.rs index 67c7135389..e6788b7038 100644 --- a/vmm_core/virt_kvm/src/arch/aarch64/mod.rs +++ b/vmm_core/virt_kvm/src/arch/aarch64/mod.rs @@ -14,7 +14,11 @@ use crate::KvmError; use crate::KvmPartition; use crate::KvmPartitionInner; use crate::KvmRunVpError; +use crate::gsi::GsiRouting; +use crate::gsi::KvmIrqFdState; +use crate::gsi::MsiRouteBuilder; use aarch64defs::SystemReg; +use aarch64defs::gic::GicV2mRegister; use bitfield_struct::bitfield; use core::panic; use hvdef::Vtl; @@ -33,6 +37,7 @@ use kvm::kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2; use kvm::kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3; use kvm::kvm_regs; use kvm::user_pt_regs; +use parking_lot::Mutex; use std::convert::Infallible; use std::sync::Arc; use std::sync::atomic::AtomicBool; @@ -779,6 +784,7 @@ impl virt::ProtoPartition for KvmProtoPartition<'_> { eval: false.into(), }) .collect(), + gsi_routing: Mutex::new(GsiRouting::new()), caps, _gic_device: gic_device, gic_v2m: self.config.processor_topology.gic_v2m(), @@ -788,6 +794,7 @@ impl virt::ProtoPartition for KvmProtoPartition<'_> { let partition = KvmPartition { synic_ports: Arc::new(virt::synic::SynicPorts::new(partition.clone())), + irqfd_state: Arc::new(KvmIrqFdState::new(partition.clone())), inner: partition, }; @@ -834,6 +841,12 @@ impl virt::Partition for KvmPartition { ))) } + fn irqfd(&self) -> Option> { + // The irqfd implementation requires a GICv2m frame to be present. + self.inner.gic_v2m?; + Some(self.irqfd_state.clone()) + } + fn request_yield(&self, vp_index: VpIndex) { let vp = &self.inner.vps[vp_index.index() as usize]; if vp.needs_yield.request_yield() { @@ -902,6 +915,42 @@ impl virt::Aarch64Partition for KvmPartition { } } +struct KvmGicV2mRouteBuilder; + +impl MsiRouteBuilder for KvmGicV2mRouteBuilder { + fn routing_entry( + &self, + partition: &KvmPartitionInner, + address: u64, + data: u32, + ) -> Option { + let v2m = partition + .gic_v2m + .as_ref() + .expect("partition does not expose a GICv2m MSI frame"); + let setspi_addr = v2m.frame_base + GicV2mRegister::SETSPI_NS.0 as u64; + if address != setspi_addr { + return None; + } + if !(v2m.spi_base..v2m.spi_base + v2m.spi_count).contains(&data) { + return None; + } + // KVM expects the data to be the GIC SPI number, not the interrupt ID. + // TODO: centralize this constant. + Some(kvm::RoutingEntry::Irqchip { pin: data - 32 }) + } +} + +impl virt::irqfd::IrqFd for KvmIrqFdState { + fn new_irqfd_route(&self) -> anyhow::Result> { + assert!( + self.partition.gic_v2m.is_some(), + "GICv2m is required for irqfd support" + ); + Ok(Box::new(self.new_irqfd_route(KvmGicV2mRouteBuilder)?)) + } +} + impl virt::PartitionAccessState for KvmPartition { type StateAccess<'a> = &'a KvmPartition; diff --git a/vmm_core/virt_kvm/src/arch/x86_64/mod.rs b/vmm_core/virt_kvm/src/arch/x86_64/mod.rs index de46ffd93e..4c91f12b13 100644 --- a/vmm_core/virt_kvm/src/arch/x86_64/mod.rs +++ b/vmm_core/virt_kvm/src/arch/x86_64/mod.rs @@ -15,6 +15,8 @@ use crate::KvmPartitionInner; use crate::KvmProcessorBinder; use crate::KvmRunVpError; use crate::gsi::GsiRouting; +use crate::gsi::KvmIrqFdState; +use crate::gsi::MsiRouteBuilder; use guestmem::DoorbellRegistration; use guestmem::GuestMemory; use guestmem::GuestMemoryError; @@ -459,7 +461,7 @@ impl ProtoPartition for KvmProtoPartition<'_> { let partition = KvmPartition { synic_ports: Arc::new(virt::synic::SynicPorts::new(partition.clone())), - irqfd_state: Arc::new(crate::gsi::KvmIrqFdState::new(partition.clone())), + irqfd_state: Arc::new(KvmIrqFdState::new(partition.clone())), inner: partition, }; @@ -772,8 +774,12 @@ pub(crate) struct KvmMsi { } impl KvmMsi { - pub(crate) fn new(request: MsiRequest) -> Self { + pub(crate) fn new(request: MsiRequest) -> Option { + // TODO: validate the high bits of the request as well, across the codebase. let request_address = MsiAddress::from(request.address as u32); + if request_address.address() != x86defs::msi::MSI_ADDRESS { + return None; + } let request_data = MsiData::from(request.data); // Although architecturally the destination mode bit is only supposed to @@ -796,21 +802,29 @@ impl KvmMsi { .with_vector(request_data.vector()) .into(); - Self { + Some(Self { address_lo, address_hi, data, - } + }) } } impl KvmPartitionInner { fn request_msi(&self, request: MsiRequest) { - let KvmMsi { + let Some(KvmMsi { address_lo, address_hi, data, - } = KvmMsi::new(request); + }) = KvmMsi::new(request) + else { + tracelimit::warn_ratelimited!( + address = request.address, + data = request.data, + "invalid MSI address" + ); + return; + }; if let Err(err) = self.kvm.request_msi(&kvm::kvm_msi { address_lo, address_hi, @@ -829,20 +843,59 @@ impl KvmPartitionInner { } } +struct KvmX86MsiRouteBuilder; + +impl MsiRouteBuilder for KvmX86MsiRouteBuilder { + fn routing_entry( + &self, + _partition: &KvmPartitionInner, + address: u64, + data: u32, + ) -> Option { + let KvmMsi { + address_lo, + address_hi, + data, + } = KvmMsi::new(MsiRequest { address, data })?; + Some(kvm::RoutingEntry::Msi { + address_lo, + address_hi, + data, + }) + } +} + +impl virt::irqfd::IrqFd for KvmIrqFdState { + fn new_irqfd_route(&self) -> anyhow::Result> { + Ok(Box::new(self.new_irqfd_route(KvmX86MsiRouteBuilder)?)) + } +} + impl IoApicRouting for KvmPartitionInner { fn set_irq_route(&self, irq: u8, request: Option) { - let entry = request.map(|request| { - let KvmMsi { - address_lo, - address_hi, - data, - } = KvmMsi::new(request); - kvm::RoutingEntry::Msi { - address_lo, - address_hi, - data, - } - }); + let entry = match request { + Some(request) => match KvmMsi::new(request) { + Some(KvmMsi { + address_lo, + address_hi, + data, + }) => Some(kvm::RoutingEntry::Msi { + address_lo, + address_hi, + data, + }), + None => { + tracelimit::warn_ratelimited!( + irq, + address = request.address, + data = request.data, + "invalid MSI address for IO-APIC route" + ); + None + } + }, + None => None, + }; let mut gsi_routing = self.gsi_routing.lock(); if gsi_routing.set(irq as u32, entry) { gsi_routing.update_routes(&self.kvm); diff --git a/vmm_core/virt_kvm/src/gsi.rs b/vmm_core/virt_kvm/src/gsi.rs index ee2bd2d91b..f070d81311 100644 --- a/vmm_core/virt_kvm/src/gsi.rs +++ b/vmm_core/virt_kvm/src/gsi.rs @@ -12,7 +12,6 @@ use std::sync::Arc; use std::sync::Weak; use std::sync::atomic::AtomicBool; use std::sync::atomic::Ordering; -use virt::irqfd::IrqFd; use virt::irqfd::IrqFdRoute; const NUM_GSIS: usize = 2048; @@ -32,6 +31,7 @@ impl GsiRouting { } /// Claims a specific GSI. + #[cfg_attr(guest_arch = "aarch64", expect(dead_code))] pub fn claim(&mut self, gsi: u32) { let gsi = gsi as usize; assert_eq!(self.states[gsi], GsiState::Unallocated); @@ -83,14 +83,16 @@ impl GsiRouting { impl KvmPartitionInner { /// Reserves a new route, optionally with an associated irqfd event. - pub(crate) fn new_route(self: &Arc, irqfd_event: Option) -> Option { + fn new_route(self: &Arc, irqfd_event: Option) -> Option { let gsi = self.gsi_routing.lock().alloc()?; Some(GsiRoute { partition: Arc::downgrade(self), - gsi, - irqfd_event, - enabled: false.into(), - enable_mutex: Mutex::new(()), + inner: GsiRouteInner { + gsi, + irqfd_event, + enabled: false.into(), + enable_mutex: Mutex::new(()), + }, }) } } @@ -109,9 +111,12 @@ impl GsiState { } /// A GSI route. -#[derive(Debug)] -pub struct GsiRoute { +struct GsiRoute { partition: Weak, + inner: GsiRouteInner, +} + +struct GsiRouteInner { gsi: u32, irqfd_event: Option, enabled: AtomicBool, @@ -120,32 +125,28 @@ pub struct GsiRoute { impl Drop for GsiRoute { fn drop(&mut self) { - self.disable(); - self.set_entry(None); if let Some(partition) = self.partition.upgrade() { - partition.gsi_routing.lock().free(self.gsi); + self.inner.disable(&partition); + self.inner.set_entry(&partition, None); + partition.gsi_routing.lock().free(self.inner.gsi); } } } -impl GsiRoute { - fn set_entry(&self, new_entry: Option) -> Option> { - let partition = self.partition.upgrade(); - if let Some(partition) = &partition { - let mut routing = partition.gsi_routing.lock(); - if routing.set(self.gsi, new_entry) { - routing.update_routes(&partition.kvm); - } +impl GsiRouteInner { + fn set_entry(&self, partition: &KvmPartitionInner, new_entry: Option) { + let mut routing = partition.gsi_routing.lock(); + if routing.set(self.gsi, new_entry) { + routing.update_routes(&partition.kvm); } - partition } /// Enables the route and associated irqfd. - pub fn enable(&self, entry: kvm::RoutingEntry) { + pub fn enable(&self, partition: &KvmPartitionInner, entry: kvm::RoutingEntry) { let _lock = self.enable_mutex.lock(); - let partition = self.set_entry(Some(entry)); + self.set_entry(partition, Some(entry)); if !self.enabled.load(Ordering::Relaxed) { - if let (Some(partition), Some(event)) = (&partition, &self.irqfd_event) { + if let Some(event) = &self.irqfd_event { partition .kvm .irqfd(self.gsi, event.as_fd().as_raw_fd(), true) @@ -158,75 +159,44 @@ impl GsiRoute { /// Disables the associated irqfd. /// /// This actually leaves the route configured, but it disables the irqfd and - /// clears the `enabled` bool so that `signal` won't. - pub fn disable(&self) { + /// clears the `enabled` flag. + pub fn disable(&self, partition: &KvmPartitionInner) { let _lock = self.enable_mutex.lock(); if self.enabled.load(Ordering::Relaxed) { if let Some(irqfd_event) = &self.irqfd_event { - if let Some(partition) = self.partition.upgrade() { - partition - .kvm - .irqfd(self.gsi, irqfd_event.as_fd().as_raw_fd(), false) - .expect("should not fail"); - } - } - self.enabled.store(false, Ordering::Relaxed); - } - } - - /// Returns the configured irqfd event, if there is one. - #[expect(dead_code)] - pub fn irqfd_event(&self) -> Option<&Event> { - self.irqfd_event.as_ref() - } - - /// Signals the interrupt if it is enabled. - #[expect(clippy::assertions_on_constants)] - #[expect(dead_code)] - pub fn signal(&self) { - // Use a relaxed atomic read to avoid extra synchronization in this - // path. It's up to callers to synchronize this with `enable`/`disable` - // if strict ordering is necessary. - if self.enabled.load(Ordering::Relaxed) { - if let Some(event) = &self.irqfd_event { - event.signal(); - } else if let Some(partition) = self.partition.upgrade() { - // TODO: `gsi` must include certain flags on aarch64 to indicate - // the type of the interrupt: SPI or PPI handled by the in-kernel vGIC, - // or the user mode GIC emulator (where have to specify the target VP, too). - - assert!(cfg!(guest_arch = "x86_64")); partition .kvm - .irq_line(self.gsi, true) - .expect("interrupt delivery failure"); + .irqfd(self.gsi, irqfd_event.as_fd().as_raw_fd(), false) + .expect("should not fail"); } + self.enabled.store(false, Ordering::Relaxed); } } } -/// irqfd routing interface for a KVM partition. -/// -/// Wraps the existing [`GsiRoute`] infrastructure to implement the -/// [`IrqFd`]/[`IrqFdRoute`] traits. -pub struct KvmIrqFdState { - partition: Arc, +pub(crate) struct KvmIrqFdState { + pub(crate) partition: Arc, } impl KvmIrqFdState { pub fn new(partition: Arc) -> Self { Self { partition } } -} -impl IrqFd for KvmIrqFdState { - fn new_irqfd_route(&self) -> anyhow::Result> { + pub fn new_irqfd_route( + &self, + builder: T, + ) -> anyhow::Result> { let event = Event::new(); let route = self .partition .new_route(Some(event.clone())) .context("no free GSIs available for irqfd")?; - Ok(Box::new(KvmIrqFdRoute { route, event })) + Ok(KvmIrqFdRoute { + builder, + route, + event, + }) } } @@ -234,35 +204,45 @@ impl IrqFd for KvmIrqFdState { /// /// Cleanup (disable irqfd, clear route, free GSI) is handled by /// [`GsiRoute::drop`]. -struct KvmIrqFdRoute { +pub(crate) struct KvmIrqFdRoute { + builder: T, route: GsiRoute, event: Event, } -impl IrqFdRoute for KvmIrqFdRoute { +pub(crate) trait MsiRouteBuilder: Send + Sync { + fn routing_entry( + &self, + partition: &KvmPartitionInner, + address: u64, + data: u32, + ) -> Option; +} + +impl IrqFdRoute for KvmIrqFdRoute { fn event(&self) -> &Event { &self.event } fn enable(&self, address: u64, data: u32) { - let crate::arch::KvmMsi { - address_lo, - address_hi, - data, - } = crate::arch::KvmMsi::new(virt::irqcon::MsiRequest { address, data }); - let entry = kvm::RoutingEntry::Msi { - address_lo, - address_hi, - data, - }; - self.route.enable(entry); + if let Some(partition) = self.route.partition.upgrade() { + if let Some(entry) = self.builder.routing_entry(&partition, address, data) { + self.route.inner.enable(&partition, entry); + } else { + tracelimit::warn_ratelimited!( + address, + data, + "failed to build irqfd interrupt route" + ); + self.route.inner.disable(&partition); + self.route.inner.set_entry(&partition, None); + } + } } fn disable(&self) { - // Just disarm the irqfd. The routing entry is inert without an - // armed eventfd, so there's no need to remove it and trigger an - // expensive KVM_SET_GSI_ROUTING ioctl. On re-enable, if the - // address/data haven't changed, set_entry will be a no-op. - self.route.disable(); + if let Some(partition) = self.route.partition.upgrade() { + self.route.inner.disable(&partition); + } } } diff --git a/vmm_core/virt_kvm/src/lib.rs b/vmm_core/virt_kvm/src/lib.rs index 1b8467fc49..91609f3214 100644 --- a/vmm_core/virt_kvm/src/lib.rs +++ b/vmm_core/virt_kvm/src/lib.rs @@ -10,7 +10,6 @@ #![expect(clippy::undocumented_unsafe_blocks)] mod arch; -#[cfg(guest_arch = "x86_64")] mod gsi; pub use arch::Kvm; @@ -86,9 +85,8 @@ pub struct KvmPartition { inner: Arc, #[inspect(skip)] synic_ports: Arc>, - #[cfg(guest_arch = "x86_64")] #[inspect(skip)] - irqfd_state: Arc, + irqfd_state: Arc, } #[derive(Inspect)] @@ -100,7 +98,6 @@ struct KvmPartitionInner { gm: GuestMemory, #[inspect(skip)] vps: Vec, - #[cfg(guest_arch = "x86_64")] #[inspect(skip)] gsi_routing: Mutex, caps: virt::PartitionCapabilities,