diff --git a/Cargo.lock b/Cargo.lock index c456516676..27c0b0597b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4627,6 +4627,7 @@ dependencies = [ "mesh", "net_backend", "pal_async", + "parking_lot", "pci_core", "safeatomic", "test_with_tracing", diff --git a/vm/devices/net/gdma/src/bnic.rs b/vm/devices/net/gdma/src/bnic.rs index bf5a94d482..1988a8cbfc 100644 --- a/vm/devices/net/gdma/src/bnic.rs +++ b/vm/devices/net/gdma/src/bnic.rs @@ -139,6 +139,11 @@ impl BufferAccess for GuestBuffers { }, } + if let Some(vlan) = &metadata.vlan { + flags.set_rx_vlantag_present(true); + flags.set_rx_vlan_id(vlan.vlan_id as u32); + } + let packet = &mut self.rx_packets[id.0 as usize]; let cqe_type = if metadata.len > packet.len as usize { @@ -548,6 +553,11 @@ impl TxRxTask { let sge0 = sqe.sgl().first().context("no sgl")?; let total_len: usize = sqe.sgl().iter().map(|sge| sge.size as usize).sum(); + let l2_len = if oob.l_oob.inject_vlan_pri_tag() { + net_backend::ETHERNET_VLAN_HEADER_LEN + } else { + net_backend::ETHERNET_HEADER_LEN + }; let mut meta = TxMetadata { id: TxId(0), segment_count: sqe.sgl().len().try_into().unwrap(), @@ -558,10 +568,19 @@ impl TxRxTask { .with_offload_udp_checksum(oob.s_oob.comp_udp_csum()) .with_is_ipv4(oob.s_oob.is_outer_ipv4()) .with_is_ipv6(oob.s_oob.is_outer_ipv6() && !oob.s_oob.is_outer_ipv4()), - l2_len: 14, - l3_len: oob.s_oob.trans_off().clamp(14, 255) - 14, + l2_len: l2_len as u8, + l3_len: oob.s_oob.trans_off().clamp(l2_len as u16, 255) - l2_len as u16, l4_len: 0, + transport_header_offset: 0, max_segment_size: 0, + vlan: oob + .l_oob + .inject_vlan_pri_tag() + .then(|| net_backend::VlanMetadata { + priority: oob.l_oob.pcp(), + drop_eligible_indicator: oob.l_oob.dei(), + vlan_id: oob.l_oob.vlan_id(), + }), }; if sqe.header.params.client_oob_in_sgl() { diff --git a/vm/devices/net/net_backend/src/lib.rs b/vm/devices/net/net_backend/src/lib.rs index fb838beb61..e096dca6ea 100644 --- a/vm/devices/net/net_backend/src/lib.rs +++ 
b/vm/devices/net/net_backend/src/lib.rs @@ -281,6 +281,24 @@ pub trait BufferAccess { } } +pub const ETHERNET_HEADER_LEN: u32 = 14; +pub const ETHERNET_VLAN_HEADER_LEN: u32 = 18; + +pub const IPV4_MIN_HEADER_LEN: u16 = 20; +pub const IPV6_MIN_HEADER_LEN: u16 = 40; + +#[derive(Debug, Copy, Clone)] +pub struct VlanMetadata { + /// Priority for 802.1Q. Actually a 3-bit value. + pub priority: u8, + /// In pretty much every circumstance this is false. When + /// it is used, setting DEI will inform switches/routing infra + /// that this can be dropped before higher priority traffic. + pub drop_eligible_indicator: bool, + /// The 802.1Q ID for this transmission. Actually a 12-bit value. + pub vlan_id: u16, +} + /// A receive buffer ID. #[derive(Debug, Copy, Clone)] #[repr(transparent)] @@ -308,6 +326,10 @@ pub struct RxMetadata { pub l4_checksum: RxChecksumState, /// The L4 protocol. pub l4_protocol: L4Protocol, + /// Information about 802.1Q VLAN tagging. When a vlan is in use, this structure + /// is populated. Only applies when traffic is being received over an L2 connection, + /// so L3-only or above traffic will not use this option. + pub vlan: Option, } impl Default for RxMetadata { @@ -318,6 +340,7 @@ impl Default for RxMetadata { ip_checksum: RxChecksumState::Unknown, l4_checksum: RxChecksumState::Unknown, l4_protocol: L4Protocol::Unknown, + vlan: None, } } } @@ -396,10 +419,17 @@ pub struct TxMetadata { /// The length of the TCP header. Only guaranteed to be set if various /// offload flags are set. pub l4_len: u8, + /// The offset into the buffer where the L4 header begins (TCP or UDP). Only + /// expected to be set if offload (checksum and/or segmentation) flags are set. + pub transport_header_offset: u16, /// The maximum segment size, used for segmentation offload (TSO or USO). /// Only guaranteed to be set if [`TxFlags::offload_tcp_segmentation`] or /// [`TxFlags::offload_udp_segmentation`] is set. 
pub max_segment_size: u16, + /// Information about 802.1Q VLAN tagging. When a vlan is in use, this structure + /// is populated. Only applies when traffic is being sent over an L2 connection, + /// so L3-only or above traffic will not use this option. + pub vlan: Option, } /// Flags affecting transmit behavior. @@ -444,7 +474,9 @@ impl Default for TxMetadata { l2_len: 0, l3_len: 0, l4_len: 0, + transport_header_offset: 0, max_segment_size: 0, + vlan: None, } } } diff --git a/vm/devices/net/net_backend/src/tests.rs b/vm/devices/net/net_backend/src/tests.rs index fa4ab40667..abe218153e 100644 --- a/vm/devices/net/net_backend/src/tests.rs +++ b/vm/devices/net/net_backend/src/tests.rs @@ -75,3 +75,10 @@ impl BufferAccess for Bufs { *self.inner.rx_metadata[id.0 as usize].lock() = Some(*metadata); } } + +impl Bufs { + /// Returns the [`RxMetadata`] written for the given receive buffer, if any. + pub fn rx_metadata(&self, id: RxId) -> Option { + *self.inner.rx_metadata[id.0 as usize].lock() + } +} diff --git a/vm/devices/net/net_consomme/src/lib.rs b/vm/devices/net/net_consomme/src/lib.rs index 9375f07132..4b7cca5185 100644 --- a/vm/devices/net/net_consomme/src/lib.rs +++ b/vm/devices/net/net_consomme/src/lib.rs @@ -522,6 +522,7 @@ impl consomme::Client for Client<'_> { } else { L4Protocol::Unknown }, + vlan: None, }, data, ); diff --git a/vm/devices/net/net_mana/Cargo.toml b/vm/devices/net/net_mana/Cargo.toml index c8870908e2..43421182d1 100644 --- a/vm/devices/net/net_mana/Cargo.toml +++ b/vm/devices/net/net_mana/Cargo.toml @@ -32,6 +32,7 @@ zerocopy.workspace = true [dev-dependencies] chipset_device.workspace = true gdma.workspace = true +parking_lot.workspace = true pci_core.workspace = true test_with_tracing.workspace = true user_driver_emulated_mock.workspace = true diff --git a/vm/devices/net/net_mana/src/lib.rs b/vm/devices/net/net_mana/src/lib.rs index 4483ea19c7..41ff233959 100644 --- a/vm/devices/net/net_mana/src/lib.rs +++ b/vm/devices/net/net_mana/src/lib.rs 
@@ -58,6 +58,7 @@ use net_backend::TxId; use net_backend::TxOffloadSupport; use net_backend::TxSegment; use net_backend::TxSegmentType; +use net_backend::VlanMetadata; use pal_async::task::Spawn; use safeatomic::AtomicSliceOps; use std::collections::VecDeque; @@ -965,6 +966,11 @@ impl Queue for ManaQueue { } else { (L4Protocol::Unknown, RxChecksumState::Unknown) }; + let vlantag = rx_oob.flags.rx_vlantag_present().then(|| VlanMetadata { + drop_eligible_indicator: false, + priority: 0, + vlan_id: rx_oob.flags.rx_vlan_id() as u16, + }); let len = rx_oob.ppi[0].pkt_len.into(); pool.write_header( rx.id, @@ -974,6 +980,7 @@ impl Queue for ManaQueue { ip_checksum, l4_checksum, l4_protocol, + vlan: vlantag, }, ); if rx.bounced_len_with_padding > 0 { @@ -1163,10 +1170,16 @@ impl ManaQueue { .set_comp_tcp_csum(meta.flags.offload_tcp_checksum()); oob.s_oob .set_comp_udp_csum(meta.flags.offload_udp_checksum()); - if meta.flags.offload_tcp_checksum() { + if meta.flags.offload_tcp_checksum() || meta.flags.offload_udp_checksum() { oob.s_oob.set_trans_off(meta.l2_len as u16 + meta.l3_len); } - let short_format = self.vp_offset <= 0xff; + if let Some(vlan) = &meta.vlan { + oob.l_oob.set_inject_vlan_pri_tag(true); + oob.l_oob.set_vlan_id(vlan.vlan_id); + oob.l_oob.set_pcp(vlan.priority); + oob.l_oob.set_dei(vlan.drop_eligible_indicator); + } + let short_format = self.vp_offset <= 0xff && meta.vlan.is_none(); if short_format { oob.s_oob.set_pkt_fmt(MANA_SHORT_PKT_FMT); oob.s_oob.set_short_vp_offset(self.vp_offset as u8); diff --git a/vm/devices/net/net_mana/src/test.rs b/vm/devices/net/net_mana/src/test.rs index b41fc70ce2..ddf8a671f7 100644 --- a/vm/devices/net/net_mana/src/test.rs +++ b/vm/devices/net/net_mana/src/test.rs @@ -7,9 +7,11 @@ use crate::GuestDmaMode; use crate::ManaEndpoint; use crate::ManaTestConfiguration; use crate::QueueStats; +use async_trait::async_trait; use chipset_device::mmio::ExternallyManagedMmioIntercepts; use gdma::VportConfig; use 
gdma_defs::bnic::ManaQueryDeviceCfgResp; +use inspect::InspectMut; use inspect_counters::Counter; use mana_driver::mana::ManaDevice; use mesh::CancelContext; @@ -24,7 +26,11 @@ use net_backend::loopback::LoopbackEndpoint; use pal_async::DefaultDriver; use pal_async::async_test; use pci_core::msi::MsiConnection; +use std::collections::VecDeque; use std::future::poll_fn; +use std::sync::Arc; +use std::task::Context; +use std::task::Poll; use std::time::Duration; use test_with_tracing::test; use user_driver_emulated_mock::DeviceTestMemory; @@ -879,6 +885,49 @@ fn build_tx_segments( } } +/// Like [`build_tx_segments`] but with 802.1Q VLAN tagging enabled. +fn build_tx_segments_vlan( + packet_len: usize, + num_segments: usize, + vlan_id: u16, + pkt_builder: &mut TxPacketBuilder, +) { + assert_eq!(packet_len % num_segments, 0); + let tx_id = 1; + let segment_len = packet_len / num_segments; + let tx_metadata = net_backend::TxMetadata { + id: TxId(tx_id), + segment_count: num_segments as u8, + len: packet_len as u32, + l2_len: 18, // Ethernet header (with VLAN) + l3_len: 20, // IPv4 header + l4_len: 20, // TCP header + max_segment_size: 1460, // Typical MSS for Ethernet + vlan: Some(net_backend::VlanMetadata { + priority: 0, + drop_eligible_indicator: false, + vlan_id, + }), + ..Default::default() + }; + + let mut gpa = pkt_builder.data_len(); + pkt_builder.push(TxSegment { + ty: net_backend::TxSegmentType::Head(tx_metadata.clone()), + gpa, + len: segment_len as u32, + }); + + for _ in 0..(num_segments - 1) { + gpa += segment_len as u64; + pkt_builder.push(TxSegment { + ty: net_backend::TxSegmentType::Tail, + gpa, + len: segment_len as u32, + }); + } +} + async fn test_endpoint( driver: DefaultDriver, dma_mode: GuestDmaMode, @@ -1038,3 +1087,422 @@ fn get_queue_stats(queue_stats: Option<&dyn net_backend::BackendQueueStats>) -> ..Default::default() } } + +// --------------------------------------------------------------------------- +// VLAN-preserving loopback endpoint 
+// --------------------------------------------------------------------------- + +/// Shared state capturing TX metadata from the GDMA emulator, accessible to +/// test assertions. +#[derive(Clone)] +struct VlanTestState { + inner: Arc>, +} + +#[derive(Default)] +struct VlanTestStateInner { + /// TX metadata captured from each transmitted packet, in order. + tx_metadata: Vec, +} + +impl VlanTestState { + fn new() -> Self { + Self { + inner: Arc::new(parking_lot::Mutex::new(VlanTestStateInner::default())), + } + } + + fn tx_metadata(&self) -> Vec { + self.inner.lock().tx_metadata.clone() + } +} + +/// A loopback endpoint that preserves VLAN metadata from TX → RX and captures +/// every transmitted packet's [`TxMetadata`] for later inspection. +struct VlanPreservingEndpoint { + state: VlanTestState, +} + +impl VlanPreservingEndpoint { + fn new(state: VlanTestState) -> Self { + Self { state } + } +} + +struct VlanPreservingQueue { + rx_avail: VecDeque, + rx_done: VecDeque, + state: VlanTestState, +} + +#[async_trait] +impl Endpoint for VlanPreservingEndpoint { + fn endpoint_type(&self) -> &'static str { + "vlan_loopback" + } + + async fn get_queues( + &mut self, + config: Vec, + _rss: Option<&net_backend::RssConfig<'_>>, + queues: &mut Vec>, + ) -> anyhow::Result<()> { + queues.extend(config.into_iter().map(|_config| { + Box::new(VlanPreservingQueue { + rx_avail: VecDeque::new(), + rx_done: VecDeque::new(), + state: self.state.clone(), + }) as _ + })); + Ok(()) + } + + async fn stop(&mut self) {} + + fn is_ordered(&self) -> bool { + true + } + + fn multiqueue_support(&self) -> net_backend::MultiQueueSupport { + net_backend::MultiQueueSupport { + max_queues: u16::MAX, + indirection_table_size: 64, + } + } +} + +impl InspectMut for VlanPreservingEndpoint { + fn inspect_mut(&mut self, _req: inspect::Request<'_>) {} +} + +impl InspectMut for VlanPreservingQueue { + fn inspect_mut(&mut self, _req: inspect::Request<'_>) {} +} + +impl net_backend::Queue for 
VlanPreservingQueue { + fn poll_ready(&mut self, _cx: &mut Context<'_>, _pool: &mut dyn BufferAccess) -> Poll<()> { + if self.rx_done.is_empty() { + Poll::Pending + } else { + Poll::Ready(()) + } + } + + fn rx_avail(&mut self, _pool: &mut dyn BufferAccess, done: &[RxId]) { + self.rx_avail.extend(done); + } + + fn rx_poll( + &mut self, + _pool: &mut dyn BufferAccess, + packets: &mut [RxId], + ) -> anyhow::Result { + let n = packets.len().min(self.rx_done.len()); + for (d, s) in packets.iter_mut().zip(self.rx_done.drain(..n)) { + *d = s; + } + Ok(n) + } + + fn tx_avail( + &mut self, + pool: &mut dyn BufferAccess, + mut segments: &[TxSegment], + ) -> anyhow::Result<(bool, usize)> { + let mut sent = 0; + while !segments.is_empty() && !self.rx_avail.is_empty() { + let (meta, _, _) = net_backend::next_packet(segments); + let vlan = meta.vlan; + { + let mut state = self.state.inner.lock(); + state.tx_metadata.push(meta.clone()); + } + let before = segments.len(); + let packet = net_backend::linearize(pool, &mut segments)?; + sent += before - segments.len(); + let rx_id = self.rx_avail.pop_front().unwrap(); + pool.write_packet( + rx_id, + &net_backend::RxMetadata { + offset: 0, + len: packet.len(), + vlan, + ..Default::default() + }, + &packet, + ); + self.rx_done.push_back(rx_id); + } + Ok((true, sent)) + } + + fn tx_poll( + &mut self, + _pool: &mut dyn BufferAccess, + _done: &mut [TxId], + ) -> Result { + Ok(0) + } +} + +/// Run a VLAN-aware send/receive test. Returns the captured TX metadata (from +/// the GDMA→backend boundary) and the per-buffer RX metadata (from the +/// net_mana→pool boundary), along with queue stats. 
+async fn test_vlan_endpoint( + driver: DefaultDriver, + dma_mode: GuestDmaMode, + pkt_builder: &TxPacketBuilder, + expected_num_send_packets: usize, + expected_num_received_packets: usize, +) -> ( + QueueStats, + Vec, + Vec>, +) { + let pages = 256; + let allow_dma = dma_mode == GuestDmaMode::DirectDma; + let mem: DeviceTestMemory = DeviceTestMemory::new(pages * 2, allow_dma, "test_vlan_endpoint"); + let payload_mem = mem.payload_mem(); + let data_to_send = pkt_builder.packet_data(); + let tx_segments = pkt_builder.segments(); + + let vlan_state = VlanTestState::new(); + let msi_conn = MsiConnection::new(); + let device = gdma::GdmaDevice::new( + &VmTaskDriverSource::new(SingleDriverBackend::new(driver.clone())), + mem.guest_memory(), + msi_conn.target(), + vec![VportConfig { + mac_address: [1, 2, 3, 4, 5, 6].into(), + endpoint: Box::new(VlanPreservingEndpoint::new(vlan_state.clone())), + }], + &mut ExternallyManagedMmioIntercepts, + ); + let device = EmulatedDevice::new(device, msi_conn, mem.dma_client()); + let dev_config = ManaQueryDeviceCfgResp { + pf_cap_flags1: 0.into(), + pf_cap_flags2: 0, + pf_cap_flags3: 0, + pf_cap_flags4: 0, + max_num_vports: 1, + reserved: 0, + max_num_eqs: 64, + adapter_mtu: 0, + reserved2: 0, + adapter_link_speed_mbps: 0, + }; + let thing = ManaDevice::new(&driver, device, 1, 1, None).await.unwrap(); + let vport = thing.new_vport(0, None, &dev_config).await.unwrap(); + let mut endpoint = ManaEndpoint::new(driver.clone(), vport, dma_mode).await; + let mut queues = Vec::new(); + let mut pool = net_backend::tests::Bufs::new(payload_mem.clone()); + endpoint + .get_queues( + vec![QueueConfig { + driver: Box::new(driver.clone()), + }], + None, + &mut queues, + ) + .await + .unwrap(); + + queues[0].rx_avail(&mut pool, &(1..128u32).map(RxId).collect::>()); + + payload_mem.write_at(0, &data_to_send).unwrap(); + + queues[0].tx_avail(&mut pool, tx_segments).unwrap(); + + let mut rx_packets = (0..expected_num_received_packets.max(2)) + .map(|i| 
RxId(i as u32)) + .collect::>(); + let mut rx_packets_n = 0; + let mut tx_done = vec![TxId(0); expected_num_send_packets.max(2)]; + let mut tx_done_n = 0; + + let done = |rx_n: usize, tx_n: usize| -> bool { + rx_n >= expected_num_received_packets && tx_n >= expected_num_send_packets + }; + + loop { + let mut context = CancelContext::new().with_timeout(Duration::from_secs(1)); + match context + .until_cancelled(poll_fn(|cx| queues[0].poll_ready(cx, &mut pool))) + .await + { + Err(CancelReason::DeadlineExceeded) => break, + Err(e) => { + tracing::error!(error = ?e, "Failed to poll queue ready"); + break; + } + _ => {} + } + rx_packets_n += queues[0] + .rx_poll(&mut pool, &mut rx_packets[rx_packets_n..]) + .unwrap(); + tx_done_n += queues[0] + .tx_poll(&mut pool, &mut tx_done[tx_done_n..]) + .unwrap_or(0); + if done(rx_packets_n, tx_done_n) { + break; + } + } + assert_eq!(rx_packets_n, expected_num_received_packets); + assert_eq!(tx_done_n, expected_num_send_packets); + + // Gather per-buffer RX metadata written by net_mana. + let rx_meta: Vec> = rx_packets[..rx_packets_n] + .iter() + .map(|id| pool.rx_metadata(*id)) + .collect(); + + let stats = get_queue_stats(queues[0].queue_stats()); + let captured_tx = vlan_state.tx_metadata(); + drop(queues); + endpoint.stop().await; + (stats, captured_tx, rx_meta) +} + +// --------------------------------------------------------------------------- +// VLAN tests +// --------------------------------------------------------------------------- + +/// Verify that a single VLAN-tagged packet round-trips through the MANA TX and +/// RX paths with the VLAN ID preserved. 
+#[async_test] +async fn test_vlan_tx_rx_roundtrip_direct_dma(driver: DefaultDriver) { + let mut pkt_builder = TxPacketBuilder::new(); + build_tx_segments_vlan(1138, 1, 42, &mut pkt_builder); + + let (stats, captured_tx, rx_meta) = test_vlan_endpoint( + driver, + GuestDmaMode::DirectDma, + &pkt_builder, + 1, // expected TX + 1, // expected RX + ) + .await; + + assert_eq!(stats.tx_packets.get(), 1); + assert_eq!(stats.rx_packets.get(), 1); + + // TX: GDMA decoded the OOB and the backend received VLAN metadata. + assert_eq!(captured_tx.len(), 1); + let tx_vlan = captured_tx[0].vlan.expect("TX metadata should carry VLAN"); + assert_eq!(tx_vlan.vlan_id, 42); + + // RX: net_mana parsed the CQE and surfaced the VLAN to the pool. + let rx = rx_meta[0].expect("RX metadata should be present"); + let rx_vlan = rx.vlan.expect("RX metadata should carry VLAN"); + assert_eq!(rx_vlan.vlan_id, 42); +} + +/// Same round-trip but with bounce-buffer DMA mode. +#[async_test] +async fn test_vlan_tx_rx_roundtrip_bounce_buffer(driver: DefaultDriver) { + let mut pkt_builder = TxPacketBuilder::new(); + build_tx_segments_vlan(1138, 1, 99, &mut pkt_builder); + + let (stats, captured_tx, rx_meta) = + test_vlan_endpoint(driver, GuestDmaMode::BounceBuffer, &pkt_builder, 1, 1).await; + + assert_eq!(stats.tx_packets.get(), 1); + assert_eq!(stats.rx_packets.get(), 1); + + let tx_vlan = captured_tx[0].vlan.expect("TX metadata should carry VLAN"); + assert_eq!(tx_vlan.vlan_id, 99); + + let rx_vlan = rx_meta[0] + .expect("RX metadata should be present") + .vlan + .expect("RX metadata should carry VLAN"); + assert_eq!(rx_vlan.vlan_id, 99); +} + +/// Verify that a non-VLAN packet does NOT produce VLAN metadata. 
+#[async_test] +async fn test_no_vlan_rx_metadata_when_untagged(driver: DefaultDriver) { + let mut pkt_builder = TxPacketBuilder::new(); + build_tx_segments(1138, 1, false, &mut pkt_builder); + + let (_stats, captured_tx, rx_meta) = + test_vlan_endpoint(driver, GuestDmaMode::DirectDma, &pkt_builder, 1, 1).await; + + assert!( + captured_tx[0].vlan.is_none(), + "TX metadata must not carry VLAN for an untagged packet" + ); + + let rx = rx_meta[0].expect("RX metadata should be present"); + assert!( + rx.vlan.is_none(), + "RX metadata must not carry VLAN for an untagged packet" + ); +} + +/// Mix of VLAN-tagged and untagged packets in a single TX batch. +#[async_test] +async fn test_vlan_mixed_batch(driver: DefaultDriver) { + let mut pkt_builder = TxPacketBuilder::new(); + + // Packet 0: no VLAN + build_tx_segments(550, 1, false, &mut pkt_builder); + // Packet 1: VLAN 100 + build_tx_segments_vlan(550, 1, 100, &mut pkt_builder); + // Packet 2: no VLAN, multi-segment + build_tx_segments(1130, 10, false, &mut pkt_builder); + // Packet 3: VLAN 4094 (max 12-bit value) + build_tx_segments_vlan(550, 1, 4094, &mut pkt_builder); + + let (stats, captured_tx, rx_meta) = + test_vlan_endpoint(driver, GuestDmaMode::DirectDma, &pkt_builder, 4, 4).await; + + assert_eq!(stats.tx_packets.get(), 4); + assert_eq!(stats.rx_packets.get(), 4); + + // Packet 0: no VLAN + assert!(captured_tx[0].vlan.is_none()); + assert!( + rx_meta[0] + .expect("RX metadata should be present") + .vlan + .is_none() + ); + + // Packet 1: VLAN 100 + assert_eq!( + captured_tx[1].vlan.expect("TX should carry VLAN").vlan_id, + 100 + ); + assert_eq!( + rx_meta[1] + .expect("RX metadata should be present") + .vlan + .expect("RX should carry VLAN") + .vlan_id, + 100 + ); + + // Packet 2: no VLAN + assert!(captured_tx[2].vlan.is_none()); + assert!( + rx_meta[2] + .expect("RX metadata should be present") + .vlan + .is_none() + ); + + // Packet 3: VLAN 4094 + assert_eq!( + captured_tx[3].vlan.expect("TX should carry 
VLAN").vlan_id, + 4094 + ); + assert_eq!( + rx_meta[3] + .expect("RX metadata should be present") + .vlan + .expect("RX should carry VLAN") + .vlan_id, + 4094 + ); +} diff --git a/vm/devices/net/net_tap/src/lib.rs b/vm/devices/net/net_tap/src/lib.rs index da9bcb69c6..6afba1d559 100644 --- a/vm/devices/net/net_tap/src/lib.rs +++ b/vm/devices/net/net_tap/src/lib.rs @@ -520,6 +520,7 @@ fn parse_vnet_hdr(hdr: &VirtioNetHdr) -> RxMetadata { ip_checksum, l4_checksum, l4_protocol, + vlan: None, } } diff --git a/vm/devices/net/net_tap/tests/tap_tests.rs b/vm/devices/net/net_tap/tests/tap_tests.rs index 7f275f0281..424b39c88d 100644 --- a/vm/devices/net/net_tap/tests/tap_tests.rs +++ b/vm/devices/net/net_tap/tests/tap_tests.rs @@ -572,7 +572,9 @@ mod tap_tests { l2_len: 14, l3_len: 20, l4_len: 20, + transport_header_offset: 34, max_segment_size: 1460, + vlan: None, }), gpa: 0, len: frame_len, diff --git a/vm/devices/net/netvsp/src/buffers.rs b/vm/devices/net/netvsp/src/buffers.rs index 68a381a5e7..669f97df94 100644 --- a/vm/devices/net/netvsp/src/buffers.rs +++ b/vm/devices/net/netvsp/src/buffers.rs @@ -150,16 +150,16 @@ impl BufferAccess for BufferPool { struct Header { header: rndisprot::MessageHeader, packet: rndisprot::Packet, - per_packet_info: PerPacketInfo, } #[repr(C)] #[derive(zerocopy::IntoBytes, Immutable, KnownLayout, Debug)] struct PerPacketInfo { header: rndisprot::PerPacketInfo, - checksum: rndisprot::RxTcpIpChecksumInfo, + payload: u32, } + let mut ppi_count = 1; let checksum = rndisprot::RxTcpIpChecksumInfo::new_zeroed() .set_ip_checksum_failed(metadata.ip_checksum == RxChecksumState::Bad) .set_ip_checksum_succeeded(metadata.ip_checksum.is_valid()) @@ -184,6 +184,32 @@ impl BufferAccess for BufferPool { .set_udp_checksum_succeeded( metadata.l4_protocol == L4Protocol::Udp && metadata.l4_checksum.is_valid(), ); + let checksum_ppi = PerPacketInfo { + header: rndisprot::PerPacketInfo { + size: size_of::() as u32, + typ: rndisprot::PPI_TCP_IP_CHECKSUM, + 
per_packet_information_offset: size_of::() as u32, + }, + payload: checksum.0, + }; + + let vlan = if let Some(vlan_info) = metadata.vlan { + ppi_count += 1; + Some(PerPacketInfo { + header: rndisprot::PerPacketInfo { + size: size_of::() as u32, + typ: rndisprot::PPI_VLAN, + per_packet_information_offset: size_of::() as u32, + }, + payload: rndisprot::EthVlanInfo::new_zeroed() + .set_priority(vlan_info.priority) + .set_drop_eligible_indicator(vlan_info.drop_eligible_indicator) + .set_vlan_id(vlan_info.vlan_id) + .0, + }) + } else { + None + }; let header = Header { header: rndisprot::MessageHeader { @@ -202,21 +228,20 @@ impl BufferAccess for BufferPool { oob_data_length: 0, num_oob_data_elements: 0, per_packet_info_offset: size_of::() as u32, - per_packet_info_length: size_of::() as u32, + per_packet_info_length: ppi_count * size_of::() as u32, vc_handle: 0, reserved: 0, }, - per_packet_info: PerPacketInfo { - header: rndisprot::PerPacketInfo { - size: size_of::() as u32, - typ: rndisprot::PPI_TCP_IP_CHECKSUM, - per_packet_information_offset: size_of::() as u32, - }, - checksum, - }, }; - self.buffers.write_at(self.offset(id), header.as_bytes()); + let mut offset = self.offset(id); + self.buffers.write_at(offset, header.as_bytes()); + offset += size_of::
() as u32; + self.buffers.write_at(offset, checksum_ppi.as_bytes()); + offset += size_of::() as u32; + if let Some(vlan_ppi) = vlan { + self.buffers.write_at(offset, vlan_ppi.as_bytes()); + } } } diff --git a/vm/devices/net/netvsp/src/lib.rs b/vm/devices/net/netvsp/src/lib.rs index a5c2410306..5d900d9047 100644 --- a/vm/devices/net/netvsp/src/lib.rs +++ b/vm/devices/net/netvsp/src/lib.rs @@ -513,6 +513,8 @@ struct QueueStats { tx_lso_packets: Counter, tx_checksum_packets: Counter, tx_invalid_lso_packets: Counter, + rx_vlan_packets: Counter, + tx_vlan_packets: Counter, tx_packets_per_wake: Histogram<10>, rx_packets_per_wake: Histogram<10>, } @@ -2549,23 +2551,7 @@ impl NetChannel { .set_offload_ip_header_checksum(n.is_ipv4() && n.ip_header_checksum()); metadata.flags.set_is_ipv4(n.is_ipv4()); metadata.flags.set_is_ipv6(n.is_ipv6() && !n.is_ipv4()); - metadata.l2_len = ETHERNET_HEADER_LEN as u8; - if metadata.flags.offload_tcp_checksum() - || metadata.flags.offload_udp_checksum() - { - metadata.l3_len = if n.tcp_header_offset() >= metadata.l2_len as u16 { - n.tcp_header_offset() - metadata.l2_len as u16 - } else if n.is_ipv4() { - let mut reader = data.clone().reader(mem); - reader.skip(metadata.l2_len as usize)?; - let mut b = 0; - reader.read(std::slice::from_mut(&mut b))?; - (b as u16 >> 4) * 4 - } else { - // Hope there are no extensions. 
- 40 - }; - } + metadata.transport_header_offset = n.tcp_header_offset(); } rndisprot::PPI_LSO => { let n: rndisprot::TcpLsoInfo = d.reader(mem).read_plain()?; @@ -2575,37 +2561,75 @@ impl NetChannel { metadata.flags.set_offload_ip_header_checksum(n.is_ipv4()); metadata.flags.set_is_ipv4(n.is_ipv4()); metadata.flags.set_is_ipv6(n.is_ipv6() && !n.is_ipv4()); - metadata.l2_len = ETHERNET_HEADER_LEN as u8; - if n.tcp_header_offset() < metadata.l2_len as u16 { - return Err(WorkerError::InvalidTcpHeaderOffset(n.tcp_header_offset())); - } - metadata.l3_len = n.tcp_header_offset() - metadata.l2_len as u16; - // Offset of `Data Offset` field in the TCP header (byte 12) - const TCP_DOFF_BYTE_OFFSET: u32 = 12; - let tcp_hdr_doff_offset = - u32::from(n.tcp_header_offset()) + TCP_DOFF_BYTE_OFFSET; - // Validate TCP header Data Offset 4 bit nibble within the packet data bounds. - if tcp_hdr_doff_offset >= request.data_length { - return Err(WorkerError::InvalidTcpHeaderOffset(n.tcp_header_offset())); - } - metadata.l4_len = { - let mut reader = data.clone().reader(mem); - reader.skip(tcp_hdr_doff_offset as usize)?; - let mut b = 0; - reader.read(std::slice::from_mut(&mut b))?; - (b >> 4) * 4 - }; metadata.max_segment_size = n.mss() as u16; + metadata.transport_header_offset = n.tcp_header_offset(); + } + rndisprot::PPI_VLAN => { + let n: rndisprot::EthVlanInfo = d.reader(mem).read_plain()?; - if request.data_length >= rndisprot::LSO_MAX_OFFLOAD_SIZE { - // Not strictly enforced. 
- stats.tx_invalid_lso_packets.increment(); - } + metadata.vlan = Some(net_backend::VlanMetadata { + priority: n.priority(), + drop_eligible_indicator: n.drop_eligible_indicator(), + vlan_id: n.vlan_id(), + }); + stats.tx_vlan_packets.increment(); } _ => {} } ppi = rest; } + + metadata.l2_len = if metadata.vlan.is_some() { + net_backend::ETHERNET_VLAN_HEADER_LEN + } else { + net_backend::ETHERNET_HEADER_LEN + } as u8; + + if metadata.flags.offload_tcp_checksum() || metadata.flags.offload_udp_checksum() { + // The offset must be set if we're handling checksums; we already know from the above logic + // that the L4 checksum-type will match the L4 protocol. + if (metadata.transport_header_offset < metadata.l2_len as u16) + || (metadata.flags.is_ipv4() + && metadata.transport_header_offset + < (metadata.l2_len as u16 + net_backend::IPV4_MIN_HEADER_LEN)) + || (metadata.flags.is_ipv6() + && metadata.transport_header_offset + < (metadata.l2_len as u16 + net_backend::IPV6_MIN_HEADER_LEN)) + || (metadata.transport_header_offset as u32 >= request.data_length) + { + return Err(WorkerError::InvalidTcpHeaderOffset( + metadata.transport_header_offset, + )); + } + + metadata.l3_len = metadata.transport_header_offset - metadata.l2_len as u16; + } + + if metadata.flags.offload_tcp_segmentation() { + const TCP_DOFF_BYTE_OFFSET: u32 = 12; + let tcp_hdr_doff_offset = + u32::from(metadata.transport_header_offset) + TCP_DOFF_BYTE_OFFSET; + // Validate TCP header Data Offset 4 bit nibble within the packet data bounds. + if tcp_hdr_doff_offset >= request.data_length { + return Err(WorkerError::InvalidTcpHeaderOffset( + metadata.transport_header_offset, + )); + } + metadata.l4_len = { + let mut reader = data.clone().reader(mem); + reader.skip(tcp_hdr_doff_offset as usize)?; + let mut b = 0; + reader.read(std::slice::from_mut(&mut b))?; + (b >> 4) * 4 + }; + + if request.data_length >= rndisprot::LSO_MAX_OFFLOAD_SIZE { + // Not strictly enforced. 
+ stats.tx_invalid_lso_packets.increment(); + } + } + + // TODO: USO support is not present. } let start = segments.len(); @@ -2627,6 +2651,9 @@ impl NetChannel { if metadata.flags.offload_tcp_segmentation() { stats.tx_lso_packets.increment(); } + if metadata.vlan.is_some() { + stats.tx_vlan_packets.increment(); + } segments[start].ty = net_backend::TxSegmentType::Head(metadata); @@ -2775,6 +2802,8 @@ impl NetChannel { error = &err as &dyn std::error::Error, "Failed to notify guest that data path is now synthetic" ); + } else { + tracing::info!("Switched data path to synthetic") } } @@ -3266,8 +3295,6 @@ const DEFAULT_MTU: u32 = 1514; const MIN_MTU: u32 = DEFAULT_MTU; const MAX_MTU: u32 = 9216; -const ETHERNET_HEADER_LEN: u32 = 14; - impl Adapter { fn get_guest_vf_serial_number(&self, vfid: u32) -> u32 { if let Some(guest_os_id) = self.get_guest_os_id.as_ref().map(|f| f()) { @@ -3387,7 +3414,7 @@ impl Adapter { rndisprot::Oid::OID_GEN_MAXIMUM_LOOKAHEAD | rndisprot::Oid::OID_GEN_CURRENT_LOOKAHEAD | rndisprot::Oid::OID_GEN_MAXIMUM_FRAME_SIZE => { - let len: u32 = buffers.ndis_config.mtu - ETHERNET_HEADER_LEN; + let len: u32 = buffers.ndis_config.mtu - net_backend::ETHERNET_HEADER_LEN; writer.write(len.as_bytes())?; } rndisprot::Oid::OID_GEN_MAXIMUM_TOTAL_SIZE @@ -3486,10 +3513,10 @@ impl Adapter { }, ipv4_enabled: rndisprot::NDIS_OFFLOAD_SUPPORTED, ipv4_encapsulation_type: rndisprot::NDIS_ENCAPSULATION_IEEE_802_3, - ipv4_header_size: ETHERNET_HEADER_LEN, + ipv4_header_size: net_backend::ETHERNET_HEADER_LEN, ipv6_enabled: rndisprot::NDIS_OFFLOAD_SUPPORTED, ipv6_encapsulation_type: rndisprot::NDIS_ENCAPSULATION_IEEE_802_3, - ipv6_header_size: ETHERNET_HEADER_LEN, + ipv6_header_size: net_backend::ETHERNET_HEADER_LEN, } .as_bytes()[..rndisprot::NDIS_SIZEOF_OFFLOAD_ENCAPSULATION_REVISION_1], )?; @@ -3719,13 +3746,13 @@ impl Adapter { )?; if encap.ipv4_enabled == rndisprot::NDIS_OFFLOAD_SET_ON && (encap.ipv4_encapsulation_type != rndisprot::NDIS_ENCAPSULATION_IEEE_802_3 - 
/// Per-packet 802.1Q VLAN information carried as the payload of the
/// `PPI_VLAN` RNDIS per-packet-info entry, packed into a single `u32`:
///
/// - bits 0..=2: user priority (PCP)
/// - bit 3: drop eligible indicator (DEI)
/// - bits 4..=15: VLAN ID
///
/// NOTE(review): this bit layout is what the setters/getters below encode;
/// presumably it mirrors the NDIS 802.1Q NET_BUFFER_LIST info layout — verify
/// against the NDIS documentation if this struct is ever sent to other
/// consumers.
#[repr(C)]
#[derive(Debug, Copy, Clone, IntoBytes, Immutable, KnownLayout, FromBytes)]
pub struct EthVlanInfo(pub u32);

impl EthVlanInfo {
    /// Sets the 3-bit 802.1Q user priority (PCP). Any bits above the low
    /// three are silently discarded (masked off).
    pub fn set_priority(mut self, priority: u8) -> Self {
        self.0 = (self.0 & !0x7) | (priority as u32 & 0x7);
        self
    }

    /// Returns the 3-bit user priority (PCP).
    pub fn priority(&self) -> u8 {
        (self.0 as u8) & 0x7
    }

    /// Sets the drop eligible indicator (DEI). In practical use this should
    /// always be false, but who knows?
    pub fn set_drop_eligible_indicator(mut self, indicator: bool) -> Self {
        self.0 = (self.0 & !0x8) | if indicator { 0x8 } else { 0x0 };
        self
    }

    /// Returns the drop eligible indicator (DEI).
    pub fn drop_eligible_indicator(&self) -> bool {
        self.0 & 0x8 != 0
    }

    /// Sets the 12-bit VLAN ID (stored in bits 4..=15). Any bits outside the
    /// 12-bit range are silently truncated (masked off), not rejected.
    pub fn set_vlan_id(mut self, vlan_id: u16) -> Self {
        self.0 = (self.0 & !0xFFF0) | ((vlan_id as u32 & 0xFFF) << 4);
        self
    }

    /// Returns the 12-bit VLAN ID.
    pub fn vlan_id(&self) -> u16 {
        (self.0 >> 4) as u16 & 0xfff
    }
}
    /// Send an RX packet on the given queue with default (no offload) metadata.
    ///
    /// `RxMetadata::len` is filled in from `data.len()`; all other fields
    /// (checksum states, L4 protocol, VLAN) keep their defaults.
    ///
    /// Panics if `queue_idx` is out of range for the configured queues.
    pub fn send_rx(&self, queue_idx: usize, data: Vec<u8>) {
        let metadata = RxMetadata {
            len: data.len(),
            ..Default::default()
        };
        self.queues[queue_idx].send((data, metadata));
    }

    /// Send an RX packet on the given queue with explicit metadata.
    ///
    /// The caller must keep `metadata.len` equal to `data.len()`; the test
    /// queue asserts this when the packet is polled.
    ///
    /// Panics if `queue_idx` is out of range for the configured queues.
    pub fn send_rx_with_metadata(&self, queue_idx: usize, data: Vec<u8>, metadata: RxMetadata) {
        self.queues[queue_idx].send((data, metadata));
    }
?packet, "returning packet on receive path"); - let mut packet = &packet[..]; - self.scratch_segments.clear(); - pool.push_guest_addresses(rx_id, &mut self.scratch_segments); - let guest_memory = pool.guest_memory(); - for seg in &self.scratch_segments { - // N.B. The packet data is written after the implicit header, - // which is 256 bytes long. The header can be written with - // self.pool.write_header(...) if desired. - let write_len = packet.len().min(seg.len as usize); - tracing::info!(seg.gpa, write_len, "writing packet to guest memory"); - guest_memory - .write_at(seg.gpa, &packet[..write_len]) - .unwrap(); - packet = &packet[write_len..]; - if packet.is_empty() { - break; - } - } + assert!( + packet.len() <= pool.capacity(rx_id) as usize, + "test RX packet exceeds buffer capacity" + ); + tracing::info!( + rx_id = rx_id.0, + len = packet.len(), + "returning packet on receive path" + ); + pool.write_packet(rx_id, &metadata, &packet); packets[0] = rx_id; Ok(1) } else { @@ -427,6 +451,18 @@ impl NetQueue for TestNicQueue { _pool: &mut dyn BufferAccess, packets: &[TxSegment], ) -> anyhow::Result<(bool, usize)> { + if let Some(endpoint_state) = &self.endpoint_state { + let mut endpoint_state = endpoint_state.lock(); + endpoint_state + .tx_metadata + .extend(packets.iter().filter_map(|packet| { + if let net_backend::TxSegmentType::Head(metadata) = &packet.ty { + Some(metadata.clone()) + } else { + None + } + })); + } Ok((self.sync_tx, packets.len())) } @@ -1526,6 +1562,143 @@ impl<'a> TestNicChannel<'a> { self.transaction_id += 1; } + pub async fn send_rndis_packet_offload_with_vlan( + &mut self, + data: &[u8], + tcp_checksum: bool, + udp_checksum: bool, + lso: bool, + vlan_info: rndisprot::EthVlanInfo, + ) { + let mem = self.nic.mock_vmbus.memory.clone(); + let gpadl_view = self.gpadl_map.clone().view().map(self.send_buf_id).unwrap(); + let mut buf_writer = PagedRanges::new(&*gpadl_view).writer(&mem); + + assert!(lso || tcp_checksum || udp_checksum); + let 
per_packet_info_offset = size_of::() as u32; + let mut per_packet_info_length = 0u32; + if tcp_checksum || udp_checksum { + per_packet_info_length += size_of::() as u32 + + size_of::() as u32; + assert!(!lso); + } + if lso { + per_packet_info_length += size_of::() as u32 + + size_of::() as u32; + assert!(!(tcp_checksum || udp_checksum)); + } + per_packet_info_length += size_of::() as u32 + + size_of::() as u32; + + let message_length = size_of::() + + size_of::() + + per_packet_info_length as usize + + data.len(); + + buf_writer + .write( + rndisprot::MessageHeader { + message_type: rndisprot::MESSAGE_TYPE_PACKET_MSG, + message_length: message_length as u32, + } + .as_bytes(), + ) + .unwrap(); + + let packet = rndisprot::Packet { + data_offset: per_packet_info_offset + per_packet_info_length, + data_length: data.len() as u32, + oob_data_offset: 0, + oob_data_length: 0, + num_oob_data_elements: 0, + per_packet_info_offset, + per_packet_info_length, + vc_handle: 0, + reserved: 0, + }; + + buf_writer.write(packet.as_bytes()).unwrap(); + + const VLAN_TCP_HEADER_OFFSET: u16 = 38; // Ethernet (18) + IPv4 (20) + if tcp_checksum || udp_checksum { + let checksum_info = rndisprot::TxTcpIpChecksumInfo::new_zeroed() + .set_is_ipv4(true) + .set_tcp_checksum(tcp_checksum) + .set_udp_checksum(udp_checksum) + .set_ip_header_checksum(true) + .set_tcp_header_offset(VLAN_TCP_HEADER_OFFSET); + + buf_writer + .write( + rndisprot::PerPacketInfo { + size: size_of::() as u32 + + size_of::() as u32, + typ: rndisprot::PPI_TCP_IP_CHECKSUM, + per_packet_information_offset: size_of::() as u32, + } + .as_bytes(), + ) + .unwrap(); + buf_writer.write(checksum_info.as_bytes()).unwrap(); + } + + if lso { + const NORMAL_MTU: u32 = 1460; + let lso_info = + rndisprot::TcpLsoInfo(NORMAL_MTU | ((VLAN_TCP_HEADER_OFFSET as u32) << 20)); + + buf_writer + .write( + rndisprot::PerPacketInfo { + size: size_of::() as u32 + + size_of::() as u32, + typ: rndisprot::PPI_LSO, + per_packet_information_offset: 
size_of::() as u32, + } + .as_bytes(), + ) + .unwrap(); + buf_writer.write(lso_info.as_bytes()).unwrap(); + } + + buf_writer + .write( + rndisprot::PerPacketInfo { + size: size_of::() as u32 + + size_of::() as u32, + typ: rndisprot::PPI_VLAN, + per_packet_information_offset: size_of::() as u32, + } + .as_bytes(), + ) + .unwrap(); + buf_writer.write(vlan_info.as_bytes()).unwrap(); + + buf_writer.write(data).unwrap(); + + let message = NvspMessage { + header: protocol::MessageHeader { + message_type: protocol::MESSAGE1_TYPE_SEND_RNDIS_PACKET, + }, + data: protocol::Message1SendRndisPacket { + channel_type: protocol::DATA_CHANNEL_TYPE, + send_buffer_section_index: 0xffffffff, + send_buffer_section_size: 0, + }, + padding: &[], + }; + + let gpadl_map_view = self.gpadl_map.clone().view().map(self.send_buf_id).unwrap(); + let gpa_range = gpadl_map_view.first().unwrap().subrange(0, message_length); + self.write(OutgoingPacket { + transaction_id: self.transaction_id, + packet_type: OutgoingPacketType::GpaDirect(&[gpa_range]), + payload: &message.payload(), + }) + .await; + self.transaction_id += 1; + } + pub async fn connect_subchannel(&mut self, idx: u32) { self.subchannels .insert(idx, self.nic.connect_vmbus_subchannel(idx).await); @@ -1667,6 +1840,83 @@ impl RndisMessageParser { assert!(reader.skip(RX_HEADER_LEN).is_ok()); reader.read_plain::().unwrap() } + + /// Parse the per-packet info (PPI) entries from an RX data message. + /// Walks the PPI chain using the Packet header's offset/length fields, + /// matching each entry by type. + pub fn parse_rx_ppi(&self, external_ranges: &MultiPagedRangeBuf) -> RxPpiInfo { + let mut reader = PagedRanges::new(external_ranges.iter()).reader(&self.mem); + // Skip the MessageHeader to read the Packet struct. 
+ assert!(reader.skip(size_of::()).is_ok()); + let packet: rndisprot::Packet = reader.read_plain().unwrap(); + + let ppi_offset = packet.per_packet_info_offset as usize; + let ppi_length = packet.per_packet_info_length as usize; + + if ppi_length == 0 { + return RxPpiInfo::default(); + } + + // Seek to the PPI area (relative to after MessageHeader). + let mut reader = PagedRanges::new(external_ranges.iter()).reader(&self.mem); + let ppi_start = size_of::() + ppi_offset; + assert!(reader.skip(ppi_start).is_ok()); + + let mut ppi_bytes = vec![0u8; ppi_length]; + reader.read(&mut ppi_bytes).unwrap(); + + let mut result = RxPpiInfo::default(); + let mut offset = 0usize; + while offset < ppi_length { + assert!( + offset + size_of::() <= ppi_length, + "PPI header extends past PPI region" + ); + let header = rndisprot::PerPacketInfo::read_from_prefix(&ppi_bytes[offset..]) + .unwrap() + .0; + assert!( + header.size as usize >= size_of::(), + "PPI entry size too small" + ); + assert!( + offset + header.size as usize <= ppi_length, + "PPI entry extends past PPI region" + ); + + let payload_start = offset + header.per_packet_information_offset as usize; + assert!( + payload_start + 4 <= offset + header.size as usize, + "PPI offset results in invalid reads" + ); + match header.typ { + rndisprot::PPI_TCP_IP_CHECKSUM => { + let value = u32::read_from_prefix(&ppi_bytes[payload_start..]) + .unwrap() + .0; + result.checksum = Some(rndisprot::RxTcpIpChecksumInfo(value)); + } + rndisprot::PPI_VLAN => { + let value = u32::read_from_prefix(&ppi_bytes[payload_start..]) + .unwrap() + .0; + result.vlan = Some(rndisprot::EthVlanInfo(value)); + } + _ => { + // Unknown PPI type — skip. + } + } + offset += header.size as usize; + } + result + } +} + +/// Parsed per-packet info from an RX RNDIS message. 
+#[derive(Default, Debug)] +struct RxPpiInfo { + pub checksum: Option, + pub vlan: Option, } enum TestVirtualFunctionStateChange { @@ -4248,8 +4498,8 @@ async fn send_rndis_set_packet_filter(driver: DefaultDriver) { // Send a packet on every queue. { let locked_state = endpoint_state.lock(); - for (idx, queue) in locked_state.queues.iter().enumerate() { - queue.send(vec![idx as u8]); + for idx in 0..locked_state.queues.len() { + locked_state.send_rx(idx, vec![idx as u8]); } } @@ -4288,8 +4538,8 @@ async fn send_rndis_set_packet_filter(driver: DefaultDriver) { // Send a packet on every queue. { let locked_state = endpoint_state.lock(); - for (idx, queue) in locked_state.queues.iter().enumerate() { - queue.send(vec![idx as u8]); + for idx in 0..locked_state.queues.len() { + locked_state.send_rx(idx, vec![idx as u8]); } } @@ -4357,8 +4607,8 @@ async fn send_rndis_set_packet_filter(driver: DefaultDriver) { // Test sending packets with the filter set to None. for _ in 0..2 { let locked_state = endpoint_state.lock(); - for (idx, queue) in locked_state.queues.iter().enumerate() { - queue.send(vec![idx as u8]); + for idx in 0..locked_state.queues.len() { + locked_state.send_rx(idx, vec![idx as u8]); } } @@ -5143,8 +5393,8 @@ async fn set_rss_parameter_bufs_not_evenly_divisible(driver: DefaultDriver) { // Receive a packet on every queue. 
/// Builds a minimal 60-byte 802.1Q-tagged IPv4/TCP frame for TX tests.
///
/// Layout: an 18-byte VLAN Ethernet header (destination MAC, source MAC,
/// TPID 0x8100, TCI carrying the given 12-bit VLAN id with PCP/DEI zero,
/// inner EtherType 0x0800), a 20-byte IPv4 header, then a TCP header.
/// Checksums are left zero — these frames exercise checksum offload paths.
fn build_vlan_ipv4_tcp_packet(vlan_id: u16) -> Vec<u8> {
    const DST_MAC: [u8; 6] = [0x10, 0x11, 0x12, 0x13, 0x14, 0x15];
    const SRC_MAC: [u8; 6] = [0x20, 0x21, 0x22, 0x23, 0x24, 0x25];

    let mut frame = vec![0u8; 60];
    frame[..6].copy_from_slice(&DST_MAC);
    frame[6..12].copy_from_slice(&SRC_MAC);
    // 802.1Q tag: TPID, then the TCI (priority and DEI zero, masked VLAN id).
    frame[12..14].copy_from_slice(&0x8100u16.to_be_bytes());
    frame[14..16].copy_from_slice(&(vlan_id & 0x0fff).to_be_bytes());
    // Inner EtherType: IPv4.
    frame[16..18].copy_from_slice(&0x0800u16.to_be_bytes());

    // IPv4 header begins at offset 18, after the 18-byte VLAN L2 header.
    frame[18] = 0x45; // version 4, IHL 5 (20-byte header)
    frame[20..22].copy_from_slice(&42u16.to_be_bytes()); // total length: frame minus 18-byte L2 header
    frame[26] = 64; // TTL
    frame[27] = 6; // protocol: TCP

    // TCP header begins at offset 38; data offset field = 5 words (20 bytes).
    frame[38 + 12] = 0x50;

    frame
}
= channel + .read_rndis_control_message(rndisprot::MESSAGE_TYPE_INITIALIZE_CMPLT) + .await + .unwrap(); + assert_eq!(initialize_complete.request_id, 123); + assert_eq!(initialize_complete.status, rndisprot::STATUS_SUCCESS); + + let data = build_vlan_ipv4_tcp_packet(37); + let vlan_info = rndisprot::EthVlanInfo(37u32 << 4); + channel + .send_rndis_packet_offload_with_vlan(&data, true, false, false, vlan_info) + .await; + + let completion = channel.read_rndis_packet_complete_message().await.unwrap(); + assert_eq!(completion.status, protocol::Status::SUCCESS); + + let metadata = endpoint_state + .lock() + .tx_metadata + .last() + .cloned() + .expect("packet metadata should be captured"); + assert!(metadata.flags.offload_tcp_checksum()); + assert!(metadata.flags.offload_ip_header_checksum()); + assert!(metadata.flags.is_ipv4()); + assert_eq!( + metadata.l2_len, 18, + "VLAN-tagged packets must use an 18-byte L2 header" + ); + assert_eq!( + metadata.l3_len, 20, + "VLAN-tagged IPv4 packets must keep a 20-byte L3 header" + ); +} + +#[async_test] +async fn rndis_send_lso_packet_with_vlan_ppi(driver: DefaultDriver) { + let endpoint_state = TestNicEndpointState::new(); + let endpoint = TestNicEndpoint::new(Some(endpoint_state.clone())); + let builder = Nic::builder(); + let nic = builder.build( + &VmTaskDriverSource::new(SingleDriverBackend::new(driver.clone())), + Guid::new_random(), + Box::new(endpoint), + [1, 2, 3, 4, 5, 6].into(), + 0, + ); + + let mut nic = TestNicDevice::new_with_nic(&driver, nic).await; + nic.start_vmbus_channel(); + let mut channel = nic.connect_vmbus_channel().await; + channel + .initialize(0, protocol::NdisConfigCapabilities::new()) + .await; + channel + .send_rndis_control_message( + rndisprot::MESSAGE_TYPE_INITIALIZE_MSG, + rndisprot::InitializeRequest { + request_id: 123, + major_version: rndisprot::MAJOR_VERSION, + minor_version: rndisprot::MINOR_VERSION, + max_transfer_size: 0, + }, + &[], + ) + .await; + + let initialize_complete: 
/// Helper to initialize RNDIS and set the packet filter on a channel so
/// that RX packets will be delivered to the guest.
///
/// Performs the two-step RNDIS control sequence: INITIALIZE handshake first,
/// then an OID set of the packet filter. Panics (via `assert_eq!`/`unwrap`)
/// if either control message fails, which is the desired behavior in tests.
async fn initialize_rndis_for_rx(channel: &mut TestNicChannel<'_>) {
    // RNDIS handshake: send INITIALIZE and wait for INITIALIZE_CMPLT before
    // issuing any other control traffic.
    channel
        .send_rndis_control_message(
            rndisprot::MESSAGE_TYPE_INITIALIZE_MSG,
            rndisprot::InitializeRequest {
                request_id: 1,
                major_version: rndisprot::MAJOR_VERSION,
                minor_version: rndisprot::MINOR_VERSION,
                max_transfer_size: 0,
            },
            &[],
        )
        .await;

    let init_complete: rndisprot::InitializeComplete = channel
        .read_rndis_control_message(rndisprot::MESSAGE_TYPE_INITIALIZE_CMPLT)
        .await
        .unwrap();
    assert_eq!(init_complete.status, rndisprot::STATUS_SUCCESS);

    // Set packet filter so RX packets are delivered to the guest. The filter
    // value is carried as a little-endian u32 information buffer immediately
    // after the SetRequest header.
    channel
        .send_rndis_control_message(
            rndisprot::MESSAGE_TYPE_SET_MSG,
            rndisprot::SetRequest {
                request_id: 2,
                oid: rndisprot::Oid::OID_GEN_CURRENT_PACKET_FILTER,
                information_buffer_length: size_of::<u32>() as u32,
                information_buffer_offset: size_of::<rndisprot::SetRequest>() as u32,
                device_vc_handle: 0,
            },
            &rndisprot::NPROTO_PACKET_FILTER.to_le_bytes(),
        )
        .await;

    let set_complete: rndisprot::SetComplete = channel
        .read_rndis_control_message(rndisprot::MESSAGE_TYPE_SET_CMPLT)
        .await
        .unwrap();
    assert_eq!(set_complete.status, rndisprot::STATUS_SUCCESS);
}
+ channel + .write(OutgoingPacket { + transaction_id: txid, + packet_type: OutgoingPacketType::Completion, + payload: &NvspMessage { + header: protocol::MessageHeader { + message_type: protocol::MESSAGE1_TYPE_SEND_RNDIS_PACKET_COMPLETE, + }, + data: protocol::Message1SendRndisPacketComplete { + status: protocol::Status::SUCCESS, + }, + padding: &[], + } + .payload(), + }) + .await; + + ppi +} + +#[async_test] +async fn rndis_rx_vlan_packet(driver: DefaultDriver) { + let endpoint_state = TestNicEndpointState::new(); + let endpoint = TestNicEndpoint::new(Some(endpoint_state.clone())); + let nic = Nic::builder().build( + &VmTaskDriverSource::new(SingleDriverBackend::new(driver.clone())), + Guid::new_random(), + Box::new(endpoint), + [1, 2, 3, 4, 5, 6].into(), + 0, + ); + + let mut nic = TestNicDevice::new_with_nic(&driver, nic).await; + nic.start_vmbus_channel(); + let mut channel = nic.connect_vmbus_channel().await; + channel + .initialize(0, protocol::NdisConfigCapabilities::new()) + .await; + initialize_rndis_for_rx(&mut channel).await; + + let parser = channel.rndis_message_parser(); + let data = vec![0xAA; 60]; + let metadata = RxMetadata { + len: data.len(), + vlan: Some(net_backend::VlanMetadata { + priority: 5, + drop_eligible_indicator: true, + vlan_id: 100, + }), + ..Default::default() + }; + + let ppi = inject_and_parse_rx(&mut channel, &endpoint_state, &parser, data, metadata).await; + + let vlan = ppi.vlan.expect("VLAN PPI should be present"); + assert_eq!(vlan.vlan_id(), 100); + assert_eq!(vlan.priority(), 5); + assert_eq!(vlan.drop_eligible_indicator(), true); + // Checksum PPI should also be present (always emitted). 
+ assert!( + ppi.checksum.is_some(), + "checksum PPI should always be present" + ); +} + +#[async_test] +async fn rndis_rx_vlan_packet_with_tcp_checksum(driver: DefaultDriver) { + let endpoint_state = TestNicEndpointState::new(); + let endpoint = TestNicEndpoint::new(Some(endpoint_state.clone())); + let nic = Nic::builder().build( + &VmTaskDriverSource::new(SingleDriverBackend::new(driver.clone())), + Guid::new_random(), + Box::new(endpoint), + [1, 2, 3, 4, 5, 6].into(), + 0, + ); + + let mut nic = TestNicDevice::new_with_nic(&driver, nic).await; + nic.start_vmbus_channel(); + let mut channel = nic.connect_vmbus_channel().await; + channel + .initialize(0, protocol::NdisConfigCapabilities::new()) + .await; + initialize_rndis_for_rx(&mut channel).await; + + let parser = channel.rndis_message_parser(); + let data = vec![0xBB; 60]; + let metadata = RxMetadata { + len: data.len(), + ip_checksum: RxChecksumState::Good, + l4_checksum: RxChecksumState::Good, + l4_protocol: L4Protocol::Tcp, + vlan: Some(net_backend::VlanMetadata { + priority: 3, + drop_eligible_indicator: false, + vlan_id: 42, + }), + ..Default::default() + }; + + let ppi = inject_and_parse_rx(&mut channel, &endpoint_state, &parser, data, metadata).await; + + // Verify VLAN PPI. + let vlan = ppi.vlan.expect("VLAN PPI should be present"); + assert_eq!(vlan.vlan_id(), 42); + assert_eq!(vlan.priority(), 3); + assert_eq!(vlan.drop_eligible_indicator(), false); + + // Verify checksum PPI reports TCP checksum succeeded. 
+ let csum = ppi.checksum.expect("checksum PPI should be present"); + assert!(csum.tcp_checksum_succeeded()); + assert!(csum.ip_checksum_succeeded()); + assert!(!csum.tcp_checksum_failed()); +} + +#[async_test] +async fn rndis_rx_packet_no_vlan(driver: DefaultDriver) { + let endpoint_state = TestNicEndpointState::new(); + let endpoint = TestNicEndpoint::new(Some(endpoint_state.clone())); + let nic = Nic::builder().build( + &VmTaskDriverSource::new(SingleDriverBackend::new(driver.clone())), + Guid::new_random(), + Box::new(endpoint), + [1, 2, 3, 4, 5, 6].into(), + 0, + ); + + let mut nic = TestNicDevice::new_with_nic(&driver, nic).await; + nic.start_vmbus_channel(); + let mut channel = nic.connect_vmbus_channel().await; + channel + .initialize(0, protocol::NdisConfigCapabilities::new()) + .await; + initialize_rndis_for_rx(&mut channel).await; + + let parser = channel.rndis_message_parser(); + let data = vec![0xCC; 60]; + let metadata = RxMetadata { + len: data.len(), + ..Default::default() + }; + + let ppi = inject_and_parse_rx(&mut channel, &endpoint_state, &parser, data, metadata).await; + + assert!( + ppi.vlan.is_none(), + "VLAN PPI should not be present when no VLAN metadata is set" + ); + assert!( + ppi.checksum.is_some(), + "checksum PPI should always be present" + ); +} + +#[async_test] +async fn rndis_rx_vlan_preserves_packet_data(driver: DefaultDriver) { + let endpoint_state = TestNicEndpointState::new(); + let endpoint = TestNicEndpoint::new(Some(endpoint_state.clone())); + let nic = Nic::builder().build( + &VmTaskDriverSource::new(SingleDriverBackend::new(driver.clone())), + Guid::new_random(), + Box::new(endpoint), + [1, 2, 3, 4, 5, 6].into(), + 0, + ); + + let mut nic = TestNicDevice::new_with_nic(&driver, nic).await; + nic.start_vmbus_channel(); + let mut channel = nic.connect_vmbus_channel().await; + channel + .initialize(0, protocol::NdisConfigCapabilities::new()) + .await; + initialize_rndis_for_rx(&mut channel).await; + + let parser = 
channel.rndis_message_parser(); + let data = vec![0xDD; 60]; + let metadata = RxMetadata { + len: data.len(), + vlan: Some(net_backend::VlanMetadata { + priority: 7, + drop_eligible_indicator: false, + vlan_id: 4094, + }), + ..Default::default() + }; + + { + let locked_state = endpoint_state.lock(); + locked_state.send_rx_with_metadata(0, data.clone(), metadata); + } + + channel + .read_with(|packet| match packet { + IncomingPacket::Data(data_packet) => { + let (_, external_ranges) = parser.parse_data_message(data_packet); + // Verify the packet data is intact after the 256-byte RNDIS header. + let received: [u8; 60] = parser.get_data_packet_content(&external_ranges); + assert_eq!(&received[..], &data[..], "packet data should be preserved"); + + // Also verify the VLAN PPI. + let ppi = parser.parse_rx_ppi(&external_ranges); + let vlan = ppi.vlan.expect("VLAN PPI should be present"); + assert_eq!(vlan.vlan_id(), 4094); + assert_eq!(vlan.priority(), 7); + } + _ => panic!("Unexpected packet type on RX"), + }) + .await + .expect("RX data packet"); +} + /// Helper: builds an RSS-enable parameter block that the set_rss_parameter /// OID path accepts. fn build_rss_enable_params() -> Vec { diff --git a/vmm_tests/vmm_tests/tests/tests/multiarch.rs b/vmm_tests/vmm_tests/tests/tests/multiarch.rs index 0c06842fa6..d77f780298 100644 --- a/vmm_tests/vmm_tests/tests/tests/multiarch.rs +++ b/vmm_tests/vmm_tests/tests/tests/multiarch.rs @@ -32,6 +32,8 @@ mod openhcl_servicing; mod pcie; /// Tests involving TPM functionality mod tpm; +/// Tests for VLAN (802.1Q) support on virtual NICs. +mod vlan; /// Tests of vmbus relay functionality. mod vmbus_relay; /// Tests involving VMGS functionality diff --git a/vmm_tests/vmm_tests/tests/tests/multiarch/vlan.rs b/vmm_tests/vmm_tests/tests/tests/multiarch/vlan.rs new file mode 100644 index 0000000000..ce55667155 --- /dev/null +++ b/vmm_tests/vmm_tests/tests/tests/multiarch/vlan.rs @@ -0,0 +1,154 @@ +// Copyright (c) Microsoft Corporation. 
/// Find the network interface matching [`NIC_MAC_ADDRESS`] by scanning sysfs.
///
/// Reads `/sys/class/net/<iface>/address` for every listed interface and
/// returns the name of the first one whose MAC matches. The expected MAC is
/// rendered with `-` separators, so they are normalized to `:` to match the
/// Linux sysfs format.
///
/// # Errors
/// Fails if `/sys/class/net` cannot be listed or no interface matches.
async fn find_nic_by_mac(sh: &UnixShell<'_>) -> anyhow::Result<String> {
    let expected_mac = NIC_MAC_ADDRESS.to_string().replace('-', ":");
    let ifaces = cmd!(sh, "ls /sys/class/net").read().await?;
    for iface in ifaces.lines() {
        let iface = iface.trim();
        if iface.is_empty() {
            continue;
        }
        let addr_path = format!("/sys/class/net/{iface}/address");
        // Best-effort read: skip interfaces whose address file cannot be
        // read rather than failing the whole scan.
        if let Ok(mac) = cmd!(sh, "cat {addr_path}").read().await {
            if mac.trim() == expected_mac {
                return Ok(iface.to_string());
            }
        }
    }
    anyhow::bail!("no interface found with MAC address {expected_mac}")
}
Maintain the parent interface in operational state throughout
///
/// The TX smoke step exercises the netvsp VLAN PPI (Per-Packet Information)
/// path: the guest's netvsc driver emits VLAN metadata that netvsp extracts
/// into `TxMetadata`. The ping itself is expected to fail because the
/// consomme backend does not route VLAN-tagged traffic, but the TX operation
/// must not error or crash.
#[openvmm_test(
    uefi_x64(vhd(ubuntu_2504_server_x64)),
    uefi_aarch64(vhd(ubuntu_2404_server_aarch64))
)]
async fn vlan_guest_config(config: PetriVmBuilder<OpenVmmPetriBackend>) -> anyhow::Result<()> {
    let (vm, agent) = config.modify_backend(|c| c.with_nic()).run().await?;
    let sh = agent.unix_shell();

    // Find the NIC interface by its known MAC address.
    let nic_name = find_nic_by_mac(&sh).await?;
    tracing::info!(nic_name, "found NIC interface");

    // Ensure the parent interface is up.
    cmd!(sh, "ip link set {nic_name} up").run().await?;

    // Load the 8021q kernel module for VLAN support. This is a no-op if the
    // module is already loaded or built into the kernel.
    cmd!(sh, "modprobe 8021q").run().await?;

    // Create a VLAN sub-interface with VLAN ID 100.
    let vlan_id = "100";
    let vlan_iface = format!("{nic_name}.{vlan_id}");
    cmd!(
        sh,
        "ip link add link {nic_name} name {vlan_iface} type vlan id {vlan_id}"
    )
    .run()
    .await?;

    // Verify the VLAN interface was created with correct 802.1Q configuration.
    let vlan_info = cmd!(sh, "ip -d link show {vlan_iface}").read().await?;
    tracing::info!(vlan_info, "VLAN interface details");
    assert!(
        vlan_info.contains("vlan protocol 802.1Q"),
        "interface should use 802.1Q VLAN protocol, got: {vlan_info}"
    );
    assert!(
        vlan_info.contains(&format!("id {vlan_id}")),
        "VLAN ID should be {vlan_id}, got: {vlan_info}"
    );

    // Configure the VLAN interface with an IP address and bring it up.
    cmd!(sh, "ip addr add 10.100.0.2/24 dev {vlan_iface}")
        .run()
        .await?;
    cmd!(sh, "ip link set {vlan_iface} up").run().await?;

    // Verify the VLAN interface is up.
    let link_brief = cmd!(sh, "ip -br link show {vlan_iface}").read().await?;
    tracing::info!(link_brief, "VLAN interface link state");
    assert!(
        link_brief.contains("UP"),
        "VLAN interface should be in UP state, got: {link_brief}"
    );

    // Verify the IP address was assigned.
    let addr_info = cmd!(sh, "ip -br addr show {vlan_iface}").read().await?;
    assert!(
        addr_info.contains("10.100.0.2"),
        "VLAN interface should have the assigned IP address, got: {addr_info}"
    );

    // TX smoke test: send traffic through the VLAN interface. This exercises
    // the netvsc → netvsp path with VLAN PPI metadata. The ping will fail
    // (consomme doesn't handle VLAN-tagged ARP), but the TX must not crash.
    // NOTE(review): the result is deliberately discarded — only the TX side
    // effect (the tx_packets counter below) is asserted.
    let _ = cmd!(sh, "ping -I {vlan_iface} -c 1 -W 2 10.100.0.1")
        .read()
        .await;

    // Verify that at least one packet was transmitted through the VLAN
    // interface (the ARP request for the ping target).
    let tx_packets = cmd!(sh, "cat /sys/class/net/{vlan_iface}/statistics/tx_packets")
        .read()
        .await?;
    let tx_count: u64 = tx_packets
        .trim()
        .parse()
        .context("failed to parse tx_packets")?;
    tracing::info!(tx_count, "TX packets through VLAN interface");
    assert!(
        tx_count > 0,
        "expected at least one TX packet through the VLAN interface"
    );

    // Verify the parent interface is still operational.
    let parent_state = cmd!(sh, "ip -br link show {nic_name}").read().await?;
    assert!(
        parent_state.contains("UP"),
        "parent interface should remain UP after VLAN operations, got: {parent_state}"
    );

    // Clean up: remove the VLAN interface.
    cmd!(sh, "ip link del {vlan_iface}").run().await?;

    agent.power_off().await?;
    vm.wait_for_clean_teardown().await?;
    Ok(())
}