From c07d8e9d4607fbc5e7263c9761c0c0602b7d7e3e Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Tue, 21 Apr 2026 15:52:09 -0700 Subject: [PATCH 01/19] Initial steps --- vm/devices/net/net_backend/src/lib.rs | 14 +- vm/devices/net/netvsp/src/lib.rs | 11 + vm/devices/net/netvsp/src/rndisprot.rs | 19 ++ vm/devices/net/netvsp/src/test.rs | 323 ++++++++++++++++++++++++- 4 files changed, 363 insertions(+), 4 deletions(-) diff --git a/vm/devices/net/net_backend/src/lib.rs b/vm/devices/net/net_backend/src/lib.rs index fb838beb61..2d057d7554 100644 --- a/vm/devices/net/net_backend/src/lib.rs +++ b/vm/devices/net/net_backend/src/lib.rs @@ -400,6 +400,12 @@ pub struct TxMetadata { /// Only guaranteed to be set if [`TxFlags::offload_tcp_segmentation`] or /// [`TxFlags::offload_udp_segmentation`] is set. pub max_segment_size: u16, + /// Priority for 802.1Q. Actually a 3-bit value. + pub priority: u8, + /// This should be 0. + pub canonical_format_id: u8, + /// The 802.1Q ID for this transmission. Actually a 12-bit value. + pub vlan_id: u16, } /// Flags affecting transmit behavior. @@ -430,8 +436,9 @@ pub struct TxFlags { /// Offload UDP segmentation (USO), allowing UDP packets larger than the /// MTU. `l2_len`, `l3_len`, and `max_segment_size` must be set. pub offload_udp_segmentation: bool, - #[bits(1)] - _reserved: u8, + /// 802.1Q VLAN support is enabled. Expect/use values in `priority`, + /// `canonical_format_id`, and `vlan_id`. 
+ pub vlan_enabled: bool, } impl Default for TxMetadata { @@ -445,6 +452,9 @@ impl Default for TxMetadata { l3_len: 0, l4_len: 0, max_segment_size: 0, + priority: 0, + canonical_format_id: 0, + vlan_id: 0 } } } diff --git a/vm/devices/net/netvsp/src/lib.rs b/vm/devices/net/netvsp/src/lib.rs index a5c2410306..fa354776a9 100644 --- a/vm/devices/net/netvsp/src/lib.rs +++ b/vm/devices/net/netvsp/src/lib.rs @@ -2602,6 +2602,16 @@ impl NetChannel { stats.tx_invalid_lso_packets.increment(); } } + rndisprot::PPI_VLAN => { + let n: rndisprot::EthVlanInfo = d.reader(mem).read_plain()?; + + metadata.flags.set_vlan_enabled(true); + metadata.priority = n.priority(); + metadata.canonical_format_id = n.canonical_format_id(); + metadata.vlan_id = n.vlan_id(); + metadata.l2_len = ETHERNET_VLAN_HEADER_LEN as u8; + + } _ => {} } ppi = rest; @@ -3267,6 +3277,7 @@ const MIN_MTU: u32 = DEFAULT_MTU; const MAX_MTU: u32 = 9216; const ETHERNET_HEADER_LEN: u32 = 14; +const ETHERNET_VLAN_HEADER_LEN: u32 = 18; impl Adapter { fn get_guest_vf_serial_number(&self, vfid: u32) -> u32 { diff --git a/vm/devices/net/netvsp/src/rndisprot.rs b/vm/devices/net/netvsp/src/rndisprot.rs index 1ca93e8b88..bd99ffc6c4 100644 --- a/vm/devices/net/netvsp/src/rndisprot.rs +++ b/vm/devices/net/netvsp/src/rndisprot.rs @@ -709,8 +709,27 @@ impl TcpLsoInfo { } } +#[repr(C)] +#[derive(Debug, Copy, Clone, IntoBytes, Immutable, KnownLayout, FromBytes)] +pub struct EthVlanInfo(pub u32); + +impl EthVlanInfo { + pub fn priority(self) -> u8 { + (self.0 as u8) & 0x7 + } + + pub fn canonical_format_id(self) -> u8 { + (self.0 >> 3) as u8 & 0x1 + } + + pub fn vlan_id(self) -> u16 { + (self.0 >> 4) as u16 & 0xfff + } +} + pub const PPI_TCP_IP_CHECKSUM: u32 = 0; pub const PPI_LSO: u32 = 2; +pub const PPI_VLAN: u32 = 6; // // Format of Information buffer passed in a SetRequest for the OID diff --git a/vm/devices/net/netvsp/src/test.rs b/vm/devices/net/netvsp/src/test.rs index 56996b1528..8d0c4dcb31 100644 ---
a/vm/devices/net/netvsp/src/test.rs +++ b/vm/devices/net/netvsp/src/test.rs @@ -145,6 +145,7 @@ struct TestNicEndpointState { /// TX packets are completed synchronously. When false it returns /// `(false, N)`, leaving packets in-flight. pub sync_tx: bool, + pub tx_metadata: Vec, } impl TestNicEndpointState { @@ -158,6 +159,7 @@ impl TestNicEndpointState { link_status_updater: None, queues: Vec::new(), sync_tx: true, + tx_metadata: Vec::new(), })) } @@ -244,7 +246,12 @@ impl net_backend::Endpoint for TestNicEndpoint { .into_iter() .map(|config| { let (tx, rx) = mesh::channel(); - queues.push(Box::new(TestNicQueue::new(config, rx, sync_tx))); + queues.push(Box::new(TestNicQueue::new( + config, + rx, + sync_tx, + inner.endpoint_state.clone(), + ))); tx }) .collect::>(); @@ -337,6 +344,8 @@ struct TestNicQueue { rx_ids: VecDeque, #[inspect(skip)] rx: mesh::Receiver>, + #[inspect(skip)] + endpoint_state: Option>>, next_rx_packet: Option>, sync_tx: bool, #[inspect(skip)] @@ -344,10 +353,16 @@ struct TestNicQueue { } impl TestNicQueue { - pub fn new(_config: QueueConfig, rx: mesh::Receiver>, sync_tx: bool) -> Self { + pub fn new( + _config: QueueConfig, + rx: mesh::Receiver>, + sync_tx: bool, + endpoint_state: Option>>, + ) -> Self { Self { rx_ids: VecDeque::new(), rx, + endpoint_state, next_rx_packet: None, sync_tx, scratch_segments: Vec::new(), @@ -427,6 +442,18 @@ impl NetQueue for TestNicQueue { _pool: &mut dyn BufferAccess, packets: &[TxSegment], ) -> anyhow::Result<(bool, usize)> { + if let Some(endpoint_state) = &self.endpoint_state { + let mut endpoint_state = endpoint_state.lock(); + endpoint_state + .tx_metadata + .extend(packets.iter().filter_map(|packet| { + if let net_backend::TxSegmentType::Head(metadata) = &packet.ty { + Some(metadata.clone()) + } else { + None + } + })); + } Ok((self.sync_tx, packets.len())) } @@ -1526,6 +1553,143 @@ impl<'a> TestNicChannel<'a> { self.transaction_id += 1; } + pub async fn send_rndis_packet_offload_with_vlan( + &mut self, + 
data: &[u8], + tcp_checksum: bool, + udp_checksum: bool, + lso: bool, + vlan_info: rndisprot::EthVlanInfo, + ) { + let mem = self.nic.mock_vmbus.memory.clone(); + let gpadl_view = self.gpadl_map.clone().view().map(self.send_buf_id).unwrap(); + let mut buf_writer = PagedRanges::new(&*gpadl_view).writer(&mem); + + assert!(lso || tcp_checksum || udp_checksum); + let per_packet_info_offset = size_of::() as u32; + let mut per_packet_info_length = 0u32; + if tcp_checksum || udp_checksum { + per_packet_info_length += size_of::() as u32 + + size_of::() as u32; + assert!(!lso); + } + if lso { + per_packet_info_length += size_of::() as u32 + + size_of::() as u32; + assert!(!(tcp_checksum || udp_checksum)); + } + per_packet_info_length += size_of::() as u32 + + size_of::() as u32; + + let message_length = size_of::() + + size_of::() + + per_packet_info_length as usize + + data.len(); + + buf_writer + .write( + rndisprot::MessageHeader { + message_type: rndisprot::MESSAGE_TYPE_PACKET_MSG, + message_length: message_length as u32, + } + .as_bytes(), + ) + .unwrap(); + + let packet = rndisprot::Packet { + data_offset: per_packet_info_offset + per_packet_info_length, + data_length: data.len() as u32, + oob_data_offset: 0, + oob_data_length: 0, + num_oob_data_elements: 0, + per_packet_info_offset, + per_packet_info_length, + vc_handle: 0, + reserved: 0, + }; + + buf_writer.write(packet.as_bytes()).unwrap(); + + const VLAN_TCP_HEADER_OFFSET: u16 = 38; // Ethernet (18) + IPv4 (20) + if tcp_checksum || udp_checksum { + let checksum_info = rndisprot::TxTcpIpChecksumInfo::new_zeroed() + .set_is_ipv4(true) + .set_tcp_checksum(tcp_checksum) + .set_udp_checksum(udp_checksum) + .set_ip_header_checksum(true) + .set_tcp_header_offset(VLAN_TCP_HEADER_OFFSET); + + buf_writer + .write( + rndisprot::PerPacketInfo { + size: size_of::() as u32 + + size_of::() as u32, + typ: rndisprot::PPI_TCP_IP_CHECKSUM, + per_packet_information_offset: size_of::() as u32, + } + .as_bytes(), + ) + .unwrap(); + 
buf_writer.write(checksum_info.as_bytes()).unwrap(); + } + + if lso { + const NORMAL_MTU: u32 = 1460; + let lso_info = + rndisprot::TcpLsoInfo(NORMAL_MTU | ((VLAN_TCP_HEADER_OFFSET as u32) << 20)); + + buf_writer + .write( + rndisprot::PerPacketInfo { + size: size_of::() as u32 + + size_of::() as u32, + typ: rndisprot::PPI_LSO, + per_packet_information_offset: size_of::() as u32, + } + .as_bytes(), + ) + .unwrap(); + buf_writer.write(lso_info.as_bytes()).unwrap(); + } + + buf_writer + .write( + rndisprot::PerPacketInfo { + size: size_of::() as u32 + + size_of::() as u32, + typ: rndisprot::PPI_VLAN, + per_packet_information_offset: size_of::() as u32, + } + .as_bytes(), + ) + .unwrap(); + buf_writer.write(vlan_info.as_bytes()).unwrap(); + + buf_writer.write(data).unwrap(); + + let message = NvspMessage { + header: protocol::MessageHeader { + message_type: protocol::MESSAGE1_TYPE_SEND_RNDIS_PACKET, + }, + data: protocol::Message1SendRndisPacket { + channel_type: protocol::DATA_CHANNEL_TYPE, + send_buffer_section_index: 0xffffffff, + send_buffer_section_size: 0, + }, + padding: &[], + }; + + let gpadl_map_view = self.gpadl_map.clone().view().map(self.send_buf_id).unwrap(); + let gpa_range = gpadl_map_view.first().unwrap().subrange(0, message_length); + self.write(OutgoingPacket { + transaction_id: self.transaction_id, + packet_type: OutgoingPacketType::GpaDirect(&[gpa_range]), + payload: &message.payload(), + }) + .await; + self.transaction_id += 1; + } + pub async fn connect_subchannel(&mut self, idx: u32) { self.subchannels .insert(idx, self.nic.connect_vmbus_subchannel(idx).await); @@ -5752,6 +5916,161 @@ async fn rndis_send_tcp_checksum_packet(driver: DefaultDriver) { assert_eq!(completion.status, protocol::Status::SUCCESS); } +fn build_vlan_ipv4_tcp_packet(vlan_id: u16) -> Vec { + let mut data = vec![0u8; 60]; + + data[..6].copy_from_slice(&[0x10, 0x11, 0x12, 0x13, 0x14, 0x15]); + data[6..12].copy_from_slice(&[0x20, 0x21, 0x22, 0x23, 0x24, 0x25]); + 
data[12..14].copy_from_slice(&0x8100u16.to_be_bytes()); + data[14..16].copy_from_slice(&(vlan_id & 0x0fff).to_be_bytes()); + data[16..18].copy_from_slice(&0x0800u16.to_be_bytes()); + + data[18] = 0x45; // IPv4, 20-byte header + data[20..22].copy_from_slice(&(42u16).to_be_bytes()); + data[26] = 64; // TTL + data[27] = 6; // TCP + + data[38 + 12] = 0x50; // TCP data offset = 5 (20 bytes) + + data +} + +#[async_test] +async fn rndis_send_tcp_checksum_packet_with_vlan_ppi(driver: DefaultDriver) { + let endpoint_state = TestNicEndpointState::new(); + let endpoint = TestNicEndpoint::new(Some(endpoint_state.clone())); + let builder = Nic::builder(); + let nic = builder.build( + &VmTaskDriverSource::new(SingleDriverBackend::new(driver.clone())), + Guid::new_random(), + Box::new(endpoint), + [1, 2, 3, 4, 5, 6].into(), + 0, + ); + + let mut nic = TestNicDevice::new_with_nic(&driver, nic).await; + nic.start_vmbus_channel(); + let mut channel = nic.connect_vmbus_channel().await; + channel + .initialize(0, protocol::NdisConfigCapabilities::new()) + .await; + channel + .send_rndis_control_message( + rndisprot::MESSAGE_TYPE_INITIALIZE_MSG, + rndisprot::InitializeRequest { + request_id: 123, + major_version: rndisprot::MAJOR_VERSION, + minor_version: rndisprot::MINOR_VERSION, + max_transfer_size: 0, + }, + &[], + ) + .await; + + let initialize_complete: rndisprot::InitializeComplete = channel + .read_rndis_control_message(rndisprot::MESSAGE_TYPE_INITIALIZE_CMPLT) + .await + .unwrap(); + assert_eq!(initialize_complete.request_id, 123); + assert_eq!(initialize_complete.status, rndisprot::STATUS_SUCCESS); + + let data = build_vlan_ipv4_tcp_packet(37); + let vlan_info = rndisprot::EthVlanInfo(37u32 << 4); + channel + .send_rndis_packet_offload_with_vlan(&data, true, false, false, vlan_info) + .await; + + let completion = channel.read_rndis_packet_complete_message().await.unwrap(); + assert_eq!(completion.status, protocol::Status::SUCCESS); + + let metadata = endpoint_state + .lock() + 
.tx_metadata + .last() + .cloned() + .expect("packet metadata should be captured"); + assert!(metadata.flags.offload_tcp_checksum()); + assert!(metadata.flags.offload_ip_header_checksum()); + assert!(metadata.flags.is_ipv4()); + assert_eq!( + metadata.l2_len, 18, + "VLAN-tagged packets must use an 18-byte L2 header" + ); + assert_eq!( + metadata.l3_len, 20, + "VLAN-tagged IPv4 packets must keep a 20-byte L3 header" + ); +} + +#[async_test] +async fn rndis_send_lso_packet_with_vlan_ppi(driver: DefaultDriver) { + let endpoint_state = TestNicEndpointState::new(); + let endpoint = TestNicEndpoint::new(Some(endpoint_state.clone())); + let builder = Nic::builder(); + let nic = builder.build( + &VmTaskDriverSource::new(SingleDriverBackend::new(driver.clone())), + Guid::new_random(), + Box::new(endpoint), + [1, 2, 3, 4, 5, 6].into(), + 0, + ); + + let mut nic = TestNicDevice::new_with_nic(&driver, nic).await; + nic.start_vmbus_channel(); + let mut channel = nic.connect_vmbus_channel().await; + channel + .initialize(0, protocol::NdisConfigCapabilities::new()) + .await; + channel + .send_rndis_control_message( + rndisprot::MESSAGE_TYPE_INITIALIZE_MSG, + rndisprot::InitializeRequest { + request_id: 123, + major_version: rndisprot::MAJOR_VERSION, + minor_version: rndisprot::MINOR_VERSION, + max_transfer_size: 0, + }, + &[], + ) + .await; + + let initialize_complete: rndisprot::InitializeComplete = channel + .read_rndis_control_message(rndisprot::MESSAGE_TYPE_INITIALIZE_CMPLT) + .await + .unwrap(); + assert_eq!(initialize_complete.request_id, 123); + assert_eq!(initialize_complete.status, rndisprot::STATUS_SUCCESS); + + let data = build_vlan_ipv4_tcp_packet(91); + let vlan_info = rndisprot::EthVlanInfo(91u32 << 4); + channel + .send_rndis_packet_offload_with_vlan(&data, false, false, true, vlan_info) + .await; + + let completion = channel.read_rndis_packet_complete_message().await.unwrap(); + assert_eq!(completion.status, protocol::Status::SUCCESS); + + let metadata = 
endpoint_state + .lock() + .tx_metadata + .last() + .cloned() + .expect("packet metadata should be captured"); + assert!(metadata.flags.offload_tcp_segmentation()); + assert!(metadata.flags.offload_tcp_checksum()); + assert!(metadata.flags.offload_ip_header_checksum()); + assert!(metadata.flags.is_ipv4()); + assert_eq!( + metadata.l2_len, 18, + "VLAN-tagged packets must use an 18-byte L2 header" + ); + assert_eq!( + metadata.l3_len, 20, + "VLAN-tagged IPv4 packets must keep a 20-byte L3 header" + ); + assert_eq!(metadata.max_segment_size, 1460); +} + /// Helper: builds an RSS-enable parameter block that the set_rss_parameter /// OID path accepts. fn build_rss_enable_params() -> Vec { From a7466f0cd92c9236f816e14d407553373b5bf29e Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Wed, 22 Apr 2026 11:01:10 -0700 Subject: [PATCH 02/19] First steps --- vm/devices/net/net_backend/src/lib.rs | 4 ++ vm/devices/net/netvsp/src/lib.rs | 94 +++++++++++++++------------ 2 files changed, 55 insertions(+), 43 deletions(-) diff --git a/vm/devices/net/net_backend/src/lib.rs b/vm/devices/net/net_backend/src/lib.rs index 2d057d7554..de8cfc48c2 100644 --- a/vm/devices/net/net_backend/src/lib.rs +++ b/vm/devices/net/net_backend/src/lib.rs @@ -396,6 +396,9 @@ pub struct TxMetadata { /// The length of the TCP header. Only guaranteed to be set if various /// offload flags are set. pub l4_len: u8, + /// The offset into the buffer where the TCP header begins. Only expected + /// to be set if offload flags are set. + pub tcp_header_offset: u16, /// The maximum segment size, used for segmentation offload (TSO or USO). /// Only guaranteed to be set if [`TxFlags::offload_tcp_segmentation`] or /// [`TxFlags::offload_udp_segmentation`] is set. 
@@ -451,6 +454,7 @@ impl Default for TxMetadata { l2_len: 0, l3_len: 0, l4_len: 0, + tcp_header_offset: 0, max_segment_size: 0, priority: 0, canonical_format_id: 0, diff --git a/vm/devices/net/netvsp/src/lib.rs b/vm/devices/net/netvsp/src/lib.rs index fa354776a9..b4193b31d6 100644 --- a/vm/devices/net/netvsp/src/lib.rs +++ b/vm/devices/net/netvsp/src/lib.rs @@ -2549,23 +2549,7 @@ impl NetChannel { .set_offload_ip_header_checksum(n.is_ipv4() && n.ip_header_checksum()); metadata.flags.set_is_ipv4(n.is_ipv4()); metadata.flags.set_is_ipv6(n.is_ipv6() && !n.is_ipv4()); - metadata.l2_len = ETHERNET_HEADER_LEN as u8; - if metadata.flags.offload_tcp_checksum() - || metadata.flags.offload_udp_checksum() - { - metadata.l3_len = if n.tcp_header_offset() >= metadata.l2_len as u16 { - n.tcp_header_offset() - metadata.l2_len as u16 - } else if n.is_ipv4() { - let mut reader = data.clone().reader(mem); - reader.skip(metadata.l2_len as usize)?; - let mut b = 0; - reader.read(std::slice::from_mut(&mut b))?; - (b as u16 >> 4) * 4 - } else { - // Hope there are no extensions. - 40 - }; - } + metadata.tcp_header_offset = n.tcp_header_offset(); } rndisprot::PPI_LSO => { let n: rndisprot::TcpLsoInfo = d.reader(mem).read_plain()?; @@ -2575,32 +2559,10 @@ impl NetChannel { metadata.flags.set_offload_ip_header_checksum(n.is_ipv4()); metadata.flags.set_is_ipv4(n.is_ipv4()); metadata.flags.set_is_ipv6(n.is_ipv6() && !n.is_ipv4()); - metadata.l2_len = ETHERNET_HEADER_LEN as u8; - if n.tcp_header_offset() < metadata.l2_len as u16 { - return Err(WorkerError::InvalidTcpHeaderOffset(n.tcp_header_offset())); - } - metadata.l3_len = n.tcp_header_offset() - metadata.l2_len as u16; - // Offset of `Data Offset` field in the TCP header (byte 12) - const TCP_DOFF_BYTE_OFFSET: u32 = 12; - let tcp_hdr_doff_offset = - u32::from(n.tcp_header_offset()) + TCP_DOFF_BYTE_OFFSET; - // Validate TCP header Data Offset 4 bit nibble within the packet data bounds. 
- if tcp_hdr_doff_offset >= request.data_length { - return Err(WorkerError::InvalidTcpHeaderOffset(n.tcp_header_offset())); - } - metadata.l4_len = { - let mut reader = data.clone().reader(mem); - reader.skip(tcp_hdr_doff_offset as usize)?; - let mut b = 0; - reader.read(std::slice::from_mut(&mut b))?; - (b >> 4) * 4 - }; metadata.max_segment_size = n.mss() as u16; + metadata.l2_len = ETHERNET_HEADER_LEN as u8; + metadata.tcp_header_offset = n.tcp_header_offset(); - if request.data_length >= rndisprot::LSO_MAX_OFFLOAD_SIZE { - // Not strictly enforced. - stats.tx_invalid_lso_packets.increment(); - } } rndisprot::PPI_VLAN => { let n: rndisprot::EthVlanInfo = d.reader(mem).read_plain()?; @@ -2609,13 +2571,59 @@ impl NetChannel { metadata.priority = n.priority(); metadata.canonical_format_id = n.canonical_format_id(); metadata.vlan_id = n.vlan_id(); - metadata.l2_len = ETHERNET_VLAN_HEADER_LEN as u8; - } _ => {} } ppi = rest; } + + metadata.l2_len = if metadata.flags.vlan_enabled() { ETHERNET_VLAN_HEADER_LEN } else { ETHERNET_HEADER_LEN } as u8; + + // rndisprot::PPI_TCP_IP_CHECKSUM version: + if metadata.flags.offload_tcp_checksum() + || metadata.flags.offload_udp_checksum() + { + metadata.l3_len = if metadata.tcp_header_offset >= metadata.l2_len as u16 { + metadata.tcp_header_offset - metadata.l2_len as u16 + } else if metadata.flags.is_ipv4() { + let mut reader = data.clone().reader(mem); + reader.skip(metadata.l2_len as usize)?; + let mut b = 0; + reader.read(std::slice::from_mut(&mut b))?; + (b as u16 >> 4) * 4 + } else { + // Hope there are no extensions. 
+ 40 + }; + } + + // rndisprot::PPI_LSO version: + if metadata.flags.offload_tcp_segmentation() || metadata.flags.offload_udp_segmentation() { + if metadata.tcp_header_offset < metadata.l2_len as u16 { + return Err(WorkerError::InvalidTcpHeaderOffset(metadata.tcp_header_offset)); + } + metadata.l3_len = metadata.tcp_header_offset - metadata.l2_len as u16; + // Offset of `Data Offset` field in the TCP header (byte 12) + const TCP_DOFF_BYTE_OFFSET: u32 = 12; + let tcp_hdr_doff_offset = + u32::from(metadata.tcp_header_offset) + TCP_DOFF_BYTE_OFFSET; + // Validate TCP header Data Offset 4 bit nibble within the packet data bounds. + if tcp_hdr_doff_offset >= request.data_length { + return Err(WorkerError::InvalidTcpHeaderOffset(metadata.tcp_header_offset)); + } + metadata.l4_len = { + let mut reader = data.clone().reader(mem); + reader.skip(tcp_hdr_doff_offset as usize)?; + let mut b = 0; + reader.read(std::slice::from_mut(&mut b))?; + (b >> 4) * 4 + }; + + if request.data_length >= rndisprot::LSO_MAX_OFFLOAD_SIZE { + // Not strictly enforced. + stats.tx_invalid_lso_packets.increment(); + } + } } let start = segments.len(); From a8c0fa1e4bb7cc2d070ebd25c185d16d47f983fe Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Wed, 22 Apr 2026 15:17:09 -0700 Subject: [PATCH 03/19] Formatted with single pass evaluation of header offset and flags. 
--- vm/devices/net/net_backend/src/lib.rs | 2 +- vm/devices/net/netvsp/src/lib.rs | 60 ++++++++++++++++----------- 2 files changed, 36 insertions(+), 26 deletions(-) diff --git a/vm/devices/net/net_backend/src/lib.rs b/vm/devices/net/net_backend/src/lib.rs index de8cfc48c2..d556bec8f6 100644 --- a/vm/devices/net/net_backend/src/lib.rs +++ b/vm/devices/net/net_backend/src/lib.rs @@ -458,7 +458,7 @@ impl Default for TxMetadata { max_segment_size: 0, priority: 0, canonical_format_id: 0, - vlan_id: 0 + vlan_id: 0, } } } diff --git a/vm/devices/net/netvsp/src/lib.rs b/vm/devices/net/netvsp/src/lib.rs index b4193b31d6..bba68e676d 100644 --- a/vm/devices/net/netvsp/src/lib.rs +++ b/vm/devices/net/netvsp/src/lib.rs @@ -2562,7 +2562,6 @@ impl NetChannel { metadata.max_segment_size = n.mss() as u16; metadata.l2_len = ETHERNET_HEADER_LEN as u8; metadata.tcp_header_offset = n.tcp_header_offset(); - } rndisprot::PPI_VLAN => { let n: rndisprot::EthVlanInfo = d.reader(mem).read_plain()?; @@ -2577,39 +2576,50 @@ impl NetChannel { ppi = rest; } - metadata.l2_len = if metadata.flags.vlan_enabled() { ETHERNET_VLAN_HEADER_LEN } else { ETHERNET_HEADER_LEN } as u8; - - // rndisprot::PPI_TCP_IP_CHECKSUM version: - if metadata.flags.offload_tcp_checksum() - || metadata.flags.offload_udp_checksum() - { - metadata.l3_len = if metadata.tcp_header_offset >= metadata.l2_len as u16 { - metadata.tcp_header_offset - metadata.l2_len as u16 - } else if metadata.flags.is_ipv4() { - let mut reader = data.clone().reader(mem); - reader.skip(metadata.l2_len as usize)?; - let mut b = 0; - reader.read(std::slice::from_mut(&mut b))?; - (b as u16 >> 4) * 4 - } else { - // Hope there are no extensions. 
- 40 - }; - } + metadata.l2_len = if metadata.flags.vlan_enabled() { + ETHERNET_VLAN_HEADER_LEN + } else { + ETHERNET_HEADER_LEN + } as u8; - // rndisprot::PPI_LSO version: - if metadata.flags.offload_tcp_segmentation() || metadata.flags.offload_udp_segmentation() { + if metadata.flags.offload_tcp_checksum() || metadata.flags.offload_udp_checksum() { + // The offset must be set if a checksum is being captured. if metadata.tcp_header_offset < metadata.l2_len as u16 { - return Err(WorkerError::InvalidTcpHeaderOffset(metadata.tcp_header_offset)); + return Err(WorkerError::InvalidTcpHeaderOffset( + metadata.tcp_header_offset, + )); + } else if metadata.flags.is_ipv4() + && metadata.tcp_header_offset < (metadata.l2_len as u16 + 20) + { + return Err(WorkerError::InvalidTcpHeaderOffset( + metadata.tcp_header_offset, + )); + } else if metadata.flags.is_ipv6() + && metadata.tcp_header_offset < (metadata.l2_len as u16 + 40) + { + return Err(WorkerError::InvalidTcpHeaderOffset( + metadata.tcp_header_offset, + )); + } else if metadata.tcp_header_offset as u32 >= request.data_length { + return Err(WorkerError::InvalidTcpHeaderOffset( + metadata.tcp_header_offset, + )); } + metadata.l3_len = metadata.tcp_header_offset - metadata.l2_len as u16; - // Offset of `Data Offset` field in the TCP header (byte 12) + } + + // UDP checksum offload shares the transport-offset validation and l3_len computation above. + + if metadata.flags.offload_tcp_segmentation() { const TCP_DOFF_BYTE_OFFSET: u32 = 12; let tcp_hdr_doff_offset = u32::from(metadata.tcp_header_offset) + TCP_DOFF_BYTE_OFFSET; // Validate TCP header Data Offset 4 bit nibble within the packet data bounds. 
if tcp_hdr_doff_offset >= request.data_length { - return Err(WorkerError::InvalidTcpHeaderOffset(metadata.tcp_header_offset)); + return Err(WorkerError::InvalidTcpHeaderOffset( + metadata.tcp_header_offset, + )); } metadata.l4_len = { let mut reader = data.clone().reader(mem); From 2e2ae54864ff24378cd7bfb4fc365e2ae6f2882d Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Fri, 24 Apr 2026 10:41:32 -0700 Subject: [PATCH 04/19] formatting --- vm/devices/net/netvsp/src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vm/devices/net/netvsp/src/lib.rs b/vm/devices/net/netvsp/src/lib.rs index bba68e676d..5af2aa8513 100644 --- a/vm/devices/net/netvsp/src/lib.rs +++ b/vm/devices/net/netvsp/src/lib.rs @@ -2803,6 +2803,8 @@ impl NetChannel { error = &err as &dyn std::error::Error, "Failed to notify guest that data path is now synthetic" ); + } else { + tracing::info!("Switched data path to synthetic") } } From e8cefaec230a1bf5501b94f819aa933fc8c0695c Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Fri, 24 Apr 2026 12:40:55 -0700 Subject: [PATCH 05/19] Added test coverage. 
--- vm/devices/net/gdma/src/bnic.rs | 29 +++++- vm/devices/net/gdma/src/lib.rs | 5 ++ vm/devices/net/gdma/src/resolver.rs | 1 + vm/devices/net/mana_driver/src/tests.rs | 3 + vm/devices/net/net_mana/src/test.rs | 114 ++++++++++++++++++++++++ 5 files changed, 151 insertions(+), 1 deletion(-) diff --git a/vm/devices/net/gdma/src/bnic.rs b/vm/devices/net/gdma/src/bnic.rs index bf5a94d482..e17a767824 100644 --- a/vm/devices/net/gdma/src/bnic.rs +++ b/vm/devices/net/gdma/src/bnic.rs @@ -4,6 +4,7 @@ use self::bnic_defs::CQE_RX_TRUNCATED; use self::bnic_defs::CQE_TX_GDMA_ERR; use self::bnic_defs::CQE_TX_OKAY; +use self::bnic_defs::CQE_TX_VLAN_TAGGING_VIOLATION; use self::bnic_defs::MANA_CQE_COMPLETION; use self::bnic_defs::ManaCommandCode; use self::bnic_defs::ManaCqeHeader; @@ -184,6 +185,8 @@ struct Vport { task: TaskControl, queue_cfg: QueueCfg, serial_no: u32, + #[cfg(test)] + reject_tx_with_vlan_error: bool, } impl InspectMut for Vport { @@ -214,6 +217,8 @@ impl BasicNic { |VportConfig { mac_address, endpoint, + #[cfg(test)] + reject_tx_with_vlan_error, }| { assert!(endpoint.is_ordered()); Vport { @@ -222,6 +227,8 @@ impl BasicNic { task: TaskControl::new(TxRxState), queue_cfg: QueueCfg { tx: None, rx: None }, serial_no: 0, + #[cfg(test)] + reject_tx_with_vlan_error, } }, ) @@ -407,6 +414,8 @@ impl BasicNic { rq_cq_id, tx_segment_buffer: Vec::new(), rx_buf_count: 0, + #[cfg(test)] + reject_tx_with_vlan_error: vport.reject_tx_with_vlan_error, }, ); vport.task.start(); @@ -482,6 +491,8 @@ pub struct TxRxTask { rq_cq_id: u32, tx_segment_buffer: Vec, rx_buf_count: u32, + #[cfg(test)] + reject_tx_with_vlan_error: bool, } impl InspectTaskMut for TxRxState { @@ -547,6 +558,13 @@ impl TxRxTask { }; let sge0 = sqe.sgl().first().context("no sgl")?; + + #[cfg(test)] + if self.reject_tx_with_vlan_error { + self.post_tx_completion_vlan_error(); + return Ok(()); + } + let total_len: usize = sqe.sgl().iter().map(|sge| sge.size as usize).sum(); let mut meta = TxMetadata { id: 
TxId(0), @@ -562,6 +580,7 @@ impl TxRxTask { l3_len: oob.s_oob.trans_off().clamp(14, 255) - 14, l4_len: 0, max_segment_size: 0, + ..Default::default() }; if sqe.header.params.client_oob_in_sgl() { @@ -620,10 +639,18 @@ impl TxRxTask { // Possible test improvement: provide proper OOB data for the GDMA error. fn post_tx_completion_error(&mut self) { + self.post_tx_completion_with_type(CQE_TX_GDMA_ERR); + } + + fn post_tx_completion_vlan_error(&mut self) { + self.post_tx_completion_with_type(CQE_TX_VLAN_TAGGING_VIOLATION); + } + + fn post_tx_completion_with_type(&mut self, cqe_type: u8) { let tx_oob = ManaTxCompOob { cqe_hdr: ManaCqeHeader::new() .with_client_type(MANA_CQE_COMPLETION) - .with_cqe_type(CQE_TX_GDMA_ERR), + .with_cqe_type(cqe_type), tx_data_offset: 0, offsets: ManaTxCompOobOffsets::new(), reserved: [0; 12], diff --git a/vm/devices/net/gdma/src/lib.rs b/vm/devices/net/gdma/src/lib.rs index 6e6297ff13..8d7971377f 100644 --- a/vm/devices/net/gdma/src/lib.rs +++ b/vm/devices/net/gdma/src/lib.rs @@ -122,6 +122,11 @@ pub use bnic::BnicConfig; pub struct VportConfig { pub mac_address: MacAddress, pub endpoint: Box, + /// When set, all TX completions will be posted with + /// `CQE_TX_VLAN_TAGGING_VIOLATION` instead of being forwarded to the + /// backend endpoint. Used for testing VLAN fallback behavior. 
+ #[cfg(test)] + pub reject_tx_with_vlan_error: bool, } impl GdmaDevice { diff --git a/vm/devices/net/gdma/src/resolver.rs b/vm/devices/net/gdma/src/resolver.rs index 740017beaa..4d20050e11 100644 --- a/vm/devices/net/gdma/src/resolver.rs +++ b/vm/devices/net/gdma/src/resolver.rs @@ -59,6 +59,7 @@ impl AsyncResolveResource for GdmaDeviceR Ok(VportConfig { mac_address: vport.mac_address, endpoint: endpoint.0, + reject_tx_with_vlan_error: false, }) })) .await?; diff --git a/vm/devices/net/mana_driver/src/tests.rs b/vm/devices/net/mana_driver/src/tests.rs index 44efe6b772..fef7c03dcf 100644 --- a/vm/devices/net/mana_driver/src/tests.rs +++ b/vm/devices/net/mana_driver/src/tests.rs @@ -37,6 +37,7 @@ async fn test_gdma(driver: DefaultDriver) { vec![VportConfig { mac_address: [1, 2, 3, 4, 5, 6].into(), endpoint: Box::new(NullEndpoint::new()), + reject_tx_with_vlan_error: false, }], &mut ExternallyManagedMmioIntercepts, ); @@ -175,6 +176,7 @@ async fn test_gdma_save_restore(driver: DefaultDriver) { vec![VportConfig { mac_address: [1, 2, 3, 4, 5, 6].into(), endpoint: Box::new(NullEndpoint::new()), + reject_tx_with_vlan_error: false, }], &mut ExternallyManagedMmioIntercepts, ); @@ -323,6 +325,7 @@ async fn test_gdma_reconfig_vf(driver: DefaultDriver) { vec![VportConfig { mac_address: [1, 2, 3, 4, 5, 6].into(), endpoint: Box::new(NullEndpoint::new()), + reject_tx_with_vlan_error: false, }], &mut ExternallyManagedMmioIntercepts, ); diff --git a/vm/devices/net/net_mana/src/test.rs b/vm/devices/net/net_mana/src/test.rs index b41fc70ce2..a9946b360b 100644 --- a/vm/devices/net/net_mana/src/test.rs +++ b/vm/devices/net/net_mana/src/test.rs @@ -554,6 +554,7 @@ async fn test_vport_with_query_filter_state(driver: DefaultDriver) { vec![VportConfig { mac_address: [1, 2, 3, 4, 5, 6].into(), endpoint: Box::new(LoopbackEndpoint::new()), + reject_tx_with_vlan_error: false, }], &mut ExternallyManagedMmioIntercepts, ); @@ -902,6 +903,7 @@ async fn test_endpoint( vec![VportConfig { 
mac_address: [1, 2, 3, 4, 5, 6].into(), endpoint: Box::new(LoopbackEndpoint::new()), + reject_tx_with_vlan_error: false, }], &mut ExternallyManagedMmioIntercepts, ); @@ -1038,3 +1040,115 @@ fn get_queue_stats(queue_stats: Option<&dyn net_backend::BackendQueueStats>) -> ..Default::default() } } + +#[async_test] +async fn test_vlan_violation_triggers_fallback(driver: DefaultDriver) { + // Verify that a CQE_TX_VLAN_TAGGING_VIOLATION completion triggers the + // fallback signal through the Vport, which the emuplat layer would + // observe to initiate a VF-to-synthetic transition. + let pages = 256; + let mem = DeviceTestMemory::new(pages * 2, true, "test_vlan_fallback"); + let payload_mem = mem.payload_mem(); + + let msi_conn = MsiConnection::new(); + let device = gdma::GdmaDevice::new( + &VmTaskDriverSource::new(SingleDriverBackend::new(driver.clone())), + mem.guest_memory(), + msi_conn.target(), + vec![VportConfig { + mac_address: [1, 2, 3, 4, 5, 6].into(), + endpoint: Box::new(LoopbackEndpoint::new()), + reject_tx_with_vlan_error: true, + }], + &mut ExternallyManagedMmioIntercepts, + ); + let device = EmulatedDevice::new(device, msi_conn, mem.dma_client()); + let dev_config = ManaQueryDeviceCfgResp { + pf_cap_flags1: 0.into(), + pf_cap_flags2: 0, + pf_cap_flags3: 0, + pf_cap_flags4: 0, + max_num_vports: 1, + reserved: 0, + max_num_eqs: 64, + }; + + let (fallback_tx, mut fallback_rx) = mesh::channel::<()>(); + let vport_state = mana_driver::mana::VportState::new(None, None, Some(fallback_tx)); + + let thing = ManaDevice::new(&driver, device, 1, 1, None).await.unwrap(); + let vport = thing + .new_vport(0, Some(vport_state), &dev_config) + .await + .unwrap(); + let mut endpoint = ManaEndpoint::new(driver.clone(), vport, GuestDmaMode::DirectDma).await; + let mut queues = Vec::new(); + let mut pool = net_backend::tests::Bufs::new(payload_mem.clone()); + endpoint + .get_queues( + vec![QueueConfig { + driver: Box::new(driver.clone()), + }], + None, + &mut queues, + ) + 
.await + .unwrap(); + + // Post initial RX buffers. + queues[0].rx_avail(&mut pool, &(1..128u32).map(RxId).collect::>()); + + // Build and send a single packet. + let packet_len = 128; + let data_to_send = vec![0xABu8; packet_len]; + payload_mem.write_at(0, &data_to_send).unwrap(); + + let mut pkt_builder = TxPacketBuilder::new(); + build_tx_segments(packet_len, 1, false, &mut pkt_builder); + queues[0] + .tx_avail(&mut pool, pkt_builder.segments()) + .unwrap(); + + // Poll for the TX completion. The GDMA emulator will respond with + // CQE_TX_VLAN_TAGGING_VIOLATION, which tx_poll handles by calling + // signal_fallback() on the vport. It does not return a TxError. + let mut tx_done = [TxId(0); 2]; + let mut tx_done_n = 0; + loop { + let mut context = CancelContext::new().with_timeout(Duration::from_secs(1)); + match context + .until_cancelled(poll_fn(|cx| queues[0].poll_ready(cx, &mut pool))) + .await + { + Err(CancelReason::DeadlineExceeded) => break, + Err(e) => panic!("Unexpected error polling queue: {e:?}"), + _ => {} + } + tx_done_n += queues[0] + .tx_poll(&mut pool, &mut tx_done[tx_done_n..]) + .unwrap_or(0); + if tx_done_n >= 1 { + break; + } + } + assert_eq!(tx_done_n, 1, "TX completion should be received"); + + // Verify stats reflect the VLAN violation. + let stats = get_queue_stats(queues[0].queue_stats()); + assert_eq!(stats.tx_errors.get(), 1, "tx_errors should be incremented"); + assert_eq!( + stats.tx_packets.get(), + 0, + "tx_packets should not be incremented for VLAN violations" + ); + + // Verify the fallback signal was sent through the channel. 
+ let fallback_signal = fallback_rx.try_recv(); + assert!( + fallback_signal.is_ok(), + "fallback channel should have received a signal" + ); + + drop(queues); + endpoint.stop().await; +} From 18f0c986cbec1c87d8df7033f58ff5f69541cb74 Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Fri, 24 Apr 2026 15:10:45 -0700 Subject: [PATCH 06/19] vmm_tests: add VLAN guest configuration integration test Add a new VMM test that validates guest-side VLAN (802.1Q) sub-interface creation and configuration on the synthetic NIC (netvsp). The test: - Boots a Linux VM with a NIC (via consomme backend) - Finds the NIC by MAC address in sysfs - Creates a VLAN sub-interface (ID 100) using ip-link - Verifies 802.1Q protocol and VLAN ID in interface details - Assigns an IP address and brings the interface up - Sends TX smoke traffic (ping) to exercise the netvsp VLAN PPI path - Verifies at least one TX packet was transmitted - Confirms the parent interface remains operational - Cleans up the VLAN interface This exercises the guest netvsc driver's VLAN support and the netvsp VLAN PPI (Per-Packet Information) metadata extraction path. Full end-to-end VLAN datapath validation would require a VLAN-aware backend; the current consomme backend ignores VLAN metadata, so this test focuses on guest-side configuration correctness and TX path stability. Also fix a pre-existing build error in gdma/src/resolver.rs where the cfg(test)-gated reject_tx_with_vlan_error field was referenced unconditionally in struct initialization. The test took a little over 2m running under WSL2, which seems long. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- vm/devices/net/gdma/src/resolver.rs | 1 + vmm_tests/vmm_tests/tests/tests/multiarch.rs | 2 + .../vmm_tests/tests/tests/multiarch/vlan.rs | 154 ++++++++++++++++++ 3 files changed, 157 insertions(+) create mode 100644 vmm_tests/vmm_tests/tests/tests/multiarch/vlan.rs diff --git a/vm/devices/net/gdma/src/resolver.rs b/vm/devices/net/gdma/src/resolver.rs index 4d20050e11..ef9603ee33 100644 --- a/vm/devices/net/gdma/src/resolver.rs +++ b/vm/devices/net/gdma/src/resolver.rs @@ -59,6 +59,7 @@ impl AsyncResolveResource for GdmaDeviceR Ok(VportConfig { mac_address: vport.mac_address, endpoint: endpoint.0, + #[cfg(test)] reject_tx_with_vlan_error: false, }) })) diff --git a/vmm_tests/vmm_tests/tests/tests/multiarch.rs b/vmm_tests/vmm_tests/tests/tests/multiarch.rs index 0c06842fa6..d77f780298 100644 --- a/vmm_tests/vmm_tests/tests/tests/multiarch.rs +++ b/vmm_tests/vmm_tests/tests/tests/multiarch.rs @@ -32,6 +32,8 @@ mod openhcl_servicing; mod pcie; /// Tests involving TPM functionality mod tpm; +/// Tests for VLAN (802.1Q) support on virtual NICs. +mod vlan; /// Tests of vmbus relay functionality. mod vmbus_relay; /// Tests involving VMGS functionality diff --git a/vmm_tests/vmm_tests/tests/tests/multiarch/vlan.rs b/vmm_tests/vmm_tests/tests/tests/multiarch/vlan.rs new file mode 100644 index 0000000000..ce55667155 --- /dev/null +++ b/vmm_tests/vmm_tests/tests/tests/multiarch/vlan.rs @@ -0,0 +1,154 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Tests for VLAN (802.1Q) sub-interface configuration on virtual NICs. +//! +//! These tests validate that guest operating systems can create and configure +//! VLAN sub-interfaces on the VMM's synthetic NIC (netvsp). They exercise +//! the guest driver's VLAN support and verify the TX path with VLAN PPI +//! metadata does not error or crash. +//! +//! 
**Scope:** These tests validate guest-side VLAN configuration and TX smoke +//! behavior. Full end-to-end VLAN datapath validation (verifying that the VMM +//! backend correctly processes VLAN-tagged traffic) would require a +//! VLAN-aware backend; the current consomme backend ignores VLAN metadata. +//! Unit-level VLAN PPI parsing is covered by `netvsp/src/test.rs`. + +use anyhow::Context; +use petri::PetriVmBuilder; +use petri::openvmm::NIC_MAC_ADDRESS; +use petri::openvmm::OpenVmmPetriBackend; +use petri::pipette::cmd; +use pipette_client::shell::UnixShell; +use vmm_test_macros::openvmm_test; + +/// Find the network interface matching [`NIC_MAC_ADDRESS`] by scanning sysfs. +async fn find_nic_by_mac(sh: &UnixShell<'_>) -> anyhow::Result { + let expected_mac = NIC_MAC_ADDRESS.to_string().replace('-', ":"); + let ifaces = cmd!(sh, "ls /sys/class/net").read().await?; + for iface in ifaces.lines() { + let iface = iface.trim(); + if iface.is_empty() { + continue; + } + let addr_path = format!("/sys/class/net/{iface}/address"); + if let Ok(mac) = cmd!(sh, "cat {addr_path}").read().await { + if mac.trim() == expected_mac { + return Ok(iface.to_string()); + } + } + } + anyhow::bail!("no interface found with MAC address {expected_mac}") +} + +/// Test VLAN sub-interface creation and configuration on the guest NIC. +/// +/// Validates that the guest can: +/// 1. Create an 802.1Q VLAN sub-interface on the synthetic NIC +/// 2. Configure it with a specific VLAN ID, IP address, and bring it up +/// 3. Transmit packets through it (TX smoke test via ARP/ping) +/// 4. Maintain the parent interface in operational state throughout +/// +/// The TX smoke step exercises the netvsp VLAN PPI (Per-Packet Information) +/// path: the guest's netvsc driver emits VLAN metadata that netvsp extracts +/// into `TxMetadata`. The ping itself is expected to fail because the +/// consomme backend does not route VLAN-tagged traffic, but the TX operation +/// must not error or crash. 
+#[openvmm_test( + uefi_x64(vhd(ubuntu_2504_server_x64)), + uefi_aarch64(vhd(ubuntu_2404_server_aarch64)) +)] +async fn vlan_guest_config(config: PetriVmBuilder) -> anyhow::Result<()> { + let (vm, agent) = config.modify_backend(|c| c.with_nic()).run().await?; + let sh = agent.unix_shell(); + + // Find the NIC interface by its known MAC address. + let nic_name = find_nic_by_mac(&sh).await?; + tracing::info!(nic_name, "found NIC interface"); + + // Ensure the parent interface is up. + cmd!(sh, "ip link set {nic_name} up").run().await?; + + // Load the 8021q kernel module for VLAN support. This is a no-op if the + // module is already loaded or built into the kernel. + cmd!(sh, "modprobe 8021q").run().await?; + + // Create a VLAN sub-interface with VLAN ID 100. + let vlan_id = "100"; + let vlan_iface = format!("{nic_name}.{vlan_id}"); + cmd!( + sh, + "ip link add link {nic_name} name {vlan_iface} type vlan id {vlan_id}" + ) + .run() + .await?; + + // Verify the VLAN interface was created with correct 802.1Q configuration. + let vlan_info = cmd!(sh, "ip -d link show {vlan_iface}").read().await?; + tracing::info!(vlan_info, "VLAN interface details"); + assert!( + vlan_info.contains("vlan protocol 802.1Q"), + "interface should use 802.1Q VLAN protocol, got: {vlan_info}" + ); + assert!( + vlan_info.contains(&format!("id {vlan_id}")), + "VLAN ID should be {vlan_id}, got: {vlan_info}" + ); + + // Configure the VLAN interface with an IP address and bring it up. + cmd!(sh, "ip addr add 10.100.0.2/24 dev {vlan_iface}") + .run() + .await?; + cmd!(sh, "ip link set {vlan_iface} up").run().await?; + + // Verify the VLAN interface is up. + let link_brief = cmd!(sh, "ip -br link show {vlan_iface}").read().await?; + tracing::info!(link_brief, "VLAN interface link state"); + assert!( + link_brief.contains("UP"), + "VLAN interface should be in UP state, got: {link_brief}" + ); + + // Verify the IP address was assigned. 
+ let addr_info = cmd!(sh, "ip -br addr show {vlan_iface}").read().await?; + assert!( + addr_info.contains("10.100.0.2"), + "VLAN interface should have the assigned IP address, got: {addr_info}" + ); + + // TX smoke test: send traffic through the VLAN interface. This exercises + // the netvsc → netvsp path with VLAN PPI metadata. The ping will fail + // (consomme doesn't handle VLAN-tagged ARP), but the TX must not crash. + let _ = cmd!(sh, "ping -I {vlan_iface} -c 1 -W 2 10.100.0.1") + .read() + .await; + + // Verify that at least one packet was transmitted through the VLAN + // interface (the ARP request for the ping target). + let tx_packets = cmd!(sh, "cat /sys/class/net/{vlan_iface}/statistics/tx_packets") + .read() + .await?; + let tx_count: u64 = tx_packets + .trim() + .parse() + .context("failed to parse tx_packets")?; + tracing::info!(tx_count, "TX packets through VLAN interface"); + assert!( + tx_count > 0, + "expected at least one TX packet through the VLAN interface" + ); + + // Verify the parent interface is still operational. + let parent_state = cmd!(sh, "ip -br link show {nic_name}").read().await?; + assert!( + parent_state.contains("UP"), + "parent interface should remain UP after VLAN operations, got: {parent_state}" + ); + + // Clean up: remove the VLAN interface. + cmd!(sh, "ip link del {vlan_iface}").run().await?; + + agent.power_off().await?; + vm.wait_for_clean_teardown().await?; + Ok(()) +} From 8f2a824b1b8b3e7b7a03593a3a983ec4238e4f54 Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Mon, 27 Apr 2026 11:05:15 -0700 Subject: [PATCH 07/19] net/gdma: replace test-only VLAN error flag with OOB-based detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the `reject_tx_with_vlan_error` field from VportConfig, Vport, and TxRxTask, which was gated behind #[cfg(test)] and caused net_mana tests to fail to compile (gdma is built as a regular dependency, not in test mode, when net_mana tests run). 
Instead, have the BNIC emulator inspect the `inject_vlan_pri_tag` field in the TX long OOB — matching how real MANA hardware detects and rejects 802.1Q VLAN tag insertion requests with CQE_TX_VLAN_TAGGING_VIOLATION. On the net_mana side, translate TxMetadata's vlan_enabled flag into the MANA OOB's inject_vlan_pri_tag/vlan_id/pcp/dei fields, and force the long OOB format when VLAN tagging is requested. The VLAN fallback test now sends an actual VLAN-tagged packet rather than relying on a synthetic per-vport flag. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- vm/devices/net/gdma/src/bnic.rs | 16 +++------- vm/devices/net/gdma/src/lib.rs | 5 --- vm/devices/net/gdma/src/resolver.rs | 2 -- vm/devices/net/net_mana/src/lib.rs | 8 ++++- vm/devices/net/net_mana/src/test.rs | 48 ++++++++++++++++++++++++++--- 5 files changed, 54 insertions(+), 25 deletions(-) diff --git a/vm/devices/net/gdma/src/bnic.rs b/vm/devices/net/gdma/src/bnic.rs index e17a767824..a6a27e378a 100644 --- a/vm/devices/net/gdma/src/bnic.rs +++ b/vm/devices/net/gdma/src/bnic.rs @@ -185,8 +185,6 @@ struct Vport { task: TaskControl, queue_cfg: QueueCfg, serial_no: u32, - #[cfg(test)] - reject_tx_with_vlan_error: bool, } impl InspectMut for Vport { @@ -217,8 +215,6 @@ impl BasicNic { |VportConfig { mac_address, endpoint, - #[cfg(test)] - reject_tx_with_vlan_error, }| { assert!(endpoint.is_ordered()); Vport { @@ -227,8 +223,6 @@ impl BasicNic { task: TaskControl::new(TxRxState), queue_cfg: QueueCfg { tx: None, rx: None }, serial_no: 0, - #[cfg(test)] - reject_tx_with_vlan_error, } }, ) @@ -414,8 +408,6 @@ impl BasicNic { rq_cq_id, tx_segment_buffer: Vec::new(), rx_buf_count: 0, - #[cfg(test)] - reject_tx_with_vlan_error: vport.reject_tx_with_vlan_error, }, ); vport.task.start(); @@ -491,8 +483,6 @@ pub struct TxRxTask { rq_cq_id: u32, tx_segment_buffer: Vec, rx_buf_count: u32, - #[cfg(test)] - reject_tx_with_vlan_error: bool, } impl InspectTaskMut for TxRxState { @@ -559,8 +549,10 @@ 
impl TxRxTask { let sge0 = sqe.sgl().first().context("no sgl")?; - #[cfg(test)] - if self.reject_tx_with_vlan_error { + // Real MANA hardware rejects packets that request 802.1Q VLAN tag + // insertion, since MANA does not support VLANs. Emulate this by + // inspecting the long OOB field that the guest driver sets. + if oob.l_oob.inject_vlan_pri_tag() { self.post_tx_completion_vlan_error(); return Ok(()); } diff --git a/vm/devices/net/gdma/src/lib.rs b/vm/devices/net/gdma/src/lib.rs index 8d7971377f..6e6297ff13 100644 --- a/vm/devices/net/gdma/src/lib.rs +++ b/vm/devices/net/gdma/src/lib.rs @@ -122,11 +122,6 @@ pub use bnic::BnicConfig; pub struct VportConfig { pub mac_address: MacAddress, pub endpoint: Box, - /// When set, all TX completions will be posted with - /// `CQE_TX_VLAN_TAGGING_VIOLATION` instead of being forwarded to the - /// backend endpoint. Used for testing VLAN fallback behavior. - #[cfg(test)] - pub reject_tx_with_vlan_error: bool, } impl GdmaDevice { diff --git a/vm/devices/net/gdma/src/resolver.rs b/vm/devices/net/gdma/src/resolver.rs index ef9603ee33..740017beaa 100644 --- a/vm/devices/net/gdma/src/resolver.rs +++ b/vm/devices/net/gdma/src/resolver.rs @@ -59,8 +59,6 @@ impl AsyncResolveResource for GdmaDeviceR Ok(VportConfig { mac_address: vport.mac_address, endpoint: endpoint.0, - #[cfg(test)] - reject_tx_with_vlan_error: false, }) })) .await?; diff --git a/vm/devices/net/net_mana/src/lib.rs b/vm/devices/net/net_mana/src/lib.rs index 4483ea19c7..06e1ef7e35 100644 --- a/vm/devices/net/net_mana/src/lib.rs +++ b/vm/devices/net/net_mana/src/lib.rs @@ -1166,7 +1166,13 @@ impl ManaQueue { if meta.flags.offload_tcp_checksum() { oob.s_oob.set_trans_off(meta.l2_len as u16 + meta.l3_len); } - let short_format = self.vp_offset <= 0xff; + if meta.flags.vlan_enabled() { + oob.l_oob.set_inject_vlan_pri_tag(true); + oob.l_oob.set_vlan_id(meta.vlan_id); + oob.l_oob.set_pcp(meta.priority); + oob.l_oob.set_dei(meta.canonical_format_id != 0); + } + let 
short_format = self.vp_offset <= 0xff && !meta.flags.vlan_enabled(); if short_format { oob.s_oob.set_pkt_fmt(MANA_SHORT_PKT_FMT); oob.s_oob.set_short_vp_offset(self.vp_offset as u8); diff --git a/vm/devices/net/net_mana/src/test.rs b/vm/devices/net/net_mana/src/test.rs index a9946b360b..8315ee1571 100644 --- a/vm/devices/net/net_mana/src/test.rs +++ b/vm/devices/net/net_mana/src/test.rs @@ -554,7 +554,6 @@ async fn test_vport_with_query_filter_state(driver: DefaultDriver) { vec![VportConfig { mac_address: [1, 2, 3, 4, 5, 6].into(), endpoint: Box::new(LoopbackEndpoint::new()), - reject_tx_with_vlan_error: false, }], &mut ExternallyManagedMmioIntercepts, ); @@ -880,6 +879,46 @@ fn build_tx_segments( } } +/// Like [`build_tx_segments`] but with 802.1Q VLAN tagging enabled. +fn build_tx_segments_vlan( + packet_len: usize, + num_segments: usize, + vlan_id: u16, + pkt_builder: &mut TxPacketBuilder, +) { + assert_eq!(packet_len % num_segments, 0); + let tx_id = 1; + let segment_len = packet_len / num_segments; + let mut tx_metadata = net_backend::TxMetadata { + id: TxId(tx_id), + segment_count: num_segments as u8, + len: packet_len as u32, + l2_len: 18, // Ethernet header (with VLAN) + l3_len: 20, // IPv4 header + l4_len: 20, // TCP header + max_segment_size: 1460, // Typical MSS for Ethernet + vlan_id, + ..Default::default() + }; + tx_metadata.flags.set_vlan_enabled(true); + + let mut gpa = pkt_builder.data_len(); + pkt_builder.push(TxSegment { + ty: net_backend::TxSegmentType::Head(tx_metadata.clone()), + gpa, + len: segment_len as u32, + }); + + for _ in 0..(num_segments - 1) { + gpa += segment_len as u64; + pkt_builder.push(TxSegment { + ty: net_backend::TxSegmentType::Tail, + gpa, + len: segment_len as u32, + }); + } +} + async fn test_endpoint( driver: DefaultDriver, dma_mode: GuestDmaMode, @@ -903,7 +942,6 @@ async fn test_endpoint( vec![VportConfig { mac_address: [1, 2, 3, 4, 5, 6].into(), endpoint: Box::new(LoopbackEndpoint::new()), - reject_tx_with_vlan_error: 
false, }], &mut ExternallyManagedMmioIntercepts, ); @@ -1058,7 +1096,6 @@ async fn test_vlan_violation_triggers_fallback(driver: DefaultDriver) { vec![VportConfig { mac_address: [1, 2, 3, 4, 5, 6].into(), endpoint: Box::new(LoopbackEndpoint::new()), - reject_tx_with_vlan_error: true, }], &mut ExternallyManagedMmioIntercepts, ); @@ -1098,13 +1135,14 @@ async fn test_vlan_violation_triggers_fallback(driver: DefaultDriver) { // Post initial RX buffers. queues[0].rx_avail(&mut pool, &(1..128u32).map(RxId).collect::>()); - // Build and send a single packet. + // Build and send a single VLAN-tagged packet. The BNIC emulator will + // reject this because MANA does not support 802.1Q VLAN tag insertion. let packet_len = 128; let data_to_send = vec![0xABu8; packet_len]; payload_mem.write_at(0, &data_to_send).unwrap(); let mut pkt_builder = TxPacketBuilder::new(); - build_tx_segments(packet_len, 1, false, &mut pkt_builder); + build_tx_segments_vlan(packet_len, 1, 100, &mut pkt_builder); queues[0] .tx_avail(&mut pool, pkt_builder.segments()) .unwrap(); From ea71b578a6b1b73e49f4bf5acf25da1b673781a1 Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Mon, 27 Apr 2026 13:07:03 -0700 Subject: [PATCH 08/19] Added tracing to log usage of vlan. 
--- vm/devices/net/netvsp/src/lib.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vm/devices/net/netvsp/src/lib.rs b/vm/devices/net/netvsp/src/lib.rs index 5af2aa8513..06849e8e83 100644 --- a/vm/devices/net/netvsp/src/lib.rs +++ b/vm/devices/net/netvsp/src/lib.rs @@ -513,6 +513,7 @@ struct QueueStats { tx_lso_packets: Counter, tx_checksum_packets: Counter, tx_invalid_lso_packets: Counter, + tx_vlan_packets: Counter, tx_packets_per_wake: Histogram<10>, rx_packets_per_wake: Histogram<10>, } @@ -2655,6 +2656,9 @@ impl NetChannel { if metadata.flags.offload_tcp_segmentation() { stats.tx_lso_packets.increment(); } + if metadata.flags.vlan_enabled() { + stats.tx_vlan_packets.increment(); + } segments[start].ty = net_backend::TxSegmentType::Head(metadata); From 08b73ab2903bb828b2da067e0caa3a55686ca14b Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Tue, 28 Apr 2026 11:54:30 -0700 Subject: [PATCH 09/19] Steps --- vm/devices/net/gdma/src/bnic.rs | 5 +++++ vm/devices/net/net_backend/src/lib.rs | 29 ++++++++++++++++---------- vm/devices/net/net_consomme/src/lib.rs | 1 + vm/devices/net/net_mana/src/lib.rs | 21 ++++++++++++++----- vm/devices/net/net_tap/src/lib.rs | 1 + vm/devices/net/netvsp/src/lib.rs | 8 ++++--- 6 files changed, 46 insertions(+), 19 deletions(-) diff --git a/vm/devices/net/gdma/src/bnic.rs b/vm/devices/net/gdma/src/bnic.rs index a6a27e378a..2334f09e6c 100644 --- a/vm/devices/net/gdma/src/bnic.rs +++ b/vm/devices/net/gdma/src/bnic.rs @@ -140,6 +140,11 @@ impl BufferAccess for GuestBuffers { }, } + if metadata.vlan_id != 0 { + flags.set_rx_vlantag_present(true); + flags.set_rx_vlan_id(metadata.vlan_id as u32); + } + let packet = &mut self.rx_packets[id.0 as usize]; let cqe_type = if metadata.len > packet.len as usize { diff --git a/vm/devices/net/net_backend/src/lib.rs b/vm/devices/net/net_backend/src/lib.rs index d556bec8f6..734ecc90bf 100644 --- a/vm/devices/net/net_backend/src/lib.rs +++ b/vm/devices/net/net_backend/src/lib.rs @@ -281,6 +281,16 @@ 
pub trait BufferAccess { } } +#[derive(Debug, Copy, Clone)] +pub struct VlanMetadata { + /// Priority for 802.1Q. Actually a 3-bit value. + pub priority: u8, + /// This should be 0. + pub canonical_format_id: u8, + /// The 802.1Q ID for this transmission. Actually a 12-bit value. + pub vlan_id: u16, +} + /// A receive buffer ID. #[derive(Debug, Copy, Clone)] #[repr(transparent)] @@ -308,6 +318,10 @@ pub struct RxMetadata { pub l4_checksum: RxChecksumState, /// The L4 protocol. pub l4_protocol: L4Protocol, + /// Information about 802.1Q VLAN tagging. When a vlan is in use, this structure + /// is populated. Only applies when traffic is being received over an L2 connection, + /// so L3-only or above traffic will not use this option. + pub vlan: Option, } impl Default for RxMetadata { @@ -318,6 +332,7 @@ impl Default for RxMetadata { ip_checksum: RxChecksumState::Unknown, l4_checksum: RxChecksumState::Unknown, l4_protocol: L4Protocol::Unknown, + vlan: None, } } } @@ -403,12 +418,7 @@ pub struct TxMetadata { /// Only guaranteed to be set if [`TxFlags::offload_tcp_segmentation`] or /// [`TxFlags::offload_udp_segmentation`] is set. pub max_segment_size: u16, - /// Priority for 802.1Q. Actually a 3-bit value. - pub priority: u8, - /// This should be 0. - pub canonical_format_id: u8, - /// The 802.1Q ID for this transmission. Actually a 12-bit value. - pub vlan_id: u16, + pub vlan: Option } /// Flags affecting transmit behavior. @@ -439,8 +449,7 @@ pub struct TxFlags { /// Offload UDP segmentation (USO), allowing UDP packets larger than the /// MTU. `l2_len`, `l3_len`, and `max_segment_size` must be set. pub offload_udp_segmentation: bool, - /// 802.1Q VLAN support is enabled. Expect/use values in `priority`, - /// `canonical_format_id`, and `vlan_id`. + /// 802.1Q VLAN support is enabled. `vlan` is populated if this is set. 
pub vlan_enabled: bool, } @@ -456,9 +465,7 @@ impl Default for TxMetadata { l4_len: 0, tcp_header_offset: 0, max_segment_size: 0, - priority: 0, - canonical_format_id: 0, - vlan_id: 0, + vlan: None, } } } diff --git a/vm/devices/net/net_consomme/src/lib.rs b/vm/devices/net/net_consomme/src/lib.rs index 9375f07132..88575342ea 100644 --- a/vm/devices/net/net_consomme/src/lib.rs +++ b/vm/devices/net/net_consomme/src/lib.rs @@ -522,6 +522,7 @@ impl consomme::Client for Client<'_> { } else { L4Protocol::Unknown }, + vlan: None }, data, ); diff --git a/vm/devices/net/net_mana/src/lib.rs b/vm/devices/net/net_mana/src/lib.rs index 06e1ef7e35..d2200f71e5 100644 --- a/vm/devices/net/net_mana/src/lib.rs +++ b/vm/devices/net/net_mana/src/lib.rs @@ -58,6 +58,7 @@ use net_backend::TxId; use net_backend::TxOffloadSupport; use net_backend::TxSegment; use net_backend::TxSegmentType; +use net_backend::VlanMetadata; use pal_async::task::Spawn; use safeatomic::AtomicSliceOps; use std::collections::VecDeque; @@ -965,6 +966,15 @@ impl Queue for ManaQueue { } else { (L4Protocol::Unknown, RxChecksumState::Unknown) }; + let vlantag = if rx_oob.flags.rx_vlantag_present() { + Some(VlanMetadata{ + canonical_format_id: 0, + priority: 0, + vlan_id: rx_oob.flags.rx_vlan_id() as u16 + }) + } else { + None + }; let len = rx_oob.ppi[0].pkt_len.into(); pool.write_header( rx.id, @@ -974,6 +984,7 @@ impl Queue for ManaQueue { ip_checksum, l4_checksum, l4_protocol, + vlan: vlantag, }, ); if rx.bounced_len_with_padding > 0 { @@ -1166,13 +1177,13 @@ impl ManaQueue { if meta.flags.offload_tcp_checksum() { oob.s_oob.set_trans_off(meta.l2_len as u16 + meta.l3_len); } - if meta.flags.vlan_enabled() { + if let Some(vlan) = &meta.vlan { oob.l_oob.set_inject_vlan_pri_tag(true); - oob.l_oob.set_vlan_id(meta.vlan_id); - oob.l_oob.set_pcp(meta.priority); - oob.l_oob.set_dei(meta.canonical_format_id != 0); + oob.l_oob.set_vlan_id(vlan.vlan_id); + oob.l_oob.set_pcp(vlan.priority); + 
oob.l_oob.set_dei(vlan.canonical_format_id != 0); } - let short_format = self.vp_offset <= 0xff && !meta.flags.vlan_enabled(); + let short_format = self.vp_offset <= 0xff && !meta.vlan.is_some(); if short_format { oob.s_oob.set_pkt_fmt(MANA_SHORT_PKT_FMT); oob.s_oob.set_short_vp_offset(self.vp_offset as u8); diff --git a/vm/devices/net/net_tap/src/lib.rs b/vm/devices/net/net_tap/src/lib.rs index da9bcb69c6..6afba1d559 100644 --- a/vm/devices/net/net_tap/src/lib.rs +++ b/vm/devices/net/net_tap/src/lib.rs @@ -520,6 +520,7 @@ fn parse_vnet_hdr(hdr: &VirtioNetHdr) -> RxMetadata { ip_checksum, l4_checksum, l4_protocol, + vlan: None, } } diff --git a/vm/devices/net/netvsp/src/lib.rs b/vm/devices/net/netvsp/src/lib.rs index 06849e8e83..707022d086 100644 --- a/vm/devices/net/netvsp/src/lib.rs +++ b/vm/devices/net/netvsp/src/lib.rs @@ -2568,9 +2568,11 @@ impl NetChannel { let n: rndisprot::EthVlanInfo = d.reader(mem).read_plain()?; metadata.flags.set_vlan_enabled(true); - metadata.priority = n.priority(); - metadata.canonical_format_id = n.canonical_format_id(); - metadata.vlan_id = n.vlan_id(); + metadata.vlan = Some(net_backend::VlanMetadata { + priority: n.priority(), + canonical_format_id: n.canonical_format_id(), + vlan_id: n.vlan_id() + }); } _ => {} } From b29ec04f9261b307f85b00c9ab4df29a71181fc7 Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Tue, 28 Apr 2026 15:54:44 -0700 Subject: [PATCH 10/19] cleaning up for the moment --- vm/devices/net/net_backend/src/lib.rs | 8 ++++---- vm/devices/net/net_consomme/src/lib.rs | 2 +- vm/devices/net/net_mana/src/lib.rs | 8 ++++---- vm/devices/net/netvsp/src/lib.rs | 12 ++++++------ vm/devices/net/netvsp/src/rndisprot.rs | 2 +- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/vm/devices/net/net_backend/src/lib.rs b/vm/devices/net/net_backend/src/lib.rs index 734ecc90bf..3eb1a1a182 100644 --- a/vm/devices/net/net_backend/src/lib.rs +++ b/vm/devices/net/net_backend/src/lib.rs @@ -286,7 +286,7 @@ pub struct 
VlanMetadata { /// Priority for 802.1Q. Actually a 3-bit value. pub priority: u8, /// This should be 0. - pub canonical_format_id: u8, + pub drop_eligible_indicator: u8, /// The 802.1Q ID for this transmission. Actually a 12-bit value. pub vlan_id: u16, } @@ -418,7 +418,7 @@ pub struct TxMetadata { /// Only guaranteed to be set if [`TxFlags::offload_tcp_segmentation`] or /// [`TxFlags::offload_udp_segmentation`] is set. pub max_segment_size: u16, - pub vlan: Option + pub vlan: Option, } /// Flags affecting transmit behavior. @@ -449,8 +449,8 @@ pub struct TxFlags { /// Offload UDP segmentation (USO), allowing UDP packets larger than the /// MTU. `l2_len`, `l3_len`, and `max_segment_size` must be set. pub offload_udp_segmentation: bool, - /// 802.1Q VLAN support is enabled. `vlan` is populated if this is set. - pub vlan_enabled: bool, + #[bits(1)] + _reserved: u8, } impl Default for TxMetadata { diff --git a/vm/devices/net/net_consomme/src/lib.rs b/vm/devices/net/net_consomme/src/lib.rs index 88575342ea..4b7cca5185 100644 --- a/vm/devices/net/net_consomme/src/lib.rs +++ b/vm/devices/net/net_consomme/src/lib.rs @@ -522,7 +522,7 @@ impl consomme::Client for Client<'_> { } else { L4Protocol::Unknown }, - vlan: None + vlan: None, }, data, ); diff --git a/vm/devices/net/net_mana/src/lib.rs b/vm/devices/net/net_mana/src/lib.rs index d2200f71e5..6c55116315 100644 --- a/vm/devices/net/net_mana/src/lib.rs +++ b/vm/devices/net/net_mana/src/lib.rs @@ -967,10 +967,10 @@ impl Queue for ManaQueue { (L4Protocol::Unknown, RxChecksumState::Unknown) }; let vlantag = if rx_oob.flags.rx_vlantag_present() { - Some(VlanMetadata{ - canonical_format_id: 0, + Some(VlanMetadata { + drop_eligible_indicator: 0, priority: 0, - vlan_id: rx_oob.flags.rx_vlan_id() as u16 + vlan_id: rx_oob.flags.rx_vlan_id() as u16, }) } else { None @@ -1181,7 +1181,7 @@ impl ManaQueue { oob.l_oob.set_inject_vlan_pri_tag(true); oob.l_oob.set_vlan_id(vlan.vlan_id); oob.l_oob.set_pcp(vlan.priority); - 
oob.l_oob.set_dei(vlan.canonical_format_id != 0); + oob.l_oob.set_dei(vlan.drop_eligible_indicator != 0); } let short_format = self.vp_offset <= 0xff && !meta.vlan.is_some(); if short_format { diff --git a/vm/devices/net/netvsp/src/lib.rs b/vm/devices/net/netvsp/src/lib.rs index 707022d086..0d3fd464c4 100644 --- a/vm/devices/net/netvsp/src/lib.rs +++ b/vm/devices/net/netvsp/src/lib.rs @@ -513,6 +513,7 @@ struct QueueStats { tx_lso_packets: Counter, tx_checksum_packets: Counter, tx_invalid_lso_packets: Counter, + rx_vlan_packets: Counter, tx_vlan_packets: Counter, tx_packets_per_wake: Histogram<10>, rx_packets_per_wake: Histogram<10>, @@ -2561,25 +2562,24 @@ impl NetChannel { metadata.flags.set_is_ipv4(n.is_ipv4()); metadata.flags.set_is_ipv6(n.is_ipv6() && !n.is_ipv4()); metadata.max_segment_size = n.mss() as u16; - metadata.l2_len = ETHERNET_HEADER_LEN as u8; metadata.tcp_header_offset = n.tcp_header_offset(); } rndisprot::PPI_VLAN => { let n: rndisprot::EthVlanInfo = d.reader(mem).read_plain()?; - metadata.flags.set_vlan_enabled(true); metadata.vlan = Some(net_backend::VlanMetadata { priority: n.priority(), - canonical_format_id: n.canonical_format_id(), - vlan_id: n.vlan_id() + drop_eligible_indicator: n.drop_eligible_indicator(), + vlan_id: n.vlan_id(), }); + stats.rx_vlan_packets.increment(); } _ => {} } ppi = rest; } - metadata.l2_len = if metadata.flags.vlan_enabled() { + metadata.l2_len = if metadata.vlan.is_some() { ETHERNET_VLAN_HEADER_LEN } else { ETHERNET_HEADER_LEN @@ -2658,7 +2658,7 @@ impl NetChannel { if metadata.flags.offload_tcp_segmentation() { stats.tx_lso_packets.increment(); } - if metadata.flags.vlan_enabled() { + if metadata.vlan.is_some() { stats.tx_vlan_packets.increment(); } diff --git a/vm/devices/net/netvsp/src/rndisprot.rs b/vm/devices/net/netvsp/src/rndisprot.rs index bd99ffc6c4..51fec82f80 100644 --- a/vm/devices/net/netvsp/src/rndisprot.rs +++ b/vm/devices/net/netvsp/src/rndisprot.rs @@ -718,7 +718,7 @@ impl EthVlanInfo { (self.0 as 
u8) & 0x3 } - pub fn canonical_format_id(self) -> u8 { + pub fn drop_eligible_indicator(self) -> u8 { (self.0 >> 3) as u8 & 0x1 } From 8c4f22c16263b8e682d5aa0bc49f85e655027cf9 Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Wed, 29 Apr 2026 00:48:33 +0000 Subject: [PATCH 11/19] Cleaning up after ripping out some changes. --- vm/devices/net/gdma/src/bnic.rs | 4 +- vm/devices/net/net_mana/src/test.rs | 117 +--------------------------- 2 files changed, 4 insertions(+), 117 deletions(-) diff --git a/vm/devices/net/gdma/src/bnic.rs b/vm/devices/net/gdma/src/bnic.rs index 2334f09e6c..d60805ed08 100644 --- a/vm/devices/net/gdma/src/bnic.rs +++ b/vm/devices/net/gdma/src/bnic.rs @@ -140,9 +140,9 @@ impl BufferAccess for GuestBuffers { }, } - if metadata.vlan_id != 0 { + if let Some(vlan) = &metadata.vlan { flags.set_rx_vlantag_present(true); - flags.set_rx_vlan_id(metadata.vlan_id as u32); + flags.set_rx_vlan_id(vlan.vlan_id as u32); } let packet = &mut self.rx_packets[id.0 as usize]; diff --git a/vm/devices/net/net_mana/src/test.rs b/vm/devices/net/net_mana/src/test.rs index 8315ee1571..a780e5dcf5 100644 --- a/vm/devices/net/net_mana/src/test.rs +++ b/vm/devices/net/net_mana/src/test.rs @@ -889,7 +889,7 @@ fn build_tx_segments_vlan( assert_eq!(packet_len % num_segments, 0); let tx_id = 1; let segment_len = packet_len / num_segments; - let mut tx_metadata = net_backend::TxMetadata { + let tx_metadata = net_backend::TxMetadata { id: TxId(tx_id), segment_count: num_segments as u8, len: packet_len as u32, @@ -897,10 +897,9 @@ fn build_tx_segments_vlan( l3_len: 20, // IPv4 header l4_len: 20, // TCP header max_segment_size: 1460, // Typical MSS for Ethernet - vlan_id, + vlan: Some(net_backend::VlanMetadata{ priority: 0, drop_eligible_indicator: 0, vlan_id}), ..Default::default() }; - tx_metadata.flags.set_vlan_enabled(true); let mut gpa = pkt_builder.data_len(); pkt_builder.push(TxSegment { @@ -1078,115 +1077,3 @@ fn get_queue_stats(queue_stats: Option<&dyn 
net_backend::BackendQueueStats>) -> ..Default::default() } } - -#[async_test] -async fn test_vlan_violation_triggers_fallback(driver: DefaultDriver) { - // Verify that a CQE_TX_VLAN_TAGGING_VIOLATION completion triggers the - // fallback signal through the Vport, which the emuplat layer would - // observe to initiate a VF-to-synthetic transition. - let pages = 256; - let mem = DeviceTestMemory::new(pages * 2, true, "test_vlan_fallback"); - let payload_mem = mem.payload_mem(); - - let msi_conn = MsiConnection::new(); - let device = gdma::GdmaDevice::new( - &VmTaskDriverSource::new(SingleDriverBackend::new(driver.clone())), - mem.guest_memory(), - msi_conn.target(), - vec![VportConfig { - mac_address: [1, 2, 3, 4, 5, 6].into(), - endpoint: Box::new(LoopbackEndpoint::new()), - }], - &mut ExternallyManagedMmioIntercepts, - ); - let device = EmulatedDevice::new(device, msi_conn, mem.dma_client()); - let dev_config = ManaQueryDeviceCfgResp { - pf_cap_flags1: 0.into(), - pf_cap_flags2: 0, - pf_cap_flags3: 0, - pf_cap_flags4: 0, - max_num_vports: 1, - reserved: 0, - max_num_eqs: 64, - }; - - let (fallback_tx, mut fallback_rx) = mesh::channel::<()>(); - let vport_state = mana_driver::mana::VportState::new(None, None, Some(fallback_tx)); - - let thing = ManaDevice::new(&driver, device, 1, 1, None).await.unwrap(); - let vport = thing - .new_vport(0, Some(vport_state), &dev_config) - .await - .unwrap(); - let mut endpoint = ManaEndpoint::new(driver.clone(), vport, GuestDmaMode::DirectDma).await; - let mut queues = Vec::new(); - let mut pool = net_backend::tests::Bufs::new(payload_mem.clone()); - endpoint - .get_queues( - vec![QueueConfig { - driver: Box::new(driver.clone()), - }], - None, - &mut queues, - ) - .await - .unwrap(); - - // Post initial RX buffers. - queues[0].rx_avail(&mut pool, &(1..128u32).map(RxId).collect::>()); - - // Build and send a single VLAN-tagged packet. The BNIC emulator will - // reject this because MANA does not support 802.1Q VLAN tag insertion. 
- let packet_len = 128; - let data_to_send = vec![0xABu8; packet_len]; - payload_mem.write_at(0, &data_to_send).unwrap(); - - let mut pkt_builder = TxPacketBuilder::new(); - build_tx_segments_vlan(packet_len, 1, 100, &mut pkt_builder); - queues[0] - .tx_avail(&mut pool, pkt_builder.segments()) - .unwrap(); - - // Poll for the TX completion. The GDMA emulator will respond with - // CQE_TX_VLAN_TAGGING_VIOLATION, which tx_poll handles by calling - // signal_fallback() on the vport. It does not return a TxError. - let mut tx_done = [TxId(0); 2]; - let mut tx_done_n = 0; - loop { - let mut context = CancelContext::new().with_timeout(Duration::from_secs(1)); - match context - .until_cancelled(poll_fn(|cx| queues[0].poll_ready(cx, &mut pool))) - .await - { - Err(CancelReason::DeadlineExceeded) => break, - Err(e) => panic!("Unexpected error polling queue: {e:?}"), - _ => {} - } - tx_done_n += queues[0] - .tx_poll(&mut pool, &mut tx_done[tx_done_n..]) - .unwrap_or(0); - if tx_done_n >= 1 { - break; - } - } - assert_eq!(tx_done_n, 1, "TX completion should be received"); - - // Verify stats reflect the VLAN violation. - let stats = get_queue_stats(queues[0].queue_stats()); - assert_eq!(stats.tx_errors.get(), 1, "tx_errors should be incremented"); - assert_eq!( - stats.tx_packets.get(), - 0, - "tx_packets should not be incremented for VLAN violations" - ); - - // Verify the fallback signal was sent through the channel. 
- let fallback_signal = fallback_rx.try_recv(); - assert!( - fallback_signal.is_ok(), - "fallback channel should have received a signal" - ); - - drop(queues); - endpoint.stop().await; -} From 078b0bd30b054412bff58c87512dcc9e25f38fa2 Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Wed, 29 Apr 2026 00:49:39 +0000 Subject: [PATCH 12/19] Format fixes --- vm/devices/net/net_mana/src/test.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vm/devices/net/net_mana/src/test.rs b/vm/devices/net/net_mana/src/test.rs index a780e5dcf5..b4dffa526e 100644 --- a/vm/devices/net/net_mana/src/test.rs +++ b/vm/devices/net/net_mana/src/test.rs @@ -897,7 +897,11 @@ fn build_tx_segments_vlan( l3_len: 20, // IPv4 header l4_len: 20, // TCP header max_segment_size: 1460, // Typical MSS for Ethernet - vlan: Some(net_backend::VlanMetadata{ priority: 0, drop_eligible_indicator: 0, vlan_id}), + vlan: Some(net_backend::VlanMetadata { + priority: 0, + drop_eligible_indicator: 0, + vlan_id, + }), ..Default::default() }; From e0f35e0a055fc65890f27bee31f6206333b756b8 Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Thu, 30 Apr 2026 15:50:54 -0700 Subject: [PATCH 13/19] Fixed the lack of Rx-side support for VLANs. 
--- vm/devices/net/netvsp/src/buffers.rs | 49 ++- vm/devices/net/netvsp/src/rndisprot.rs | 25 +- vm/devices/net/netvsp/src/test.rs | 443 ++++++++++++++++++++++--- 3 files changed, 463 insertions(+), 54 deletions(-) diff --git a/vm/devices/net/netvsp/src/buffers.rs b/vm/devices/net/netvsp/src/buffers.rs index 68a381a5e7..72d9da915b 100644 --- a/vm/devices/net/netvsp/src/buffers.rs +++ b/vm/devices/net/netvsp/src/buffers.rs @@ -150,16 +150,16 @@ impl BufferAccess for BufferPool { struct Header { header: rndisprot::MessageHeader, packet: rndisprot::Packet, - per_packet_info: PerPacketInfo, } #[repr(C)] #[derive(zerocopy::IntoBytes, Immutable, KnownLayout, Debug)] struct PerPacketInfo { header: rndisprot::PerPacketInfo, - checksum: rndisprot::RxTcpIpChecksumInfo, + payload: u32, } + let mut ppi_count = 1; let checksum = rndisprot::RxTcpIpChecksumInfo::new_zeroed() .set_ip_checksum_failed(metadata.ip_checksum == RxChecksumState::Bad) .set_ip_checksum_succeeded(metadata.ip_checksum.is_valid()) @@ -184,6 +184,32 @@ impl BufferAccess for BufferPool { .set_udp_checksum_succeeded( metadata.l4_protocol == L4Protocol::Udp && metadata.l4_checksum.is_valid(), ); + let checksum_ppi = PerPacketInfo { + header: rndisprot::PerPacketInfo { + size: size_of::() as u32, + typ: rndisprot::PPI_TCP_IP_CHECKSUM, + per_packet_information_offset: size_of::() as u32, + }, + payload: checksum.0, + }; + + let vlan = if let Some(vlan_info) = metadata.vlan { + ppi_count += 1; + Some(PerPacketInfo { + header: rndisprot::PerPacketInfo { + size: size_of::() as u32, + typ: rndisprot::PPI_VLAN, + per_packet_information_offset: size_of::() as u32, + }, + payload: rndisprot::EthVlanInfo::new_zeroed() + .set_priority(vlan_info.priority) + .set_drop_eligible_indicator(vlan_info.drop_eligible_indicator != 0) + .set_vlan_id(vlan_info.vlan_id) + .0, + }) + } else { + None + }; let header = Header { header: rndisprot::MessageHeader { @@ -202,21 +228,20 @@ impl BufferAccess for BufferPool { oob_data_length: 0, 
num_oob_data_elements: 0, per_packet_info_offset: size_of::() as u32, - per_packet_info_length: size_of::() as u32, + per_packet_info_length: ppi_count * size_of::() as u32, vc_handle: 0, reserved: 0, }, - per_packet_info: PerPacketInfo { - header: rndisprot::PerPacketInfo { - size: size_of::() as u32, - typ: rndisprot::PPI_TCP_IP_CHECKSUM, - per_packet_information_offset: size_of::() as u32, - }, - checksum, - }, }; - self.buffers.write_at(self.offset(id), header.as_bytes()); + let mut offset = self.offset(id); + self.buffers.write_at(offset, header.as_bytes()); + offset += size_of::
() as u32; + self.buffers.write_at(offset, checksum_ppi.as_bytes()); + offset += size_of::() as u32; + if let Some(vlan_ppi) = vlan { + self.buffers.write_at(offset, vlan_ppi.as_bytes()); + } } } diff --git a/vm/devices/net/netvsp/src/rndisprot.rs b/vm/devices/net/netvsp/src/rndisprot.rs index 51fec82f80..8509c4e887 100644 --- a/vm/devices/net/netvsp/src/rndisprot.rs +++ b/vm/devices/net/netvsp/src/rndisprot.rs @@ -714,15 +714,32 @@ impl TcpLsoInfo { pub struct EthVlanInfo(pub u32); impl EthVlanInfo { - pub fn priority(self) -> u8 { - (self.0 as u8) & 0x3 + /// priority is a 3-bit field, any bits outside the lower portion of the low + /// nybble are ignored + pub fn set_priority(mut self, priority: u8) -> Self { + self.0 = (self.0 & !0x7) | (priority as u32 & 0x7); + self + } + + pub fn priority(&self) -> u8 { + (self.0 as u8) & 0x7 } - pub fn drop_eligible_indicator(self) -> u8 { + pub fn set_drop_eligible_indicator(mut self, indicator: bool) -> Self { + self.0 = (self.0 & !0x8) | if indicator { 0x8 } else { 0x0 }; + self + } + + pub fn drop_eligible_indicator(&self) -> u8 { (self.0 >> 3) as u8 & 0x1 } - pub fn vlan_id(self) -> u16 { + pub fn set_vlan_id(mut self, vlan_id: u16) -> Self { + self.0 = (self.0 & !0xFFF0) | ((vlan_id as u32 & 0xFFF) << 4); + self + } + + pub fn vlan_id(&self) -> u16 { (self.0 >> 4) as u16 & 0xfff } } diff --git a/vm/devices/net/netvsp/src/test.rs b/vm/devices/net/netvsp/src/test.rs index 8d0c4dcb31..f05da598e9 100644 --- a/vm/devices/net/netvsp/src/test.rs +++ b/vm/devices/net/netvsp/src/test.rs @@ -29,10 +29,12 @@ use net_backend::BufferAccess; use net_backend::DisconnectableEndpoint; use net_backend::Endpoint; use net_backend::EndpointAction; +use net_backend::L4Protocol; use net_backend::MultiQueueSupport; use net_backend::Queue as NetQueue; use net_backend::QueueConfig; -use net_backend::RxBufferSegment; +use net_backend::RxChecksumState; +use net_backend::RxMetadata; use net_backend::TxError; use net_backend::TxOffloadSupport; use 
net_backend::null::NullEndpoint; @@ -140,7 +142,7 @@ struct TestNicEndpointState { pub vf_state: Option, pub stop_endpoint_counter: usize, pub link_status_updater: Option>>, - pub queues: Vec>>, + pub queues: Vec, RxMetadata)>>, /// When true (default), `TestNicQueue::tx_avail` returns `(true, N)` so /// TX packets are completed synchronously. When false it returns /// `(false, N)`, leaving packets in-flight. @@ -169,6 +171,20 @@ impl TestNicEndpointState { let status_vec = link_status.iter().copied().collect::>(); link_status_updater.send(status_vec); } + + /// Send an RX packet on the given queue with default (no offload) metadata. + pub fn send_rx(&self, queue_idx: usize, data: Vec) { + let metadata = RxMetadata { + len: data.len(), + ..Default::default() + }; + self.queues[queue_idx].send((data, metadata)); + } + + /// Send an RX packet on the given queue with explicit metadata. + pub fn send_rx_with_metadata(&self, queue_idx: usize, data: Vec, metadata: RxMetadata) { + self.queues[queue_idx].send((data, metadata)); + } } struct TestNicEndpointInner { @@ -343,19 +359,18 @@ struct TestNicQueue { #[inspect(skip)] rx_ids: VecDeque, #[inspect(skip)] - rx: mesh::Receiver>, + rx: mesh::Receiver<(Vec, RxMetadata)>, #[inspect(skip)] endpoint_state: Option>>, - next_rx_packet: Option>, - sync_tx: bool, #[inspect(skip)] - scratch_segments: Vec, + next_rx_packet: Option<(Vec, RxMetadata)>, + sync_tx: bool, } impl TestNicQueue { pub fn new( _config: QueueConfig, - rx: mesh::Receiver>, + rx: mesh::Receiver<(Vec, RxMetadata)>, sync_tx: bool, endpoint_state: Option>>, ) -> Self { @@ -365,7 +380,6 @@ impl TestNicQueue { endpoint_state, next_rx_packet: None, sync_tx, - scratch_segments: Vec::new(), } } } @@ -407,29 +421,24 @@ impl NetQueue for TestNicQueue { self.next_rx_packet = self.rx.try_recv().ok(); } - if let Some(packet) = self.next_rx_packet.take() { - let len = packet.len(); - assert!(len > 0); + if let Some((packet, metadata)) = self.next_rx_packet.take() { + 
assert!(!packet.is_empty(), "test RX packets must not be empty"); + assert_eq!( + metadata.len, + packet.len(), + "RxMetadata.len must match actual packet length" + ); let rx_id = self.rx_ids.pop_front().unwrap(); - tracing::info!(rx_id = rx_id.0, ?packet, "returning packet on receive path"); - let mut packet = &packet[..]; - self.scratch_segments.clear(); - pool.push_guest_addresses(rx_id, &mut self.scratch_segments); - let guest_memory = pool.guest_memory(); - for seg in &self.scratch_segments { - // N.B. The packet data is written after the implicit header, - // which is 256 bytes long. The header can be written with - // self.pool.write_header(...) if desired. - let write_len = packet.len().min(seg.len as usize); - tracing::info!(seg.gpa, write_len, "writing packet to guest memory"); - guest_memory - .write_at(seg.gpa, &packet[..write_len]) - .unwrap(); - packet = &packet[write_len..]; - if packet.is_empty() { - break; - } - } + assert!( + packet.len() <= pool.capacity(rx_id) as usize, + "test RX packet exceeds buffer capacity" + ); + tracing::info!( + rx_id = rx_id.0, + len = packet.len(), + "returning packet on receive path" + ); + pool.write_packet(rx_id, &metadata, &packet); packets[0] = rx_id; Ok(1) } else { @@ -1831,6 +1840,79 @@ impl RndisMessageParser { assert!(reader.skip(RX_HEADER_LEN).is_ok()); reader.read_plain::().unwrap() } + + /// Parse the per-packet info (PPI) entries from an RX data message. + /// Walks the PPI chain using the Packet header's offset/length fields, + /// matching each entry by type. + pub fn parse_rx_ppi(&self, external_ranges: &MultiPagedRangeBuf) -> RxPpiInfo { + let mut reader = PagedRanges::new(external_ranges.iter()).reader(&self.mem); + // Skip the MessageHeader to read the Packet struct. 
+ assert!(reader.skip(size_of::()).is_ok()); + let packet: rndisprot::Packet = reader.read_plain().unwrap(); + + let ppi_offset = packet.per_packet_info_offset as usize; + let ppi_length = packet.per_packet_info_length as usize; + + if ppi_length == 0 { + return RxPpiInfo::default(); + } + + // Seek to the PPI area (relative to after MessageHeader). + let mut reader = PagedRanges::new(external_ranges.iter()).reader(&self.mem); + let ppi_start = size_of::() + ppi_offset; + assert!(reader.skip(ppi_start).is_ok()); + + let mut ppi_bytes = vec![0u8; ppi_length]; + reader.read(&mut ppi_bytes).unwrap(); + + let mut result = RxPpiInfo::default(); + let mut offset = 0usize; + while offset < ppi_length { + assert!( + offset + size_of::() <= ppi_length, + "PPI header extends past PPI region" + ); + let header = rndisprot::PerPacketInfo::read_from_prefix(&ppi_bytes[offset..]) + .unwrap() + .0; + assert!( + header.size as usize >= size_of::(), + "PPI entry size too small" + ); + assert!( + offset + header.size as usize <= ppi_length, + "PPI entry extends past PPI region" + ); + + let payload_start = offset + header.per_packet_information_offset as usize; + match header.typ { + rndisprot::PPI_TCP_IP_CHECKSUM => { + let value = u32::read_from_prefix(&ppi_bytes[payload_start..]) + .unwrap() + .0; + result.checksum = Some(rndisprot::RxTcpIpChecksumInfo(value)); + } + rndisprot::PPI_VLAN => { + let value = u32::read_from_prefix(&ppi_bytes[payload_start..]) + .unwrap() + .0; + result.vlan = Some(rndisprot::EthVlanInfo(value)); + } + _ => { + // Unknown PPI type — skip. + } + } + offset += header.size as usize; + } + result + } +} + +/// Parsed per-packet info from an RX RNDIS message. +#[derive(Default, Debug)] +struct RxPpiInfo { + pub checksum: Option, + pub vlan: Option, } enum TestVirtualFunctionStateChange { @@ -4412,8 +4494,8 @@ async fn send_rndis_set_packet_filter(driver: DefaultDriver) { // Send a packet on every queue. 
{ let locked_state = endpoint_state.lock(); - for (idx, queue) in locked_state.queues.iter().enumerate() { - queue.send(vec![idx as u8]); + for idx in 0..locked_state.queues.len() { + locked_state.send_rx(idx, vec![idx as u8]); } } @@ -4452,8 +4534,8 @@ async fn send_rndis_set_packet_filter(driver: DefaultDriver) { // Send a packet on every queue. { let locked_state = endpoint_state.lock(); - for (idx, queue) in locked_state.queues.iter().enumerate() { - queue.send(vec![idx as u8]); + for idx in 0..locked_state.queues.len() { + locked_state.send_rx(idx, vec![idx as u8]); } } @@ -4521,8 +4603,8 @@ async fn send_rndis_set_packet_filter(driver: DefaultDriver) { // Test sending packets with the filter set to None. for _ in 0..2 { let locked_state = endpoint_state.lock(); - for (idx, queue) in locked_state.queues.iter().enumerate() { - queue.send(vec![idx as u8]); + for idx in 0..locked_state.queues.len() { + locked_state.send_rx(idx, vec![idx as u8]); } } @@ -5307,8 +5389,8 @@ async fn set_rss_parameter_bufs_not_evenly_divisible(driver: DefaultDriver) { // Receive a packet on every queue. { let locked_state = endpoint_state.lock(); - for (idx, queue) in locked_state.queues.iter().enumerate() { - queue.send(vec![idx as u8]); + for idx in 0..locked_state.queues.len() { + locked_state.send_rx(idx, vec![idx as u8]); } } @@ -6071,6 +6153,291 @@ async fn rndis_send_lso_packet_with_vlan_ppi(driver: DefaultDriver) { assert_eq!(metadata.max_segment_size, 1460); } +/// Helper to initialize RNDIS and set the packet filter on a channel so +/// that RX packets will be delivered to the guest. 
+async fn initialize_rndis_for_rx(channel: &mut TestNicChannel<'_>) { + channel + .send_rndis_control_message( + rndisprot::MESSAGE_TYPE_INITIALIZE_MSG, + rndisprot::InitializeRequest { + request_id: 1, + major_version: rndisprot::MAJOR_VERSION, + minor_version: rndisprot::MINOR_VERSION, + max_transfer_size: 0, + }, + &[], + ) + .await; + + let init_complete: rndisprot::InitializeComplete = channel + .read_rndis_control_message(rndisprot::MESSAGE_TYPE_INITIALIZE_CMPLT) + .await + .unwrap(); + assert_eq!(init_complete.status, rndisprot::STATUS_SUCCESS); + + // Set packet filter so RX packets are delivered to the guest. + channel + .send_rndis_control_message( + rndisprot::MESSAGE_TYPE_SET_MSG, + rndisprot::SetRequest { + request_id: 2, + oid: rndisprot::Oid::OID_GEN_CURRENT_PACKET_FILTER, + information_buffer_length: size_of::() as u32, + information_buffer_offset: size_of::() as u32, + device_vc_handle: 0, + }, + &rndisprot::NPROTO_PACKET_FILTER.to_le_bytes(), + ) + .await; + + let set_complete: rndisprot::SetComplete = channel + .read_rndis_control_message(rndisprot::MESSAGE_TYPE_SET_CMPLT) + .await + .unwrap(); + assert_eq!(set_complete.status, rndisprot::STATUS_SUCCESS); +} + +/// Helper to inject an RX packet on queue 0, read it from the guest channel, +/// parse the RNDIS PPI, and complete the transfer. 
+async fn inject_and_parse_rx( + channel: &mut TestNicChannel<'_>, + endpoint_state: &Arc>, + parser: &RndisMessageParser, + data: Vec, + metadata: RxMetadata, +) -> RxPpiInfo { + { + let locked_state = endpoint_state.lock(); + locked_state.send_rx_with_metadata(0, data, metadata); + } + + let (ppi, txid) = channel + .read_with(|packet| match packet { + IncomingPacket::Data(data) => { + let (_, external_ranges) = parser.parse_data_message(data); + let ppi = parser.parse_rx_ppi(&external_ranges); + let txid = data + .transaction_id() + .expect("data packets should have txid"); + (ppi, txid) + } + _ => panic!("Unexpected packet type on RX"), + }) + .await + .expect("RX data packet"); + + // Complete the transfer so the buffer is returned. + channel + .write(OutgoingPacket { + transaction_id: txid, + packet_type: OutgoingPacketType::Completion, + payload: &NvspMessage { + header: protocol::MessageHeader { + message_type: protocol::MESSAGE1_TYPE_SEND_RNDIS_PACKET_COMPLETE, + }, + data: protocol::Message1SendRndisPacketComplete { + status: protocol::Status::SUCCESS, + }, + padding: &[], + } + .payload(), + }) + .await; + + ppi +} + +#[async_test] +async fn rndis_rx_vlan_packet(driver: DefaultDriver) { + let endpoint_state = TestNicEndpointState::new(); + let endpoint = TestNicEndpoint::new(Some(endpoint_state.clone())); + let nic = Nic::builder().build( + &VmTaskDriverSource::new(SingleDriverBackend::new(driver.clone())), + Guid::new_random(), + Box::new(endpoint), + [1, 2, 3, 4, 5, 6].into(), + 0, + ); + + let mut nic = TestNicDevice::new_with_nic(&driver, nic).await; + nic.start_vmbus_channel(); + let mut channel = nic.connect_vmbus_channel().await; + channel + .initialize(0, protocol::NdisConfigCapabilities::new()) + .await; + initialize_rndis_for_rx(&mut channel).await; + + let parser = channel.rndis_message_parser(); + let data = vec![0xAA; 60]; + let metadata = RxMetadata { + len: data.len(), + vlan: Some(net_backend::VlanMetadata { + priority: 5, + 
drop_eligible_indicator: 1, + vlan_id: 100, + }), + ..Default::default() + }; + + let ppi = inject_and_parse_rx(&mut channel, &endpoint_state, &parser, data, metadata).await; + + let vlan = ppi.vlan.expect("VLAN PPI should be present"); + assert_eq!(vlan.vlan_id(), 100); + assert_eq!(vlan.priority(), 5); + assert_eq!(vlan.drop_eligible_indicator(), 1); + // Checksum PPI should also be present (always emitted). + assert!( + ppi.checksum.is_some(), + "checksum PPI should always be present" + ); +} + +#[async_test] +async fn rndis_rx_vlan_packet_with_tcp_checksum(driver: DefaultDriver) { + let endpoint_state = TestNicEndpointState::new(); + let endpoint = TestNicEndpoint::new(Some(endpoint_state.clone())); + let nic = Nic::builder().build( + &VmTaskDriverSource::new(SingleDriverBackend::new(driver.clone())), + Guid::new_random(), + Box::new(endpoint), + [1, 2, 3, 4, 5, 6].into(), + 0, + ); + + let mut nic = TestNicDevice::new_with_nic(&driver, nic).await; + nic.start_vmbus_channel(); + let mut channel = nic.connect_vmbus_channel().await; + channel + .initialize(0, protocol::NdisConfigCapabilities::new()) + .await; + initialize_rndis_for_rx(&mut channel).await; + + let parser = channel.rndis_message_parser(); + let data = vec![0xBB; 60]; + let metadata = RxMetadata { + len: data.len(), + ip_checksum: RxChecksumState::Good, + l4_checksum: RxChecksumState::Good, + l4_protocol: L4Protocol::Tcp, + vlan: Some(net_backend::VlanMetadata { + priority: 3, + drop_eligible_indicator: 0, + vlan_id: 42, + }), + ..Default::default() + }; + + let ppi = inject_and_parse_rx(&mut channel, &endpoint_state, &parser, data, metadata).await; + + // Verify VLAN PPI. + let vlan = ppi.vlan.expect("VLAN PPI should be present"); + assert_eq!(vlan.vlan_id(), 42); + assert_eq!(vlan.priority(), 3); + assert_eq!(vlan.drop_eligible_indicator(), 0); + + // Verify checksum PPI reports TCP checksum succeeded. 
+ let csum = ppi.checksum.expect("checksum PPI should be present"); + assert!(csum.tcp_checksum_succeeded()); + assert!(csum.ip_checksum_succeeded()); + assert!(!csum.tcp_checksum_failed()); +} + +#[async_test] +async fn rndis_rx_packet_no_vlan(driver: DefaultDriver) { + let endpoint_state = TestNicEndpointState::new(); + let endpoint = TestNicEndpoint::new(Some(endpoint_state.clone())); + let nic = Nic::builder().build( + &VmTaskDriverSource::new(SingleDriverBackend::new(driver.clone())), + Guid::new_random(), + Box::new(endpoint), + [1, 2, 3, 4, 5, 6].into(), + 0, + ); + + let mut nic = TestNicDevice::new_with_nic(&driver, nic).await; + nic.start_vmbus_channel(); + let mut channel = nic.connect_vmbus_channel().await; + channel + .initialize(0, protocol::NdisConfigCapabilities::new()) + .await; + initialize_rndis_for_rx(&mut channel).await; + + let parser = channel.rndis_message_parser(); + let data = vec![0xCC; 60]; + let metadata = RxMetadata { + len: data.len(), + ..Default::default() + }; + + let ppi = inject_and_parse_rx(&mut channel, &endpoint_state, &parser, data, metadata).await; + + assert!( + ppi.vlan.is_none(), + "VLAN PPI should not be present when no VLAN metadata is set" + ); + assert!( + ppi.checksum.is_some(), + "checksum PPI should always be present" + ); +} + +#[async_test] +async fn rndis_rx_vlan_preserves_packet_data(driver: DefaultDriver) { + let endpoint_state = TestNicEndpointState::new(); + let endpoint = TestNicEndpoint::new(Some(endpoint_state.clone())); + let nic = Nic::builder().build( + &VmTaskDriverSource::new(SingleDriverBackend::new(driver.clone())), + Guid::new_random(), + Box::new(endpoint), + [1, 2, 3, 4, 5, 6].into(), + 0, + ); + + let mut nic = TestNicDevice::new_with_nic(&driver, nic).await; + nic.start_vmbus_channel(); + let mut channel = nic.connect_vmbus_channel().await; + channel + .initialize(0, protocol::NdisConfigCapabilities::new()) + .await; + initialize_rndis_for_rx(&mut channel).await; + + let parser = 
channel.rndis_message_parser(); + let data = vec![0xDD; 60]; + let metadata = RxMetadata { + len: data.len(), + vlan: Some(net_backend::VlanMetadata { + priority: 7, + drop_eligible_indicator: 0, + vlan_id: 4094, + }), + ..Default::default() + }; + + { + let locked_state = endpoint_state.lock(); + locked_state.send_rx_with_metadata(0, data.clone(), metadata); + } + + channel + .read_with(|packet| match packet { + IncomingPacket::Data(data_packet) => { + let (_, external_ranges) = parser.parse_data_message(data_packet); + // Verify the packet data is intact after the 256-byte RNDIS header. + let received: [u8; 60] = parser.get_data_packet_content(&external_ranges); + assert_eq!(&received[..], &data[..], "packet data should be preserved"); + + // Also verify the VLAN PPI. + let ppi = parser.parse_rx_ppi(&external_ranges); + let vlan = ppi.vlan.expect("VLAN PPI should be present"); + assert_eq!(vlan.vlan_id(), 4094); + assert_eq!(vlan.priority(), 7); + } + _ => panic!("Unexpected packet type on RX"), + }) + .await + .expect("RX data packet"); +} + /// Helper: builds an RSS-enable parameter block that the set_rss_parameter /// OID path accepts. fn build_rss_enable_params() -> Vec { From b43accc4c89f3f37591c1efe73b4c75f33ccd4ec Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Fri, 1 May 2026 22:12:59 +0000 Subject: [PATCH 14/19] Ensure GDMA/bnic side is functioning correctly, and include additional test coverage. 
--- Cargo.lock | 1 + vm/devices/net/gdma/src/bnic.rs | 29 +- vm/devices/net/mana_driver/src/tests.rs | 3 - vm/devices/net/net_backend/src/tests.rs | 7 + vm/devices/net/net_mana/Cargo.toml | 1 + vm/devices/net/net_mana/src/test.rs | 425 ++++++++++++++++++++++++ vm/devices/net/netvsp/src/lib.rs | 22 +- 7 files changed, 450 insertions(+), 38 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c456516676..27c0b0597b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4627,6 +4627,7 @@ dependencies = [ "mesh", "net_backend", "pal_async", + "parking_lot", "pci_core", "safeatomic", "test_with_tracing", diff --git a/vm/devices/net/gdma/src/bnic.rs b/vm/devices/net/gdma/src/bnic.rs index d60805ed08..b3f6589dff 100644 --- a/vm/devices/net/gdma/src/bnic.rs +++ b/vm/devices/net/gdma/src/bnic.rs @@ -4,7 +4,6 @@ use self::bnic_defs::CQE_RX_TRUNCATED; use self::bnic_defs::CQE_TX_GDMA_ERR; use self::bnic_defs::CQE_TX_OKAY; -use self::bnic_defs::CQE_TX_VLAN_TAGGING_VIOLATION; use self::bnic_defs::MANA_CQE_COMPLETION; use self::bnic_defs::ManaCommandCode; use self::bnic_defs::ManaCqeHeader; @@ -554,14 +553,6 @@ impl TxRxTask { let sge0 = sqe.sgl().first().context("no sgl")?; - // Real MANA hardware rejects packets that request 802.1Q VLAN tag - // insertion, since MANA does not support VLANs. Emulate this by - // inspecting the long OOB field that the guest driver sets. 
- if oob.l_oob.inject_vlan_pri_tag() { - self.post_tx_completion_vlan_error(); - return Ok(()); - } - let total_len: usize = sqe.sgl().iter().map(|sge| sge.size as usize).sum(); let mut meta = TxMetadata { id: TxId(0), @@ -576,8 +567,16 @@ impl TxRxTask { l2_len: 14, l3_len: oob.s_oob.trans_off().clamp(14, 255) - 14, l4_len: 0, + tcp_header_offset: 0, max_segment_size: 0, - ..Default::default() + vlan: oob + .l_oob + .inject_vlan_pri_tag() + .then(|| net_backend::VlanMetadata { + priority: 0, + drop_eligible_indicator: 0, + vlan_id: oob.l_oob.vlan_id(), + }), }; if sqe.header.params.client_oob_in_sgl() { @@ -636,18 +635,10 @@ impl TxRxTask { // Possible test improvement: provide proper OOB data for the GDMA error. fn post_tx_completion_error(&mut self) { - self.post_tx_completion_with_type(CQE_TX_GDMA_ERR); - } - - fn post_tx_completion_vlan_error(&mut self) { - self.post_tx_completion_with_type(CQE_TX_VLAN_TAGGING_VIOLATION); - } - - fn post_tx_completion_with_type(&mut self, cqe_type: u8) { let tx_oob = ManaTxCompOob { cqe_hdr: ManaCqeHeader::new() .with_client_type(MANA_CQE_COMPLETION) - .with_cqe_type(cqe_type), + .with_cqe_type(CQE_TX_GDMA_ERR), tx_data_offset: 0, offsets: ManaTxCompOobOffsets::new(), reserved: [0; 12], diff --git a/vm/devices/net/mana_driver/src/tests.rs b/vm/devices/net/mana_driver/src/tests.rs index fef7c03dcf..44efe6b772 100644 --- a/vm/devices/net/mana_driver/src/tests.rs +++ b/vm/devices/net/mana_driver/src/tests.rs @@ -37,7 +37,6 @@ async fn test_gdma(driver: DefaultDriver) { vec![VportConfig { mac_address: [1, 2, 3, 4, 5, 6].into(), endpoint: Box::new(NullEndpoint::new()), - reject_tx_with_vlan_error: false, }], &mut ExternallyManagedMmioIntercepts, ); @@ -176,7 +175,6 @@ async fn test_gdma_save_restore(driver: DefaultDriver) { vec![VportConfig { mac_address: [1, 2, 3, 4, 5, 6].into(), endpoint: Box::new(NullEndpoint::new()), - reject_tx_with_vlan_error: false, }], &mut ExternallyManagedMmioIntercepts, ); @@ -325,7 +323,6 @@ async fn 
test_gdma_reconfig_vf(driver: DefaultDriver) { vec![VportConfig { mac_address: [1, 2, 3, 4, 5, 6].into(), endpoint: Box::new(NullEndpoint::new()), - reject_tx_with_vlan_error: false, }], &mut ExternallyManagedMmioIntercepts, ); diff --git a/vm/devices/net/net_backend/src/tests.rs b/vm/devices/net/net_backend/src/tests.rs index fa4ab40667..abe218153e 100644 --- a/vm/devices/net/net_backend/src/tests.rs +++ b/vm/devices/net/net_backend/src/tests.rs @@ -75,3 +75,10 @@ impl BufferAccess for Bufs { *self.inner.rx_metadata[id.0 as usize].lock() = Some(*metadata); } } + +impl Bufs { + /// Returns the [`RxMetadata`] written for the given receive buffer, if any. + pub fn rx_metadata(&self, id: RxId) -> Option { + *self.inner.rx_metadata[id.0 as usize].lock() + } +} diff --git a/vm/devices/net/net_mana/Cargo.toml b/vm/devices/net/net_mana/Cargo.toml index c8870908e2..43421182d1 100644 --- a/vm/devices/net/net_mana/Cargo.toml +++ b/vm/devices/net/net_mana/Cargo.toml @@ -32,6 +32,7 @@ zerocopy.workspace = true [dev-dependencies] chipset_device.workspace = true gdma.workspace = true +parking_lot.workspace = true pci_core.workspace = true test_with_tracing.workspace = true user_driver_emulated_mock.workspace = true diff --git a/vm/devices/net/net_mana/src/test.rs b/vm/devices/net/net_mana/src/test.rs index b4dffa526e..bfecd90270 100644 --- a/vm/devices/net/net_mana/src/test.rs +++ b/vm/devices/net/net_mana/src/test.rs @@ -7,9 +7,11 @@ use crate::GuestDmaMode; use crate::ManaEndpoint; use crate::ManaTestConfiguration; use crate::QueueStats; +use async_trait::async_trait; use chipset_device::mmio::ExternallyManagedMmioIntercepts; use gdma::VportConfig; use gdma_defs::bnic::ManaQueryDeviceCfgResp; +use inspect::InspectMut; use inspect_counters::Counter; use mana_driver::mana::ManaDevice; use mesh::CancelContext; @@ -24,7 +26,11 @@ use net_backend::loopback::LoopbackEndpoint; use pal_async::DefaultDriver; use pal_async::async_test; use pci_core::msi::MsiConnection; +use 
std::collections::VecDeque; use std::future::poll_fn; +use std::sync::Arc; +use std::task::Context; +use std::task::Poll; use std::time::Duration; use test_with_tracing::test; use user_driver_emulated_mock::DeviceTestMemory; @@ -1081,3 +1087,422 @@ fn get_queue_stats(queue_stats: Option<&dyn net_backend::BackendQueueStats>) -> ..Default::default() } } + +// --------------------------------------------------------------------------- +// VLAN-preserving loopback endpoint +// --------------------------------------------------------------------------- + +/// Shared state capturing TX metadata from the GDMA emulator, accessible to +/// test assertions. +#[derive(Clone)] +struct VlanTestState { + inner: Arc>, +} + +#[derive(Default)] +struct VlanTestStateInner { + /// TX metadata captured from each transmitted packet, in order. + tx_metadata: Vec, +} + +impl VlanTestState { + fn new() -> Self { + Self { + inner: Arc::new(parking_lot::Mutex::new(VlanTestStateInner::default())), + } + } + + fn tx_metadata(&self) -> Vec { + self.inner.lock().tx_metadata.clone() + } +} + +/// A loopback endpoint that preserves VLAN metadata from TX → RX and captures +/// every transmitted packet's [`TxMetadata`] for later inspection. 
+struct VlanPreservingEndpoint {
+    state: VlanTestState,
+}
+
+impl VlanPreservingEndpoint {
+    fn new(state: VlanTestState) -> Self {
+        Self { state }
+    }
+}
+
+struct VlanPreservingQueue {
+    rx_avail: VecDeque<RxId>,
+    rx_done: VecDeque<RxId>,
+    state: VlanTestState,
+}
+
+#[async_trait]
+impl Endpoint for VlanPreservingEndpoint {
+    fn endpoint_type(&self) -> &'static str {
+        "vlan_loopback"
+    }
+
+    async fn get_queues(
+        &mut self,
+        config: Vec<QueueConfig>,
+        _rss: Option<&net_backend::RssConfig<'_>>,
+        queues: &mut Vec<Box<dyn net_backend::Queue>>,
+    ) -> anyhow::Result<()> {
+        queues.extend(config.into_iter().map(|_config| {
+            Box::new(VlanPreservingQueue {
+                rx_avail: VecDeque::new(),
+                rx_done: VecDeque::new(),
+                state: self.state.clone(),
+            }) as _
+        }));
+        Ok(())
+    }
+
+    async fn stop(&mut self) {}
+
+    fn is_ordered(&self) -> bool {
+        true
+    }
+
+    fn multiqueue_support(&self) -> net_backend::MultiQueueSupport {
+        net_backend::MultiQueueSupport {
+            max_queues: u16::MAX,
+            indirection_table_size: 64,
+        }
+    }
+}
+
+impl InspectMut for VlanPreservingEndpoint {
+    fn inspect_mut(&mut self, _req: inspect::Request<'_>) {}
+}
+
+impl InspectMut for VlanPreservingQueue {
+    fn inspect_mut(&mut self, _req: inspect::Request<'_>) {}
+}
+
+impl net_backend::Queue for VlanPreservingQueue {
+    fn poll_ready(&mut self, _cx: &mut Context<'_>, _pool: &mut dyn BufferAccess) -> Poll<()> {
+        if self.rx_done.is_empty() {
+            Poll::Pending
+        } else {
+            Poll::Ready(())
+        }
+    }
+
+    fn rx_avail(&mut self, _pool: &mut dyn BufferAccess, done: &[RxId]) {
+        self.rx_avail.extend(done);
+    }
+
+    fn rx_poll(
+        &mut self,
+        _pool: &mut dyn BufferAccess,
+        packets: &mut [RxId],
+    ) -> anyhow::Result<usize> {
+        let n = packets.len().min(self.rx_done.len());
+        for (d, s) in packets.iter_mut().zip(self.rx_done.drain(..n)) {
+            *d = s;
+        }
+        Ok(n)
+    }
+
+    fn tx_avail(
+        &mut self,
+        pool: &mut dyn BufferAccess,
+        mut segments: &[TxSegment],
+    ) -> anyhow::Result<(bool, usize)> {
+        let mut sent = 0;
+        while !segments.is_empty() && !self.rx_avail.is_empty() {
+            let (meta, _, _) = net_backend::next_packet(segments);
+            let vlan = meta.vlan;
+            {
+                let mut state = self.state.inner.lock();
+                state.tx_metadata.push(meta.clone());
+            }
+            let before = segments.len();
+            let packet = net_backend::linearize(pool, &mut segments)?;
+            sent += before - segments.len();
+            let rx_id = self.rx_avail.pop_front().unwrap();
+            pool.write_packet(
+                rx_id,
+                &net_backend::RxMetadata {
+                    offset: 0,
+                    len: packet.len(),
+                    vlan,
+                    ..Default::default()
+                },
+                &packet,
+            );
+            self.rx_done.push_back(rx_id);
+        }
+        Ok((true, sent))
+    }
+
+    fn tx_poll(
+        &mut self,
+        _pool: &mut dyn BufferAccess,
+        _done: &mut [TxId],
+    ) -> Result<usize, net_backend::TxError> {
+        Ok(0)
+    }
+}
+
+/// Run a VLAN-aware send/receive test. Returns the captured TX metadata (from
+/// the GDMA→backend boundary) and the per-buffer RX metadata (from the
+/// net_mana→pool boundary), along with queue stats.
+async fn test_vlan_endpoint(
+    driver: DefaultDriver,
+    dma_mode: GuestDmaMode,
+    pkt_builder: &TxPacketBuilder,
+    expected_num_send_packets: usize,
+    expected_num_received_packets: usize,
+) -> (
+    QueueStats,
+    Vec<TxMetadata>,
+    Vec<Option<net_backend::RxMetadata>>,
+) {
+    let pages = 256;
+    let allow_dma = dma_mode == GuestDmaMode::DirectDma;
+    let mem: DeviceTestMemory = DeviceTestMemory::new(pages * 2, allow_dma, "test_vlan_endpoint");
+    let payload_mem = mem.payload_mem();
+    let data_to_send = pkt_builder.packet_data();
+    let tx_segments = pkt_builder.segments();
+
+    let vlan_state = VlanTestState::new();
+    let msi_conn = MsiConnection::new();
+    let device = gdma::GdmaDevice::new(
+        &VmTaskDriverSource::new(SingleDriverBackend::new(driver.clone())),
+        mem.guest_memory(),
+        msi_conn.target(),
+        vec![VportConfig {
+            mac_address: [1, 2, 3, 4, 5, 6].into(),
+            endpoint: Box::new(VlanPreservingEndpoint::new(vlan_state.clone())),
+        }],
+        &mut ExternallyManagedMmioIntercepts,
+    );
+    let device = EmulatedDevice::new(device, msi_conn, mem.dma_client());
+    let dev_config = ManaQueryDeviceCfgResp {
+        pf_cap_flags1: 0.into(),
+        pf_cap_flags2: 0,
+        pf_cap_flags3: 0,
+        pf_cap_flags4: 0,
+        max_num_vports: 1,
+        reserved: 0,
+        max_num_eqs: 64,
+        adapter_mtu: 0,
+        reserved2: 0,
+        adapter_link_speed_mbps: 0,
+    };
+    let thing = ManaDevice::new(&driver, device, 1, 1, None).await.unwrap();
+    let vport = thing.new_vport(0, None, &dev_config).await.unwrap();
+    let mut endpoint = ManaEndpoint::new(driver.clone(), vport, dma_mode).await;
+    let mut queues = Vec::new();
+    let mut pool = net_backend::tests::Bufs::new(payload_mem.clone());
+    endpoint
+        .get_queues(
+            vec![QueueConfig {
+                driver: Box::new(driver.clone()),
+            }],
+            None,
+            &mut queues,
+        )
+        .await
+        .unwrap();
+
+    queues[0].rx_avail(&mut pool, &(1..128u32).map(RxId).collect::<Vec<_>>());
+
+    payload_mem.write_at(0, &data_to_send).unwrap();
+
+    queues[0].tx_avail(&mut pool, tx_segments).unwrap();
+
+    let mut rx_packets = (0..expected_num_received_packets.max(2))
+        .map(|i| RxId(i as u32))
+        .collect::<Vec<_>>();
+    let mut rx_packets_n = 0;
+    let mut tx_done = vec![TxId(0); expected_num_send_packets.max(2)];
+    let mut tx_done_n = 0;
+
+    let done = |rx_n: usize, tx_n: usize| -> bool {
+        rx_n >= expected_num_received_packets && tx_n >= expected_num_send_packets
+    };
+
+    loop {
+        let mut context = CancelContext::new().with_timeout(Duration::from_secs(1));
+        match context
+            .until_cancelled(poll_fn(|cx| queues[0].poll_ready(cx, &mut pool)))
+            .await
+        {
+            Err(CancelReason::DeadlineExceeded) => break,
+            Err(e) => {
+                tracing::error!(error = ?e, "Failed to poll queue ready");
+                break;
+            }
+            _ => {}
+        }
+        rx_packets_n += queues[0]
+            .rx_poll(&mut pool, &mut rx_packets[rx_packets_n..])
+            .unwrap();
+        tx_done_n += queues[0]
+            .tx_poll(&mut pool, &mut tx_done[tx_done_n..])
+            .unwrap_or(0);
+        if done(rx_packets_n, tx_done_n) {
+            break;
+        }
+    }
+    assert_eq!(rx_packets_n, expected_num_received_packets);
+    assert_eq!(tx_done_n, expected_num_send_packets);
+
+    // Gather per-buffer RX metadata written by net_mana.
+    let rx_meta: Vec<Option<net_backend::RxMetadata>> = rx_packets[..rx_packets_n]
+        .iter()
+        .map(|id| pool.rx_metadata(*id))
+        .collect();
+
+    let stats = get_queue_stats(queues[0].queue_stats());
+    let captured_tx = vlan_state.tx_metadata();
+    drop(queues);
+    endpoint.stop().await;
+    (stats, captured_tx, rx_meta)
+}
+
+// ---------------------------------------------------------------------------
+// VLAN tests
+// ---------------------------------------------------------------------------
+
+/// Verify that a single VLAN-tagged packet round-trips through the MANA TX and
+/// RX paths with the VLAN ID preserved.
+#[async_test]
+async fn test_vlan_tx_rx_roundtrip_direct_dma(driver: DefaultDriver) {
+    let mut pkt_builder = TxPacketBuilder::new();
+    build_tx_segments_vlan(1138, 1, 42, &mut pkt_builder);
+
+    let (stats, captured_tx, rx_meta) = test_vlan_endpoint(
+        driver,
+        GuestDmaMode::DirectDma,
+        &pkt_builder,
+        1, // expected TX
+        1, // expected RX
+    )
+    .await;
+
+    assert_eq!(stats.tx_packets.get(), 1);
+    assert_eq!(stats.rx_packets.get(), 1);
+
+    // TX: GDMA decoded the OOB and the backend received VLAN metadata.
+    assert_eq!(captured_tx.len(), 1);
+    let tx_vlan = captured_tx[0].vlan.expect("TX metadata should carry VLAN");
+    assert_eq!(tx_vlan.vlan_id, 42);
+
+    // RX: net_mana parsed the CQE and surfaced the VLAN to the pool.
+    let rx = rx_meta[0].expect("RX metadata should be present");
+    let rx_vlan = rx.vlan.expect("RX metadata should carry VLAN");
+    assert_eq!(rx_vlan.vlan_id, 42);
+}
+
+/// Same round-trip but with bounce-buffer DMA mode.
+#[async_test] +async fn test_vlan_tx_rx_roundtrip_bounce_buffer(driver: DefaultDriver) { + let mut pkt_builder = TxPacketBuilder::new(); + build_tx_segments_vlan(1138, 1, 99, &mut pkt_builder); + + let (stats, captured_tx, rx_meta) = + test_vlan_endpoint(driver, GuestDmaMode::BounceBuffer, &pkt_builder, 1, 1).await; + + assert_eq!(stats.tx_packets.get(), 1); + assert_eq!(stats.rx_packets.get(), 1); + + let tx_vlan = captured_tx[0].vlan.expect("TX metadata should carry VLAN"); + assert_eq!(tx_vlan.vlan_id, 99); + + let rx_vlan = rx_meta[0] + .expect("RX metadata should be present") + .vlan + .expect("RX metadata should carry VLAN"); + assert_eq!(rx_vlan.vlan_id, 99); +} + +/// Verify that a non-VLAN packet does NOT produce VLAN metadata. +#[async_test] +async fn test_no_vlan_rx_metadata_when_untagged(driver: DefaultDriver) { + let mut pkt_builder = TxPacketBuilder::new(); + build_tx_segments(1138, 1, false, &mut pkt_builder); + + let (_stats, captured_tx, rx_meta) = + test_vlan_endpoint(driver, GuestDmaMode::DirectDma, &pkt_builder, 1, 1).await; + + assert!( + captured_tx[0].vlan.is_none(), + "TX metadata must not carry VLAN for an untagged packet" + ); + + let rx = rx_meta[0].expect("RX metadata should be present"); + assert!( + rx.vlan.is_none(), + "RX metadata must not carry VLAN for an untagged packet" + ); +} + +/// Mix of VLAN-tagged and untagged packets in a single TX batch. 
+#[async_test] +async fn test_vlan_mixed_batch(driver: DefaultDriver) { + let mut pkt_builder = TxPacketBuilder::new(); + + // Packet 0: no VLAN + build_tx_segments(550, 1, false, &mut pkt_builder); + // Packet 1: VLAN 100 + build_tx_segments_vlan(550, 1, 100, &mut pkt_builder); + // Packet 2: no VLAN, multi-segment + build_tx_segments(1130, 10, false, &mut pkt_builder); + // Packet 3: VLAN 4094 (max 12-bit value) + build_tx_segments_vlan(550, 1, 4094, &mut pkt_builder); + + let (stats, captured_tx, rx_meta) = + test_vlan_endpoint(driver, GuestDmaMode::DirectDma, &pkt_builder, 4, 4).await; + + assert_eq!(stats.tx_packets.get(), 4); + assert_eq!(stats.rx_packets.get(), 4); + + // Packet 0: no VLAN + assert!(captured_tx[0].vlan.is_none()); + assert!( + rx_meta[0] + .expect("RX metadata should be present") + .vlan + .is_none() + ); + + // Packet 1: VLAN 100 + assert_eq!( + captured_tx[1].vlan.expect("TX should carry VLAN").vlan_id, + 100 + ); + assert_eq!( + rx_meta[1] + .expect("RX metadata should be present") + .vlan + .expect("RX should carry VLAN") + .vlan_id, + 100 + ); + + // Packet 2: no VLAN + assert!(captured_tx[2].vlan.is_none()); + assert!( + rx_meta[2] + .expect("RX metadata should be present") + .vlan + .is_none() + ); + + // Packet 3: VLAN 4094 + assert_eq!( + captured_tx[3].vlan.expect("TX should carry VLAN").vlan_id, + 4094 + ); + assert_eq!( + rx_meta[3] + .expect("RX metadata should be present") + .vlan + .expect("RX should carry VLAN") + .vlan_id, + 4094 + ); +} diff --git a/vm/devices/net/netvsp/src/lib.rs b/vm/devices/net/netvsp/src/lib.rs index 0d3fd464c4..5bda62f22b 100644 --- a/vm/devices/net/netvsp/src/lib.rs +++ b/vm/devices/net/netvsp/src/lib.rs @@ -2587,26 +2587,16 @@ impl NetChannel { if metadata.flags.offload_tcp_checksum() { // The offset must be set if a checksum is being captured. 
- if metadata.tcp_header_offset < metadata.l2_len as u16 { - return Err(WorkerError::InvalidTcpHeaderOffset( - metadata.tcp_header_offset, - )); - } else if metadata.flags.is_ipv4() - && metadata.tcp_header_offset < (metadata.l2_len as u16 + 20) - { - return Err(WorkerError::InvalidTcpHeaderOffset( - metadata.tcp_header_offset, - )); - } else if metadata.flags.is_ipv6() - && metadata.tcp_header_offset < (metadata.l2_len as u16 + 40) + if (metadata.tcp_header_offset < metadata.l2_len as u16) + || (metadata.flags.is_ipv4() + && metadata.tcp_header_offset < (metadata.l2_len as u16 + 20)) + || (metadata.flags.is_ipv6() + && metadata.tcp_header_offset < (metadata.l2_len as u16 + 40)) + || (metadata.tcp_header_offset as u32 >= request.data_length) { return Err(WorkerError::InvalidTcpHeaderOffset( metadata.tcp_header_offset, )); - } else if metadata.tcp_header_offset as u32 >= request.data_length { - return Err(WorkerError::InvalidTcpHeaderOffset( - metadata.tcp_header_offset, - )); } metadata.l3_len = metadata.tcp_header_offset - metadata.l2_len as u16; From 8d02b15dd3aee905dd95ed7589dd6c66dc93af90 Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Fri, 1 May 2026 23:06:07 +0000 Subject: [PATCH 15/19] Cleaning up loose ends --- vm/devices/net/gdma/src/bnic.rs | 3 +-- vm/devices/net/net_backend/src/lib.rs | 7 +++++-- vm/devices/net/net_mana/src/lib.rs | 16 ++++++---------- vm/devices/net/net_mana/src/test.rs | 2 +- vm/devices/net/netvsp/src/buffers.rs | 2 +- vm/devices/net/netvsp/src/rndisprot.rs | 7 +++++-- vm/devices/net/netvsp/src/test.rs | 10 +++++----- 7 files changed, 24 insertions(+), 23 deletions(-) diff --git a/vm/devices/net/gdma/src/bnic.rs b/vm/devices/net/gdma/src/bnic.rs index b3f6589dff..5c5f346e5e 100644 --- a/vm/devices/net/gdma/src/bnic.rs +++ b/vm/devices/net/gdma/src/bnic.rs @@ -552,7 +552,6 @@ impl TxRxTask { }; let sge0 = sqe.sgl().first().context("no sgl")?; - let total_len: usize = sqe.sgl().iter().map(|sge| sge.size as usize).sum(); let mut meta = 
TxMetadata { id: TxId(0), @@ -574,7 +573,7 @@ impl TxRxTask { .inject_vlan_pri_tag() .then(|| net_backend::VlanMetadata { priority: 0, - drop_eligible_indicator: 0, + drop_eligible_indicator: false, vlan_id: oob.l_oob.vlan_id(), }), }; diff --git a/vm/devices/net/net_backend/src/lib.rs b/vm/devices/net/net_backend/src/lib.rs index 3eb1a1a182..33c7997d7d 100644 --- a/vm/devices/net/net_backend/src/lib.rs +++ b/vm/devices/net/net_backend/src/lib.rs @@ -285,8 +285,8 @@ pub trait BufferAccess { pub struct VlanMetadata { /// Priority for 802.1Q. Actually a 3-bit value. pub priority: u8, - /// This should be 0. - pub drop_eligible_indicator: u8, + /// In pretty much every circumstance this is false. + pub drop_eligible_indicator: bool, /// The 802.1Q ID for this transmission. Actually a 12-bit value. pub vlan_id: u16, } @@ -418,6 +418,9 @@ pub struct TxMetadata { /// Only guaranteed to be set if [`TxFlags::offload_tcp_segmentation`] or /// [`TxFlags::offload_udp_segmentation`] is set. pub max_segment_size: u16, + /// Information about 802.1Q VLAN tagging. When a vlan is in use, this structure + /// is populated. Only applies when traffic is being received over an L2 connection, + /// so L3-only or above traffic will not use this option. 
pub vlan: Option, } diff --git a/vm/devices/net/net_mana/src/lib.rs b/vm/devices/net/net_mana/src/lib.rs index 6c55116315..3e9cb37ef9 100644 --- a/vm/devices/net/net_mana/src/lib.rs +++ b/vm/devices/net/net_mana/src/lib.rs @@ -966,15 +966,11 @@ impl Queue for ManaQueue { } else { (L4Protocol::Unknown, RxChecksumState::Unknown) }; - let vlantag = if rx_oob.flags.rx_vlantag_present() { - Some(VlanMetadata { - drop_eligible_indicator: 0, - priority: 0, - vlan_id: rx_oob.flags.rx_vlan_id() as u16, - }) - } else { - None - }; + let vlantag = rx_oob.flags.rx_vlantag_present().then(|| VlanMetadata { + drop_eligible_indicator: false, + priority: 0, + vlan_id: rx_oob.flags.rx_vlan_id() as u16, + }); let len = rx_oob.ppi[0].pkt_len.into(); pool.write_header( rx.id, @@ -1181,7 +1177,7 @@ impl ManaQueue { oob.l_oob.set_inject_vlan_pri_tag(true); oob.l_oob.set_vlan_id(vlan.vlan_id); oob.l_oob.set_pcp(vlan.priority); - oob.l_oob.set_dei(vlan.drop_eligible_indicator != 0); + oob.l_oob.set_dei(vlan.drop_eligible_indicator); } let short_format = self.vp_offset <= 0xff && !meta.vlan.is_some(); if short_format { diff --git a/vm/devices/net/net_mana/src/test.rs b/vm/devices/net/net_mana/src/test.rs index bfecd90270..ddf8a671f7 100644 --- a/vm/devices/net/net_mana/src/test.rs +++ b/vm/devices/net/net_mana/src/test.rs @@ -905,7 +905,7 @@ fn build_tx_segments_vlan( max_segment_size: 1460, // Typical MSS for Ethernet vlan: Some(net_backend::VlanMetadata { priority: 0, - drop_eligible_indicator: 0, + drop_eligible_indicator: false, vlan_id, }), ..Default::default() diff --git a/vm/devices/net/netvsp/src/buffers.rs b/vm/devices/net/netvsp/src/buffers.rs index 72d9da915b..669f97df94 100644 --- a/vm/devices/net/netvsp/src/buffers.rs +++ b/vm/devices/net/netvsp/src/buffers.rs @@ -203,7 +203,7 @@ impl BufferAccess for BufferPool { }, payload: rndisprot::EthVlanInfo::new_zeroed() .set_priority(vlan_info.priority) - .set_drop_eligible_indicator(vlan_info.drop_eligible_indicator != 0) + 
.set_drop_eligible_indicator(vlan_info.drop_eligible_indicator) .set_vlan_id(vlan_info.vlan_id) .0, }) diff --git a/vm/devices/net/netvsp/src/rndisprot.rs b/vm/devices/net/netvsp/src/rndisprot.rs index 8509c4e887..56a6e4f5be 100644 --- a/vm/devices/net/netvsp/src/rndisprot.rs +++ b/vm/devices/net/netvsp/src/rndisprot.rs @@ -725,15 +725,18 @@ impl EthVlanInfo { (self.0 as u8) & 0x7 } + /// In practical use this should always be false, but who knows? pub fn set_drop_eligible_indicator(mut self, indicator: bool) -> Self { self.0 = (self.0 & !0x8) | if indicator { 0x8 } else { 0x0 }; self } - pub fn drop_eligible_indicator(&self) -> u8 { - (self.0 >> 3) as u8 & 0x1 + pub fn drop_eligible_indicator(&self) -> bool { + self.0 & 0x8 != 0 } + /// VLAN IDs are 12 bits. This will silently reject any bits outside of + /// the range. pub fn set_vlan_id(mut self, vlan_id: u16) -> Self { self.0 = (self.0 & !0xFFF0) | ((vlan_id as u32 & 0xFFF) << 4); self diff --git a/vm/devices/net/netvsp/src/test.rs b/vm/devices/net/netvsp/src/test.rs index f05da598e9..b6088f80e9 100644 --- a/vm/devices/net/netvsp/src/test.rs +++ b/vm/devices/net/netvsp/src/test.rs @@ -6273,7 +6273,7 @@ async fn rndis_rx_vlan_packet(driver: DefaultDriver) { len: data.len(), vlan: Some(net_backend::VlanMetadata { priority: 5, - drop_eligible_indicator: 1, + drop_eligible_indicator: true, vlan_id: 100, }), ..Default::default() @@ -6284,7 +6284,7 @@ async fn rndis_rx_vlan_packet(driver: DefaultDriver) { let vlan = ppi.vlan.expect("VLAN PPI should be present"); assert_eq!(vlan.vlan_id(), 100); assert_eq!(vlan.priority(), 5); - assert_eq!(vlan.drop_eligible_indicator(), 1); + assert_eq!(vlan.drop_eligible_indicator(), true); // Checksum PPI should also be present (always emitted). 
assert!( ppi.checksum.is_some(), @@ -6321,7 +6321,7 @@ async fn rndis_rx_vlan_packet_with_tcp_checksum(driver: DefaultDriver) { l4_protocol: L4Protocol::Tcp, vlan: Some(net_backend::VlanMetadata { priority: 3, - drop_eligible_indicator: 0, + drop_eligible_indicator: false, vlan_id: 42, }), ..Default::default() @@ -6333,7 +6333,7 @@ async fn rndis_rx_vlan_packet_with_tcp_checksum(driver: DefaultDriver) { let vlan = ppi.vlan.expect("VLAN PPI should be present"); assert_eq!(vlan.vlan_id(), 42); assert_eq!(vlan.priority(), 3); - assert_eq!(vlan.drop_eligible_indicator(), 0); + assert_eq!(vlan.drop_eligible_indicator(), false); // Verify checksum PPI reports TCP checksum succeeded. let csum = ppi.checksum.expect("checksum PPI should be present"); @@ -6407,7 +6407,7 @@ async fn rndis_rx_vlan_preserves_packet_data(driver: DefaultDriver) { len: data.len(), vlan: Some(net_backend::VlanMetadata { priority: 7, - drop_eligible_indicator: 0, + drop_eligible_indicator: false, vlan_id: 4094, }), ..Default::default() From dc7f8642c2991b2ffa0de100cf5a3481c97a14a8 Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Mon, 4 May 2026 10:53:33 -0700 Subject: [PATCH 16/19] Yes, it's transport length. 
--- vm/devices/net/gdma/src/bnic.rs | 2 +- vm/devices/net/net_backend/src/lib.rs | 8 +++---- vm/devices/net/net_mana/src/lib.rs | 2 +- vm/devices/net/netvsp/src/lib.rs | 34 ++++++++++++++++----------- 4 files changed, 26 insertions(+), 20 deletions(-) diff --git a/vm/devices/net/gdma/src/bnic.rs b/vm/devices/net/gdma/src/bnic.rs index 5c5f346e5e..e7be892207 100644 --- a/vm/devices/net/gdma/src/bnic.rs +++ b/vm/devices/net/gdma/src/bnic.rs @@ -566,7 +566,7 @@ impl TxRxTask { l2_len: 14, l3_len: oob.s_oob.trans_off().clamp(14, 255) - 14, l4_len: 0, - tcp_header_offset: 0, + transport_header_offset: 0, max_segment_size: 0, vlan: oob .l_oob diff --git a/vm/devices/net/net_backend/src/lib.rs b/vm/devices/net/net_backend/src/lib.rs index 33c7997d7d..20857dca2a 100644 --- a/vm/devices/net/net_backend/src/lib.rs +++ b/vm/devices/net/net_backend/src/lib.rs @@ -411,9 +411,9 @@ pub struct TxMetadata { /// The length of the TCP header. Only guaranteed to be set if various /// offload flags are set. pub l4_len: u8, - /// The offset into the buffer where the TCP header begins. Only expected - /// to be set if offload flags are set. - pub tcp_header_offset: u16, + /// The offset into the buffer where the L4 header begins (TCP or UDP). Only + /// expected to be set if offload (checksum and/or segmentation) flags are set. + pub transport_header_offset: u16, /// The maximum segment size, used for segmentation offload (TSO or USO). /// Only guaranteed to be set if [`TxFlags::offload_tcp_segmentation`] or /// [`TxFlags::offload_udp_segmentation`] is set. 
@@ -466,7 +466,7 @@ impl Default for TxMetadata { l2_len: 0, l3_len: 0, l4_len: 0, - tcp_header_offset: 0, + transport_header_offset: 0, max_segment_size: 0, vlan: None, } diff --git a/vm/devices/net/net_mana/src/lib.rs b/vm/devices/net/net_mana/src/lib.rs index 3e9cb37ef9..70721fff68 100644 --- a/vm/devices/net/net_mana/src/lib.rs +++ b/vm/devices/net/net_mana/src/lib.rs @@ -1170,7 +1170,7 @@ impl ManaQueue { .set_comp_tcp_csum(meta.flags.offload_tcp_checksum()); oob.s_oob .set_comp_udp_csum(meta.flags.offload_udp_checksum()); - if meta.flags.offload_tcp_checksum() { + if meta.flags.offload_tcp_checksum() || meta.flags.offload_udp_checksum() { oob.s_oob.set_trans_off(meta.l2_len as u16 + meta.l3_len); } if let Some(vlan) = &meta.vlan { diff --git a/vm/devices/net/netvsp/src/lib.rs b/vm/devices/net/netvsp/src/lib.rs index 5bda62f22b..131204207c 100644 --- a/vm/devices/net/netvsp/src/lib.rs +++ b/vm/devices/net/netvsp/src/lib.rs @@ -2551,7 +2551,7 @@ impl NetChannel { .set_offload_ip_header_checksum(n.is_ipv4() && n.ip_header_checksum()); metadata.flags.set_is_ipv4(n.is_ipv4()); metadata.flags.set_is_ipv6(n.is_ipv6() && !n.is_ipv4()); - metadata.tcp_header_offset = n.tcp_header_offset(); + metadata.transport_header_offset = n.tcp_header_offset(); } rndisprot::PPI_LSO => { let n: rndisprot::TcpLsoInfo = d.reader(mem).read_plain()?; @@ -2562,7 +2562,7 @@ impl NetChannel { metadata.flags.set_is_ipv4(n.is_ipv4()); metadata.flags.set_is_ipv6(n.is_ipv6() && !n.is_ipv4()); metadata.max_segment_size = n.mss() as u16; - metadata.tcp_header_offset = n.tcp_header_offset(); + metadata.transport_header_offset = n.tcp_header_offset(); } rndisprot::PPI_VLAN => { let n: rndisprot::EthVlanInfo = d.reader(mem).read_plain()?; @@ -2585,33 +2585,34 @@ impl NetChannel { ETHERNET_HEADER_LEN } as u8; - if metadata.flags.offload_tcp_checksum() { - // The offset must be set if a checksum is being captured. 
- if (metadata.tcp_header_offset < metadata.l2_len as u16) + if metadata.flags.offload_tcp_checksum() || metadata.flags.offload_udp_checksum() { + // The offset must be set if we're handling checksums; we already know from the above logic + // that the L4 checksum-type will match the L4 protocol. + if (metadata.transport_header_offset < metadata.l2_len as u16) || (metadata.flags.is_ipv4() - && metadata.tcp_header_offset < (metadata.l2_len as u16 + 20)) + && metadata.transport_header_offset + < (metadata.l2_len as u16 + IPV4_MIN_HEADER_LEN)) || (metadata.flags.is_ipv6() - && metadata.tcp_header_offset < (metadata.l2_len as u16 + 40)) - || (metadata.tcp_header_offset as u32 >= request.data_length) + && metadata.transport_header_offset + < (metadata.l2_len as u16 + IPV6_MIN_HEADER_LEN)) + || (metadata.transport_header_offset as u32 >= request.data_length) { return Err(WorkerError::InvalidTcpHeaderOffset( - metadata.tcp_header_offset, + metadata.transport_header_offset, )); } - metadata.l3_len = metadata.tcp_header_offset - metadata.l2_len as u16; + metadata.l3_len = metadata.transport_header_offset - metadata.l2_len as u16; } - // no UDP validation currently. - if metadata.flags.offload_tcp_segmentation() { const TCP_DOFF_BYTE_OFFSET: u32 = 12; let tcp_hdr_doff_offset = - u32::from(metadata.tcp_header_offset) + TCP_DOFF_BYTE_OFFSET; + u32::from(metadata.transport_header_offset) + TCP_DOFF_BYTE_OFFSET; // Validate TCP header Data Offset 4 bit nibble within the packet data bounds. if tcp_hdr_doff_offset >= request.data_length { return Err(WorkerError::InvalidTcpHeaderOffset( - metadata.tcp_header_offset, + metadata.transport_header_offset, )); } metadata.l4_len = { @@ -2627,6 +2628,8 @@ impl NetChannel { stats.tx_invalid_lso_packets.increment(); } } + + // TODO: USO support is not present. 
} let start = segments.len(); @@ -3295,6 +3298,9 @@ const MAX_MTU: u32 = 9216; const ETHERNET_HEADER_LEN: u32 = 14; const ETHERNET_VLAN_HEADER_LEN: u32 = 18; +const IPV4_MIN_HEADER_LEN: u16 = 20; +const IPV6_MIN_HEADER_LEN: u16 = 40; + impl Adapter { fn get_guest_vf_serial_number(&self, vfid: u32) -> u32 { if let Some(guest_os_id) = self.get_guest_os_id.as_ref().map(|f| f()) { From d692c03ba41fd1d4732904763f7d9b76b2bf8107 Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Mon, 4 May 2026 13:00:19 -0700 Subject: [PATCH 17/19] Updates from generated feedback, clippy fixes. --- vm/devices/net/gdma/src/bnic.rs | 9 +++++++-- vm/devices/net/net_backend/src/lib.rs | 8 +++++++- vm/devices/net/net_mana/src/lib.rs | 2 +- vm/devices/net/netvsp/src/lib.rs | 26 ++++++++++---------------- vm/devices/net/netvsp/src/test.rs | 4 ++++ 5 files changed, 29 insertions(+), 20 deletions(-) diff --git a/vm/devices/net/gdma/src/bnic.rs b/vm/devices/net/gdma/src/bnic.rs index e7be892207..c9cf578657 100644 --- a/vm/devices/net/gdma/src/bnic.rs +++ b/vm/devices/net/gdma/src/bnic.rs @@ -553,6 +553,11 @@ impl TxRxTask { let sge0 = sqe.sgl().first().context("no sgl")?; let total_len: usize = sqe.sgl().iter().map(|sge| sge.size as usize).sum(); + let l2_len = if oob.l_oob.inject_vlan_pri_tag() { + net_backend::ETHERNET_VLAN_HEADER_LEN + } else { + net_backend::ETHERNET_HEADER_LEN + } as u16; let mut meta = TxMetadata { id: TxId(0), segment_count: sqe.sgl().len().try_into().unwrap(), @@ -563,8 +568,8 @@ impl TxRxTask { .with_offload_udp_checksum(oob.s_oob.comp_udp_csum()) .with_is_ipv4(oob.s_oob.is_outer_ipv4()) .with_is_ipv6(oob.s_oob.is_outer_ipv6() && !oob.s_oob.is_outer_ipv4()), - l2_len: 14, - l3_len: oob.s_oob.trans_off().clamp(14, 255) - 14, + l2_len: l2_len as u8, + l3_len: oob.s_oob.trans_off().clamp(l2_len, 255) - l2_len, l4_len: 0, transport_header_offset: 0, max_segment_size: 0, diff --git a/vm/devices/net/net_backend/src/lib.rs b/vm/devices/net/net_backend/src/lib.rs index 
20857dca2a..e2466ae924 100644 --- a/vm/devices/net/net_backend/src/lib.rs +++ b/vm/devices/net/net_backend/src/lib.rs @@ -281,6 +281,12 @@ pub trait BufferAccess { } } +pub const ETHERNET_HEADER_LEN: u32 = 14; +pub const ETHERNET_VLAN_HEADER_LEN: u32 = 18; + +pub const IPV4_MIN_HEADER_LEN: u16 = 20; +pub const IPV6_MIN_HEADER_LEN: u16 = 40; + #[derive(Debug, Copy, Clone)] pub struct VlanMetadata { /// Priority for 802.1Q. Actually a 3-bit value. @@ -419,7 +425,7 @@ pub struct TxMetadata { /// [`TxFlags::offload_udp_segmentation`] is set. pub max_segment_size: u16, /// Information about 802.1Q VLAN tagging. When a vlan is in use, this structure - /// is populated. Only applies when traffic is being received over an L2 connection, + /// is populated. Only applies when traffic is being sent over an L2 connection, /// so L3-only or above traffic will not use this option. pub vlan: Option, } diff --git a/vm/devices/net/net_mana/src/lib.rs b/vm/devices/net/net_mana/src/lib.rs index 70721fff68..41ff233959 100644 --- a/vm/devices/net/net_mana/src/lib.rs +++ b/vm/devices/net/net_mana/src/lib.rs @@ -1179,7 +1179,7 @@ impl ManaQueue { oob.l_oob.set_pcp(vlan.priority); oob.l_oob.set_dei(vlan.drop_eligible_indicator); } - let short_format = self.vp_offset <= 0xff && !meta.vlan.is_some(); + let short_format = self.vp_offset <= 0xff && meta.vlan.is_none(); if short_format { oob.s_oob.set_pkt_fmt(MANA_SHORT_PKT_FMT); oob.s_oob.set_short_vp_offset(self.vp_offset as u8); diff --git a/vm/devices/net/netvsp/src/lib.rs b/vm/devices/net/netvsp/src/lib.rs index 131204207c..5d900d9047 100644 --- a/vm/devices/net/netvsp/src/lib.rs +++ b/vm/devices/net/netvsp/src/lib.rs @@ -2572,7 +2572,7 @@ impl NetChannel { drop_eligible_indicator: n.drop_eligible_indicator(), vlan_id: n.vlan_id(), }); - stats.rx_vlan_packets.increment(); + stats.tx_vlan_packets.increment(); } _ => {} } @@ -2580,9 +2580,9 @@ impl NetChannel { } metadata.l2_len = if metadata.vlan.is_some() { - ETHERNET_VLAN_HEADER_LEN + 
net_backend::ETHERNET_VLAN_HEADER_LEN } else { - ETHERNET_HEADER_LEN + net_backend::ETHERNET_HEADER_LEN } as u8; if metadata.flags.offload_tcp_checksum() || metadata.flags.offload_udp_checksum() { @@ -2591,10 +2591,10 @@ impl NetChannel { if (metadata.transport_header_offset < metadata.l2_len as u16) || (metadata.flags.is_ipv4() && metadata.transport_header_offset - < (metadata.l2_len as u16 + IPV4_MIN_HEADER_LEN)) + < (metadata.l2_len as u16 + net_backend::IPV4_MIN_HEADER_LEN)) || (metadata.flags.is_ipv6() && metadata.transport_header_offset - < (metadata.l2_len as u16 + IPV6_MIN_HEADER_LEN)) + < (metadata.l2_len as u16 + net_backend::IPV6_MIN_HEADER_LEN)) || (metadata.transport_header_offset as u32 >= request.data_length) { return Err(WorkerError::InvalidTcpHeaderOffset( @@ -3295,12 +3295,6 @@ const DEFAULT_MTU: u32 = 1514; const MIN_MTU: u32 = DEFAULT_MTU; const MAX_MTU: u32 = 9216; -const ETHERNET_HEADER_LEN: u32 = 14; -const ETHERNET_VLAN_HEADER_LEN: u32 = 18; - -const IPV4_MIN_HEADER_LEN: u16 = 20; -const IPV6_MIN_HEADER_LEN: u16 = 40; - impl Adapter { fn get_guest_vf_serial_number(&self, vfid: u32) -> u32 { if let Some(guest_os_id) = self.get_guest_os_id.as_ref().map(|f| f()) { @@ -3420,7 +3414,7 @@ impl Adapter { rndisprot::Oid::OID_GEN_MAXIMUM_LOOKAHEAD | rndisprot::Oid::OID_GEN_CURRENT_LOOKAHEAD | rndisprot::Oid::OID_GEN_MAXIMUM_FRAME_SIZE => { - let len: u32 = buffers.ndis_config.mtu - ETHERNET_HEADER_LEN; + let len: u32 = buffers.ndis_config.mtu - net_backend::ETHERNET_HEADER_LEN; writer.write(len.as_bytes())?; } rndisprot::Oid::OID_GEN_MAXIMUM_TOTAL_SIZE @@ -3519,10 +3513,10 @@ impl Adapter { }, ipv4_enabled: rndisprot::NDIS_OFFLOAD_SUPPORTED, ipv4_encapsulation_type: rndisprot::NDIS_ENCAPSULATION_IEEE_802_3, - ipv4_header_size: ETHERNET_HEADER_LEN, + ipv4_header_size: net_backend::ETHERNET_HEADER_LEN, ipv6_enabled: rndisprot::NDIS_OFFLOAD_SUPPORTED, ipv6_encapsulation_type: rndisprot::NDIS_ENCAPSULATION_IEEE_802_3, - ipv6_header_size: 
ETHERNET_HEADER_LEN, + ipv6_header_size: net_backend::ETHERNET_HEADER_LEN, } .as_bytes()[..rndisprot::NDIS_SIZEOF_OFFLOAD_ENCAPSULATION_REVISION_1], )?; @@ -3752,13 +3746,13 @@ impl Adapter { )?; if encap.ipv4_enabled == rndisprot::NDIS_OFFLOAD_SET_ON && (encap.ipv4_encapsulation_type != rndisprot::NDIS_ENCAPSULATION_IEEE_802_3 - || encap.ipv4_header_size != ETHERNET_HEADER_LEN) + || encap.ipv4_header_size != net_backend::ETHERNET_HEADER_LEN) { return Err(OidError::NotSupported("ipv4 encap")); } if encap.ipv6_enabled == rndisprot::NDIS_OFFLOAD_SET_ON && (encap.ipv6_encapsulation_type != rndisprot::NDIS_ENCAPSULATION_IEEE_802_3 - || encap.ipv6_header_size != ETHERNET_HEADER_LEN) + || encap.ipv6_header_size != net_backend::ETHERNET_HEADER_LEN) { return Err(OidError::NotSupported("ipv6 encap")); } diff --git a/vm/devices/net/netvsp/src/test.rs b/vm/devices/net/netvsp/src/test.rs index b6088f80e9..2a2cf7e4d2 100644 --- a/vm/devices/net/netvsp/src/test.rs +++ b/vm/devices/net/netvsp/src/test.rs @@ -1885,6 +1885,10 @@ impl RndisMessageParser { ); let payload_start = offset + header.per_packet_information_offset as usize; + assert!( + payload_start + 4 <= offset + header.size as usize, + "PPI offset results in invalid reads" + ); match header.typ { rndisprot::PPI_TCP_IP_CHECKSUM => { let value = u32::read_from_prefix(&ppi_bytes[payload_start..]) From 2d504c13a0253cafcbbf171ed6fc5ac3f28152ea Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Mon, 4 May 2026 13:14:10 -0700 Subject: [PATCH 18/19] Missed a test break in tap --- vm/devices/net/net_tap/tests/tap_tests.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vm/devices/net/net_tap/tests/tap_tests.rs b/vm/devices/net/net_tap/tests/tap_tests.rs index 7f275f0281..424b39c88d 100644 --- a/vm/devices/net/net_tap/tests/tap_tests.rs +++ b/vm/devices/net/net_tap/tests/tap_tests.rs @@ -572,7 +572,9 @@ mod tap_tests { l2_len: 14, l3_len: 20, l4_len: 20, + transport_header_offset: 34, max_segment_size: 1460, + vlan: None, }), 
gpa: 0, len: frame_len, From 86502affc67a6206a950e68b17f0e5bf70d40262 Mon Sep 17 00:00:00 2001 From: Ben Lewis Date: Mon, 4 May 2026 14:36:12 -0700 Subject: [PATCH 19/19] Minor edits. --- vm/devices/net/gdma/src/bnic.rs | 8 ++++---- vm/devices/net/net_backend/src/lib.rs | 4 +++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/vm/devices/net/gdma/src/bnic.rs b/vm/devices/net/gdma/src/bnic.rs index c9cf578657..1988a8cbfc 100644 --- a/vm/devices/net/gdma/src/bnic.rs +++ b/vm/devices/net/gdma/src/bnic.rs @@ -557,7 +557,7 @@ impl TxRxTask { net_backend::ETHERNET_VLAN_HEADER_LEN } else { net_backend::ETHERNET_HEADER_LEN - } as u16; + }; let mut meta = TxMetadata { id: TxId(0), segment_count: sqe.sgl().len().try_into().unwrap(), @@ -569,7 +569,7 @@ impl TxRxTask { .with_is_ipv4(oob.s_oob.is_outer_ipv4()) .with_is_ipv6(oob.s_oob.is_outer_ipv6() && !oob.s_oob.is_outer_ipv4()), l2_len: l2_len as u8, - l3_len: oob.s_oob.trans_off().clamp(l2_len, 255) - l2_len, + l3_len: oob.s_oob.trans_off().clamp(l2_len as u16, 255) - l2_len as u16, l4_len: 0, transport_header_offset: 0, max_segment_size: 0, @@ -577,8 +577,8 @@ impl TxRxTask { .l_oob .inject_vlan_pri_tag() .then(|| net_backend::VlanMetadata { - priority: 0, - drop_eligible_indicator: false, + priority: oob.l_oob.pcp(), + drop_eligible_indicator: oob.l_oob.dei(), vlan_id: oob.l_oob.vlan_id(), }), }; diff --git a/vm/devices/net/net_backend/src/lib.rs b/vm/devices/net/net_backend/src/lib.rs index e2466ae924..e096dca6ea 100644 --- a/vm/devices/net/net_backend/src/lib.rs +++ b/vm/devices/net/net_backend/src/lib.rs @@ -291,7 +291,9 @@ pub const IPV6_MIN_HEADER_LEN: u16 = 40; pub struct VlanMetadata { /// Priority for 802.1Q. Actually a 3-bit value. pub priority: u8, - /// In pretty much every circumstance this is false. + /// In pretty much every circumstance this is false. When + /// it is used, setting DEI will inform switches/routing infra + /// that this can be dropped before higher priority traffic. 
pub drop_eligible_indicator: bool, /// The 802.1Q ID for this transmission. Actually a 12-bit value. pub vlan_id: u16,