From 7bfdedf1303f23f3b9c83f0895a19f257e2a3fb9 Mon Sep 17 00:00:00 2001 From: Gerard Date: Wed, 1 Jul 2026 19:58:31 -0400 Subject: [PATCH] fix(net): DpdkBackend::send_frame dropped EVERY frame (data_mut before set_data_len) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause of the DUT establishing ZERO TCP connections on real hardware (perf run 28552788420: TRex opened 150k flows / sent 21M packets, Active-flows=0). DpdkBackend::send_frame called mbuf.data_mut() on a freshly-allocated mbuf — whose data_len is 0 — so the returned slice was zero-length, the 'data.len() < frame.len()' capacity check was ALWAYS true, and it returned 'Frame too large: mbuf capacity: 0' before ever reaching tx_burst. Every outbound frame (SYN-ACK, ACK, data, RST, retransmit) for both TCP and QUIC real-NIC was silently dropped. Reorder to compute capacity from buf_len-data_offset and set_data_len() BEFORE data_mut(), matching the proven UDP TX path (dpdk-udp/src/lib.rs:1498-1523). - Tighten test_dpdk_backend_send_frame: it tolerated the capacity error ('is_err() || ...'), masking the bug. Now asserts the frame reaches tx_burst (Ok, or WouldBlock under stubs) and never fails the capacity check. - runtime.rs: the engine driver discarded send_frame errors ('let _ ='), so the silent drop was invisible and cost a full EC2 run to find. Route all 3 TX sites through send_or_warn(), which logs the first non-transient TX error. Verified locally (cargo build + test, no EC2). Found via a code-level investigation of the RX->SYN->SYN-ACK path. Co-Authored-By: Claude Opus 4.8 (1M context) --- dpdk-stdlib-net/src/backend_dpdk.rs | 33 ++++++++++++++++++++--------- dpdk-stdlib-tcp/src/runtime.rs | 21 +++++++++++++++--- 2 files changed, 41 insertions(+), 13 deletions(-) diff --git a/dpdk-stdlib-net/src/backend_dpdk.rs b/dpdk-stdlib-net/src/backend_dpdk.rs index d448398..f679ce5 100644 --- a/dpdk-stdlib-net/src/backend_dpdk.rs +++ b/dpdk-stdlib-net/src/backend_dpdk.rs @@ -169,21 +169,26 @@ impl PacketBackend for DpdkBackend { let mut mbuf = self.mempool.alloc() .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("mbuf alloc failed: {}", e)))?; - // Copy frame data into mbuf - let data = mbuf.data_mut() - .ok_or_else(|| io::Error::new(io::ErrorKind::Other, "Failed to get mbuf data"))?; - - if data.len() < frame.len() { + // A freshly-allocated mbuf has data_len == 0, so data_mut() would return + // a zero-length slice. Compute capacity from the buffer and set the data + // length BEFORE data_mut() so it returns a correctly-sized slice. Getting + // this order wrong makes the capacity check read 0 and silently drops + // EVERY frame — this matches the proven UDP TX path in dpdk-udp. + let capacity = mbuf.buf_len() as usize - mbuf.data_offset() as usize; + if capacity < frame.len() { return Err(io::Error::new( io::ErrorKind::InvalidInput, - format!("Frame too large: {} bytes, mbuf capacity: {}", frame.len(), data.len()), + format!("Frame too large: {} bytes, mbuf capacity: {}", frame.len(), capacity), )); } - data[..frame.len()].copy_from_slice(frame); mbuf.set_data_len(frame.len() as u16); mbuf.set_packet_len(frame.len() as u32); + let data = mbuf.data_mut() + .ok_or_else(|| io::Error::new(io::ErrorKind::Other, "Failed to get mbuf data"))?; + data[..frame.len()].copy_from_slice(frame); + // Transmit let port = self.port.lock().unwrap(); let mut packets = vec![mbuf]; @@ -298,10 +303,18 @@ mod tests { // EtherType (IPv4) frame[12..14].copy_from_slice(&[0x08, 0x00]); - // With stubs, tx_burst returns 0, so this will get WouldBlock + // The frame must pass the capacity check and reach tx_burst. With stubs + // tx_burst returns 0 → WouldBlock; on real hardware it returns Ok(len). + // It must NEVER fail the capacity check — that was the regression that + // silently dropped every TX frame (data_mut() before set_data_len()). let result = backend.send_frame(&frame); - // Stubs return 0 for tx_burst, so we expect WouldBlock - assert!(result.is_err() || result.unwrap() == 64); + if let Err(e) = &result { + assert_eq!( + e.kind(), + std::io::ErrorKind::WouldBlock, + "send_frame must reach tx_burst, not fail the capacity check: {e}" + ); + } } #[test] diff --git a/dpdk-stdlib-tcp/src/runtime.rs b/dpdk-stdlib-tcp/src/runtime.rs index 17f5072..98b1650 100644 --- a/dpdk-stdlib-tcp/src/runtime.rs +++ b/dpdk-stdlib-tcp/src/runtime.rs @@ -121,6 +121,21 @@ pub fn init_dpdk_tcp_context(cfg: DpdkTcpRuntimeConfig) -> io::Result<()> { /// /// Split out from [`run_engine_driver`] so it can be unit-tested with a mock /// backend. +/// Send a frame, warning ONCE if the backend rejects it for a non-transient +/// reason. A persistent TX failure would otherwise be silent — silently +/// dropping every SYN-ACK once cost a full EC2 perf run to diagnose. Transient +/// WouldBlock (tx ring full) is normal backpressure and ignored. +fn send_or_warn(backend: &Arc, out: &[u8]) { + if let Err(e) = backend.send_frame(out) { + if e.kind() != std::io::ErrorKind::WouldBlock { + static TX_ERR_ONCE: std::sync::Once = std::sync::Once::new(); + TX_ERR_ONCE.call_once(|| { + eprintln!("tcp-engine: backend.send_frame failed — frames are being dropped: {e}"); + }); + } + } +} + fn drive_once( backend: &Arc, engine: &mut TcpEngine, @@ -140,7 +155,7 @@ fn drive_once( src_mac.copy_from_slice(&frame[6..12]); if let Ok(seg) = parse_tcp_packet(frame) { for out in engine.on_segment_with_macs(&seg, src_mac, dst_mac) { - let _ = backend.send_frame(&out); + send_or_warn(backend, &out); } } } @@ -149,14 +164,14 @@ fn drive_once( // Commands (Connect/Listen/Accept/Shutdown/SetOption/Close). while let Ok(cmd) = cmd_rx.try_recv() { for out in engine.on_command(cmd) { - let _ = backend.send_frame(&out); + send_or_warn(backend, &out); } } // Timers + tx-ring drain → segments. let now = engine.clock().now(); for out in engine.on_tick(now) { - let _ = backend.send_frame(&out); + send_or_warn(backend, &out); } }