diff --git a/vm/devices/net/net_backend/src/lib.rs b/vm/devices/net/net_backend/src/lib.rs index fb838beb61..8e04d2e8ba 100644 --- a/vm/devices/net/net_backend/src/lib.rs +++ b/vm/devices/net/net_backend/src/lib.rs @@ -308,6 +308,17 @@ pub struct RxMetadata { pub l4_checksum: RxChecksumState, /// The L4 protocol. pub l4_protocol: L4Protocol, + /// The L3 protocol (IPv4/IPv6). Used for GSO/LRO metadata. + pub l3_protocol: L3Protocol, + /// L2 (Ethernet) header length in bytes (e.g. 14, or 18 with VLAN). + pub l2_len: u8, + /// L3 (IP) header length in bytes. + pub l3_len: u16, + /// L4 (TCP/UDP) header length in bytes. + pub l4_len: u8, + /// If non-zero, this is a GSO/LRO packet and this value is the MSS + /// (maximum segment size) that should be advertised to the guest. + pub gso_size: u16, } impl Default for RxMetadata { @@ -318,6 +329,11 @@ impl Default for RxMetadata { ip_checksum: RxChecksumState::Unknown, l4_checksum: RxChecksumState::Unknown, l4_protocol: L4Protocol::Unknown, + l3_protocol: L3Protocol::Unknown, + l2_len: 0, + l3_len: 0, + l4_len: 0, + gso_size: 0, } } } diff --git a/vm/devices/net/net_consomme/consomme/src/tcp.rs b/vm/devices/net/net_consomme/consomme/src/tcp.rs index 51fd341770..a007f5fa2a 100644 --- a/vm/devices/net/net_consomme/consomme/src/tcp.rs +++ b/vm/devices/net/net_consomme/consomme/src/tcp.rs @@ -21,6 +21,7 @@ use pal_async::driver::Driver; use pal_async::interest::PollEvents; use pal_async::socket::PollReady; use pal_async::socket::PolledSocket; +use smoltcp::phy::Checksum; use smoltcp::phy::ChecksumCapabilities; use smoltcp::wire::ETHERNET_HEADER_LEN; use smoltcp::wire::EthernetFrame; @@ -471,7 +472,19 @@ struct Sender<'a, T> { } impl Sender<'_, T> { - fn send_packet(&mut self, tcp: &TcpRepr<'_>, payload: Option>) { + /// Assemble and deliver a TCP packet to the client. 
+ /// + /// When `tso_mss` is `Some(mss)`, the payload is larger than a single + /// segment and the packet is delivered with [`ChecksumState::tso`] set so + /// that the downstream virtio-net device can present it to the guest as an + /// LRO/GSO packet. In this mode the TCP checksum is left as a + /// pseudo-header partial checksum (the guest completes it per-segment). + fn send_packet( + &mut self, + tcp: &TcpRepr<'_>, + payload: Option>, + tso_mss: Option, + ) { let buffer = &mut self.state.buffer; let mut eth_packet = EthernetFrame::new_unchecked(&mut buffer[..]); eth_packet.set_dst_addr(self.state.params.client_mac); @@ -511,22 +524,36 @@ impl Sender<'_, T> { let dst_ip_addr: IpAddress = self.ft.dst.ip().into(); let src_ip_addr: IpAddress = self.ft.src.ip().into(); let mut tcp_packet = TcpPacket::new_unchecked(tcp_payload_buf); - tcp.emit( - &mut tcp_packet, - &dst_ip_addr, - &src_ip_addr, - &ChecksumCapabilities::default(), - ); - + // Skip the TCP checksum during emit--fill_checksum below recomputes + // it after the payload has been copied in. + let mut caps = ChecksumCapabilities::default(); + caps.tcp = Checksum::None; + tcp.emit(&mut tcp_packet, &dst_ip_addr, &src_ip_addr, &caps); // Copy payload into TCP packet if let Some(payload) = &payload { payload.copy_to_slice(tcp_packet.payload_mut()); } - tcp_packet.fill_checksum(&self.ft.dst.ip().into(), &self.ft.src.ip().into()); + + if tso_mss.is_none() { + // Normal single-segment packet: compute the full checksum. + tcp_packet.fill_checksum(&self.ft.dst.ip().into(), &self.ft.src.ip().into()); + } + // For TSO packets the checksum field is left as emitted by + // smoltcp (zero / pseudo-header partial). The guest driver + // will compute per-segment checksums via NEEDS_CSUM. 
+ let n = ETHERNET_HEADER_LEN + ip_total_len; - let checksum_state = match self.ft.dst { - SocketAddr::V4(_) => ChecksumState::TCP4, - SocketAddr::V6(_) => ChecksumState::TCP6, + let checksum_state = match (self.ft.dst, tso_mss) { + (SocketAddr::V4(_), Some(mss)) => ChecksumState { + tso: Some(mss), + ..ChecksumState::TCP4 + }, + (SocketAddr::V6(_), Some(mss)) => ChecksumState { + tso: Some(mss), + ..ChecksumState::TCP6 + }, + (SocketAddr::V4(_), None) => ChecksumState::TCP4, + (SocketAddr::V6(_), None) => ChecksumState::TCP6, }; self.client.recv(&buffer[..n], &checksum_state); @@ -550,7 +577,7 @@ impl Sender<'_, T> { trace_tcp_packet(&tcp, 0, "rst xmit"); - self.send_packet(&tcp, None); + self.send_packet(&tcp, None, None); } } @@ -984,7 +1011,7 @@ impl TcpConnectionInner { payload: &[], }; - sender.send_packet(&tcp, None); + sender.send_packet(&tcp, None, None); self.tx_send += 1; } @@ -1023,7 +1050,9 @@ impl TcpConnectionInner { // exceeding: // 1. The available buffer length. // 2. The current window. - // 3. The configured maximum segment size. + // 3. The configured maximum segment size (only when the client + // buffer is not large enough for LRO — when it is, we emit one + // large frame and let the guest segment it). // 4. The client MTU. let tx_segment_end = { let ip_header_len = match sender.ft.dst { @@ -1032,11 +1061,21 @@ impl TcpConnectionInner { }; let header_len = ETHERNET_HEADER_LEN + ip_header_len + tcp.header_len(); let mtu = rx_mtu.min(sender.state.buffer.len()); + let max_payload = mtu - header_len; + // When the client buffer can hold more than one MSS of + // payload, skip the MSS cap and fill the whole buffer — + // the packet will be delivered as an LRO/TSO frame. + // Otherwise, apply the MSS limit for normal segmentation. 
+ let mss_limit = if max_payload > self.tx_mss { + tx_next + max_payload + } else { + tx_next + self.tx_mss + }; seq_min([ tx_payload_end, tx_window_end, - tx_next + self.tx_mss, - tx_next + (mtu - header_len), + mss_limit, + tx_next + max_payload, ]) }; @@ -1067,7 +1106,15 @@ impl TcpConnectionInner { .tx_buffer .view(payload_start..payload_start + payload_len); - sender.send_packet(&tcp, Some(payload)); + // When the payload exceeds a single MSS, deliver the frame as a + // TSO/LRO packet so the guest can re-segment it. + let tso_mss = if payload_len > self.tx_mss { + Some(self.tx_mss.min(u16::MAX as usize) as u16) + } else { + None + }; + + sender.send_packet(&tcp, Some(payload), tso_mss); self.tx_send = tx_next; self.needs_ack = false; } @@ -1118,7 +1165,7 @@ impl TcpConnectionInner { trace_tcp_packet(&tcp, 0, "ack"); - sender.send_packet(&tcp, None); + sender.send_packet(&tcp, None, None); } fn handle_listen_syn( diff --git a/vm/devices/net/net_consomme/src/lib.rs b/vm/devices/net/net_consomme/src/lib.rs index 2dc2294246..8e2d115b9d 100644 --- a/vm/devices/net/net_consomme/src/lib.rs +++ b/vm/devices/net/net_consomme/src/lib.rs @@ -17,6 +17,7 @@ use mesh::rpc::Rpc; use mesh::rpc::RpcError; use mesh::rpc::RpcSend; use net_backend::BufferAccess; +use net_backend::L3Protocol; use net_backend::L4Protocol; use net_backend::QueueConfig; use net_backend::RssConfig; @@ -596,28 +597,49 @@ impl consomme::Client for Client<'_> { }; let max = self.pool.capacity(rx_id) as usize; if data.len() <= max { + let l4_protocol = if checksum.tcp { + L4Protocol::Tcp + } else if checksum.udp { + L4Protocol::Udp + } else { + L4Protocol::Unknown + }; + + // Determine L3 protocol and header lengths for GSO metadata. + // Parse the Ethernet header to find IP version, then derive + // l2_len and l3_len from the packet. 
+ let (l3_protocol, l2_len, l3_len, l4_len) = parse_rx_header_lengths(data, checksum); + + let gso_size = checksum.tso.unwrap_or(0); + self.pool.write_packet( rx_id, &RxMetadata { offset: 0, len: data.len(), - ip_checksum: if checksum.ipv4 { + ip_checksum: if checksum.tso.is_some() { + // TSO packets have partial/coalesced checksums; + // the guest must recompute per-segment checksums + // via NEEDS_CSUM. + RxChecksumState::Unknown + } else if checksum.ipv4 { RxChecksumState::Good } else { RxChecksumState::Unknown }, - l4_checksum: if checksum.tcp || checksum.udp { + l4_checksum: if checksum.tso.is_some() { + RxChecksumState::Unknown + } else if checksum.tcp || checksum.udp { RxChecksumState::Good } else { RxChecksumState::Unknown }, - l4_protocol: if checksum.tcp { - L4Protocol::Tcp - } else if checksum.udp { - L4Protocol::Udp - } else { - L4Protocol::Unknown - }, + l4_protocol, + l3_protocol, + gso_size, + l2_len, + l3_len, + l4_len, }, data, ); @@ -636,3 +658,50 @@ impl consomme::Client for Client<'_> { } } } + +/// Parse an Ethernet frame to extract L3 protocol, l2_len, l3_len, and l4_len. +/// +/// Used to populate `RxMetadata` GSO fields on the receive path so that +/// the virtio-net device can construct proper virtio headers for LRO packets. 
+fn parse_rx_header_lengths(data: &[u8], checksum: &ChecksumState) -> (L3Protocol, u8, u16, u8) { + const ETHERTYPE_IPV4: u16 = 0x0800; + const ETHERTYPE_IPV6: u16 = 0x86DD; + + if data.len() < 14 { + return (L3Protocol::Unknown, 0, 0, 0); + } + + let ethertype = u16::from_be_bytes([data[12], data[13]]); + let l2_len: u8 = 14; + + match ethertype { + ETHERTYPE_IPV4 if checksum.ipv4 && data.len() >= l2_len as usize + 20 => { + let ihl = (data[l2_len as usize] & 0x0f) as u16 * 4; + let l3_len = ihl.max(20); + let l4_start = l2_len as usize + l3_len as usize; + // Derive TCP header length from data offset field if TCP + let l4_len = if checksum.tcp && data.len() >= l4_start + 20 { + let data_offset = (data[l4_start + 12] >> 4) * 4; + data_offset.max(20) + } else { + 0 + }; + (L3Protocol::Ipv4, l2_len, l3_len, l4_len) + } + ETHERTYPE_IPV6 if data.len() >= l2_len as usize + 40 => { + // Base IPv6 header only. Extension headers are not parsed, but + // this is safe because consomme never generates IPv6 extension + // headers on the receive path. 
+ let l3_len: u16 = 40; + let l4_start = l2_len as usize + l3_len as usize; + let l4_len = if checksum.tcp && data.len() >= l4_start + 20 { + let data_offset = (data[l4_start + 12] >> 4) * 4; + data_offset.max(20) + } else { + 0 + }; + (L3Protocol::Ipv6, l2_len, l3_len, l4_len) + } + _ => (L3Protocol::Unknown, 0, 0, 0), + } +} diff --git a/vm/devices/net/net_mana/src/lib.rs b/vm/devices/net/net_mana/src/lib.rs index 4483ea19c7..7e38a070e8 100644 --- a/vm/devices/net/net_mana/src/lib.rs +++ b/vm/devices/net/net_mana/src/lib.rs @@ -44,6 +44,7 @@ use net_backend::BackendQueueStats; use net_backend::BufferAccess; use net_backend::Endpoint; use net_backend::EndpointAction; +use net_backend::L3Protocol; use net_backend::L4Protocol; use net_backend::MultiQueueSupport; use net_backend::Queue; @@ -974,6 +975,11 @@ impl Queue for ManaQueue { ip_checksum, l4_checksum, l4_protocol, + l3_protocol: L3Protocol::Unknown, + l2_len: 0, + l3_len: 0, + l4_len: 0, + gso_size: 0, }, ); if rx.bounced_len_with_padding > 0 { diff --git a/vm/devices/net/net_tap/src/lib.rs b/vm/devices/net/net_tap/src/lib.rs index da9bcb69c6..8012532d5c 100644 --- a/vm/devices/net/net_tap/src/lib.rs +++ b/vm/devices/net/net_tap/src/lib.rs @@ -14,6 +14,7 @@ use futures::io::AsyncRead; use inspect::InspectMut; use net_backend::BufferAccess; use net_backend::Endpoint; +use net_backend::L3Protocol; use net_backend::L4Protocol; use net_backend::Queue; use net_backend::QueueConfig; @@ -116,25 +117,20 @@ pub struct TapEndpoint { impl TapEndpoint { pub fn new(tap: tap::Tap) -> Result { - // Do not enable any RX offloads (TUN_F_CSUM, TUN_F_TSO*, etc.). + // Enable RX offloads so the kernel can deliver large coalesced + // (GRO/LRO) TCP packets instead of segmenting them. This reduces + // per-packet overhead and improves throughput when the guest has + // negotiated VIRTIO_NET_F_GUEST_TSO4/6. 
// - // The TUN_F_* flags are the TAP equivalent of VIRTIO_NET_F_GUEST_*: - // they tell the kernel that our reader can handle partial checksums - // (NEEDS_CSUM) and unsegmented GSO packets. Since net_backend's - // RxMetadata has no way to represent "checksum needs to be completed" - // (only Good/Bad/Unknown), and no concept of receive-side GRO/RSC, - // accepting such packets would force us to either lie about checksum - // state or complete checksums in software. + // TUN_F_CSUM (1) — we can handle NEEDS_CSUM (partial checksum) + // TUN_F_TSO4 (2) — we can handle TSOv4 (large IPv4/TCP packets) + // TUN_F_TSO6 (4) — we can handle TSOv6 (large IPv6/TCP packets) // - // With offloads set to 0, the kernel completes all checksums and - // segments all GSO packets before delivering them to us. This is - // correct and simple. The TX path is unaffected — writes with - // NEEDS_CSUM and GSO types in the vnet header are processed by the - // kernel regardless of these flags. - // - // We explicitly set 0 rather than skipping the call, in case a - // previous user of this TAP fd set offloads to a non-zero value. - tap.set_offloads(0)?; + // TUN_F_CSUM is required for TUN_F_TSO4/6. + const TUN_F_CSUM: u32 = 1; + const TUN_F_TSO4: u32 = 2; + const TUN_F_TSO6: u32 = 4; + tap.set_offloads(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6)?; Ok(Self { tap: Arc::new(Mutex::new(Some(tap))), @@ -255,9 +251,23 @@ impl Queue for TapQueue { } let (hdr, _) = VirtioNetHdr::read_from_prefix(&self.buffer[..read_len]).unwrap(); - let rx_meta = parse_vnet_hdr(&hdr); let frame_start = size_of::<VirtioNetHdr>(); let frame_len = read_len - size_of::<VirtioNetHdr>(); + let frame = &self.buffer[frame_start..read_len]; + let rx_meta = parse_vnet_hdr(&hdr, frame); + + // With TUN_F_TSO4/6 the kernel may deliver GRO-coalesced + // frames larger than the guest RX buffer. Drop them + // gracefully instead of panicking in write_packet. 
+ if frame_len > pool.capacity(rx) as usize { + tracing::warn!( + frame_len, + capacity = pool.capacity(rx), + "dropping rx packet: frame exceeds buffer capacity" + ); + continue; + } + pool.write_packet( rx, &RxMetadata { @@ -491,35 +501,110 @@ fn build_vnet_hdr(meta: &TxMetadata) -> VirtioNetHdr { } } +/// Parse the EtherType from the start of an Ethernet frame. +/// +/// Returns `(l2_len, is_ipv4, is_ipv6)`. Handles 802.1Q VLAN tags. +fn parse_ethertype(frame: &[u8]) -> (u8, bool, bool) { + const ETHERTYPE_IPV4: u16 = 0x0800; + const ETHERTYPE_IPV6: u16 = 0x86DD; + const ETHERTYPE_VLAN: u16 = 0x8100; + + if frame.len() < 14 { + return (0, false, false); + } + + let ethertype = u16::from_be_bytes([frame[12], frame[13]]); + if ethertype == ETHERTYPE_VLAN { + // VLAN-tagged: real EtherType is 4 bytes further. + if frame.len() < 18 { + return (0, false, false); + } + let inner = u16::from_be_bytes([frame[16], frame[17]]); + (18, inner == ETHERTYPE_IPV4, inner == ETHERTYPE_IPV6) + } else { + (14, ethertype == ETHERTYPE_IPV4, ethertype == ETHERTYPE_IPV6) + } +} + /// Parse a `VirtioNetHdr` from the TAP device into receive metadata. /// -/// Because we do not set any `TUN_F_*` RX offload flags (see -/// [`TapEndpoint::new`]), the kernel will never send us `NEEDS_CSUM` or GSO -/// packets. We only need to handle `DATA_VALID` (checksum verified by the -/// kernel) and the default case (no information). +/// With `TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6` enabled, the kernel may +/// deliver large coalesced packets with `NEEDS_CSUM` set and a non-NONE +/// `gso_type`. We translate these into `RxMetadata` GSO fields so the +/// virtio-net device can pass them to the guest as LRO packets. /// -/// The `gso_type` field should always be `GSO_NONE` since we didn't enable -/// receive-side GSO, but we still parse it defensively to extract L4 protocol -/// information if present. 
-fn parse_vnet_hdr(hdr: &VirtioNetHdr) -> RxMetadata { +/// `frame` is the Ethernet frame bytes (after the vnet header) used to +/// parse the actual L2 header length, including VLAN tags. +fn parse_vnet_hdr(hdr: &VirtioNetHdr, frame: &[u8]) -> RxMetadata { let (ip_checksum, l4_checksum) = if hdr.flags.data_valid() { (RxChecksumState::Good, RxChecksumState::Good) + } else if hdr.flags.needs_csum() && hdr.gso_size > 0 { + // GSO + NEEDS_CSUM: the L4 checksum is partial (pseudo-header + // only). Report as Unknown so the virtio layer does not set + // DATA_VALID — it will set NEEDS_CSUM in the virtio header + // instead, and the guest will compute per-segment checksums. + (RxChecksumState::Unknown, RxChecksumState::Unknown) + } else if hdr.flags.needs_csum() { + // Non-GSO + NEEDS_CSUM: the data integrity is fine but the L4 + // checksum field is partial. Since RxMetadata has no way to + // propagate NEEDS_CSUM for non-GSO packets, report as Good so + // the virtio header gets DATA_VALID and the guest accepts the + // packet. + (RxChecksumState::Good, RxChecksumState::Good) } else { (RxChecksumState::Unknown, RxChecksumState::Unknown) }; - let l4_protocol = match hdr.gso_type.protocol() { + let gso_protocol = hdr.gso_type.protocol(); + + let l4_protocol = match gso_protocol { VirtioNetHdrGsoProtocol::TCPV4 | VirtioNetHdrGsoProtocol::TCPV6 => L4Protocol::Tcp, VirtioNetHdrGsoProtocol::UDP | VirtioNetHdrGsoProtocol::UDP_L4 => L4Protocol::Udp, _ => L4Protocol::Unknown, }; + // Parse the actual Ethernet header to determine l2_len, handling + // VLAN tags. This mirrors the TX path's parse_ethertype logic. + let (parsed_l2_len, _, _) = parse_ethertype(frame); + + // Extract GSO metadata when the kernel delivers a coalesced packet. 
+ let (l3_protocol, gso_size, l2_len, l3_len, l4_len) = if hdr.gso_size > 0 + && (gso_protocol == VirtioNetHdrGsoProtocol::TCPV4 + || gso_protocol == VirtioNetHdrGsoProtocol::TCPV6) + { + let l3_proto = if gso_protocol == VirtioNetHdrGsoProtocol::TCPV4 { + L3Protocol::Ipv4 + } else { + L3Protocol::Ipv6 + }; + let l2 = parsed_l2_len; + let l3 = if l2 > 0 && hdr.csum_start >= l2 as u16 { + hdr.csum_start - l2 as u16 + } else { + 0 + }; + let l4 = if hdr.hdr_len > hdr.csum_start { + let v = hdr.hdr_len - hdr.csum_start; + if v <= u8::MAX as u16 { v as u8 } else { 0 } + } else { + 0 + }; + (l3_proto, hdr.gso_size, l2, l3, l4) + } else { + (L3Protocol::Unknown, 0, 0, 0, 0) + }; + RxMetadata { offset: 0, len: 0, ip_checksum, l4_checksum, l4_protocol, + l3_protocol, + gso_size, + l2_len, + l3_len, + l4_len, } } @@ -600,6 +685,11 @@ mod tests { assert_eq!(hdr.gso_type.protocol(), VirtioNetHdrGsoProtocol::NONE); } + // Minimal 14-byte Ethernet header with IPv4 EtherType for use in tests. + const ETH_IPV4: [u8; 14] = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x00]; + // Minimal 14-byte Ethernet header with IPv6 EtherType for use in tests. + const ETH_IPV6: [u8; 14] = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x86, 0xDD]; + #[test] fn rx_metadata_from_vnet_hdr_valid() { let hdr = VirtioNetHdr { @@ -607,31 +697,88 @@ mod tests { gso_type: VirtioNetHdrGso::new().with_protocol(VirtioNetHdrGsoProtocol::TCPV4), ..Default::default() }; - let meta = parse_vnet_hdr(&hdr); + let meta = parse_vnet_hdr(&hdr, Ð_IPV4); assert_eq!(meta.ip_checksum, RxChecksumState::Good); assert_eq!(meta.l4_checksum, RxChecksumState::Good); assert_eq!(meta.l4_protocol, L4Protocol::Tcp); } #[test] - fn rx_metadata_from_vnet_hdr_needs_csum_treated_as_unknown() { - // We don't set TUN_F_CSUM so the kernel should never send NEEDS_CSUM, - // but if it did, we conservatively treat it as Unknown (not Good). 
+ fn rx_metadata_from_vnet_hdr_needs_csum_non_gso_treated_as_good() { + // Non-GSO + NEEDS_CSUM: data integrity is fine, L4 checksum is + // partial. Report as Good so DATA_VALID is set — RxMetadata has + // no way to propagate NEEDS_CSUM for non-GSO packets. let hdr = VirtioNetHdr { flags: VirtioNetHdrFlags::new().with_needs_csum(true), gso_type: VirtioNetHdrGso::new().with_protocol(VirtioNetHdrGsoProtocol::TCPV6), ..Default::default() }; - let meta = parse_vnet_hdr(&hdr); + let meta = parse_vnet_hdr(&hdr, &ETH_IPV6); + assert_eq!(meta.ip_checksum, RxChecksumState::Good); + assert_eq!(meta.l4_checksum, RxChecksumState::Good); + assert_eq!(meta.l4_protocol, L4Protocol::Tcp); + } + + #[test] + fn rx_metadata_from_vnet_hdr_needs_csum_gso_treated_as_unknown() { + // GSO + NEEDS_CSUM: report as Unknown so DATA_VALID is not set. + // The virtio layer will set NEEDS_CSUM in the header instead. + let hdr = VirtioNetHdr { + flags: VirtioNetHdrFlags::new().with_needs_csum(true), + gso_type: VirtioNetHdrGso::new().with_protocol(VirtioNetHdrGsoProtocol::TCPV6), + gso_size: 1440, + ..Default::default() + }; + let meta = parse_vnet_hdr(&hdr, &ETH_IPV6); assert_eq!(meta.ip_checksum, RxChecksumState::Unknown); assert_eq!(meta.l4_checksum, RxChecksumState::Unknown); assert_eq!(meta.l4_protocol, L4Protocol::Tcp); } + #[test] + fn rx_metadata_from_vnet_hdr_gso_tcpv4() { + let hdr = VirtioNetHdr { + flags: VirtioNetHdrFlags::new().with_needs_csum(true), + gso_type: VirtioNetHdrGso::new().with_protocol(VirtioNetHdrGsoProtocol::TCPV4), + hdr_len: 14 + 20 + 32, // eth + ipv4 + tcp w/options + gso_size: 1460, + csum_start: 14 + 20, + csum_offset: 16, + ..Default::default() + }; + let meta = parse_vnet_hdr(&hdr, &ETH_IPV4); + assert_eq!(meta.l3_protocol, L3Protocol::Ipv4); + assert_eq!(meta.gso_size, 1460); + assert_eq!(meta.l2_len, 14); + assert_eq!(meta.l3_len, 20); + assert_eq!(meta.l4_len, 32); + assert_eq!(meta.l4_protocol, L4Protocol::Tcp); + } + + #[test] + fn 
rx_metadata_from_vnet_hdr_gso_tcpv6() { + let hdr = VirtioNetHdr { + flags: VirtioNetHdrFlags::new().with_needs_csum(true), + gso_type: VirtioNetHdrGso::new().with_protocol(VirtioNetHdrGsoProtocol::TCPV6), + hdr_len: 14 + 40 + 20, + gso_size: 1440, + csum_start: 14 + 40, + csum_offset: 16, + ..Default::default() + }; + let meta = parse_vnet_hdr(&hdr, &ETH_IPV6); + assert_eq!(meta.l3_protocol, L3Protocol::Ipv6); + assert_eq!(meta.gso_size, 1440); + assert_eq!(meta.l2_len, 14); + assert_eq!(meta.l3_len, 40); + assert_eq!(meta.l4_len, 20); + assert_eq!(meta.l4_protocol, L4Protocol::Tcp); + } + #[test] fn rx_metadata_from_vnet_hdr_none() { let hdr = VirtioNetHdr::default(); - let meta = parse_vnet_hdr(&hdr); + let meta = parse_vnet_hdr(&hdr, &[]); assert_eq!(meta.ip_checksum, RxChecksumState::Unknown); assert_eq!(meta.l4_checksum, RxChecksumState::Unknown); assert_eq!(meta.l4_protocol, L4Protocol::Unknown); @@ -644,7 +791,7 @@ mod tests { gso_type: VirtioNetHdrGso::new().with_protocol(VirtioNetHdrGsoProtocol::UDP), ..Default::default() }; - let meta = parse_vnet_hdr(&hdr); + let meta = parse_vnet_hdr(&hdr, &[]); assert_eq!(meta.l4_protocol, L4Protocol::Udp); } diff --git a/vm/devices/virtio/virtio_net/src/buffers.rs b/vm/devices/virtio/virtio_net/src/buffers.rs index 56e475461d..8d69478d32 100644 --- a/vm/devices/virtio/virtio_net/src/buffers.rs +++ b/vm/devices/virtio/virtio_net/src/buffers.rs @@ -3,10 +3,13 @@ use crate::VirtioNetHeader; use crate::VirtioNetHeaderFlags; +use crate::VirtioNetHeaderGso; +use crate::VirtioNetHeaderGsoProtocol; use crate::header_size; use guestmem::GuestMemory; use inspect::Inspect; use net_backend::BufferAccess; +use net_backend::L3Protocol; use net_backend::RxBufferSegment; use net_backend::RxId; use net_backend::RxMetadata; @@ -27,6 +30,10 @@ pub struct VirtioWorkPool { mem: GuestMemory, #[inspect(skip)] rx_packets: Vec>, + /// Whether the guest negotiated VIRTIO_NET_F_GUEST_TSO4. 
+ guest_tso4: bool, + /// Whether the guest negotiated VIRTIO_NET_F_GUEST_TSO6. + guest_tso6: bool, } impl VirtioWorkPool { @@ -38,10 +45,12 @@ impl VirtioWorkPool { } /// Create a new instance. - pub fn new(mem: GuestMemory, queue_size: u16) -> Self { + pub fn new(mem: GuestMemory, queue_size: u16, guest_tso4: bool, guest_tso6: bool) -> Self { Self { mem, rx_packets: (0..queue_size).map(|_| None).collect(), + guest_tso4, + guest_tso6, } } @@ -170,8 +179,67 @@ impl BufferAccess for VirtioWorkPool { let data_valid = metadata.ip_checksum.is_valid() && metadata.l4_checksum.is_valid(); let flags = VirtioNetHeaderFlags::new().with_data_valid(data_valid); + // Build GSO fields when the backend indicates a large/coalesced packet + // and the guest has negotiated the corresponding GUEST_TSO feature. + let gso_allowed = match metadata.l3_protocol { + L3Protocol::Ipv4 => self.guest_tso4, + L3Protocol::Ipv6 => self.guest_tso6, + L3Protocol::Unknown => false, + }; + let (gso_type, gso_size, hdr_len, csum_start, csum_offset) = if metadata.gso_size > 0 + && metadata.l2_len > 0 + && metadata.l3_len > 0 + && metadata.l4_len > 0 + && gso_allowed + { + let gso_protocol = match metadata.l3_protocol { + L3Protocol::Ipv4 => VirtioNetHeaderGsoProtocol::TCPV4, + L3Protocol::Ipv6 => VirtioNetHeaderGsoProtocol::TCPV6, + L3Protocol::Unknown => VirtioNetHeaderGsoProtocol::NONE, + }; + let gso_type_byte: u8 = VirtioNetHeaderGso::new().with_protocol(gso_protocol).into(); + let total_hdr = metadata.l2_len as u16 + metadata.l3_len + metadata.l4_len as u16; + let csum_start = metadata.l2_len as u16 + metadata.l3_len; + // TCP checksum offset within TCP header is 16. 
+ let csum_offset: u16 = 16; + ( + gso_type_byte, + metadata.gso_size, + total_hdr, + csum_start, + csum_offset, + ) + } else { + if metadata.gso_size > 0 { + tracelimit::warn_ratelimited!( + gso_size = metadata.gso_size, + l2_len = metadata.l2_len, + l3_len = metadata.l3_len, + l4_len = metadata.l4_len, + ?gso_allowed, + "cannot emit GSO metadata: missing header lengths or guest feature" + ); + } + (0, 0, 0, 0, 0) + }; + + // When GSO is active, set NEEDS_CSUM so the guest computes + // per-segment checksums, and clear DATA_VALID to avoid the + // contradictory combination that could cause the guest to + // skip required per-segment checksum computation. + let flags = if gso_size > 0 { + flags.with_needs_csum(true).with_data_valid(false) + } else { + flags + }; + let virtio_net_header = VirtioNetHeader { flags: flags.into(), + gso_type, + gso_size, + hdr_len, + csum_start, + csum_offset, num_buffers: 1, ..FromZeros::new_zeroed() }; diff --git a/vm/devices/virtio/virtio_net/src/lib.rs b/vm/devices/virtio/virtio_net/src/lib.rs index cb5227f149..183a9434be 100644 --- a/vm/devices/virtio/virtio_net/src/lib.rs +++ b/vm/devices/virtio/virtio_net/src/lib.rs @@ -268,6 +268,8 @@ impl VirtioDevice for Device { .with_mac(true) .with_csum(csum) .with_guest_csum(true) + .with_guest_tso4(true) + .with_guest_tso6(true) .with_host_tso4(host_tso) .with_host_tso6(host_tso); @@ -505,10 +507,20 @@ struct ActiveState { } impl ActiveState { - fn new(mem: GuestMemory, rx_queue_size: u16, tx_queue_size: u16) -> Self { + fn new( + mem: GuestMemory, + rx_queue_size: u16, + tx_queue_size: u16, + negotiated_features: NetworkFeaturesBank0, + ) -> Self { Self { pending_tx_packets: (0..tx_queue_size).map(|_| None).collect(), - pending_rx_packets: VirtioWorkPool::new(mem, rx_queue_size), + pending_rx_packets: VirtioWorkPool::new( + mem, + rx_queue_size, + negotiated_features.guest_tso4(), + negotiated_features.guest_tso6(), + ), data: ProcessingData::new(rx_queue_size, tx_queue_size), stats: 
Default::default(), } @@ -635,6 +647,7 @@ impl Device { guest_memory.clone(), virtio_state.rx_queue_size, virtio_state.tx_queue_size, + negotiated_features, ); let worker = Worker { virtio_state,