diff --git a/crates/kernel/src/process.rs b/crates/kernel/src/process.rs index 99e1adf13..1c7ecd0ab 100644 --- a/crates/kernel/src/process.rs +++ b/crates/kernel/src/process.rs @@ -1285,7 +1285,16 @@ mod tests { udp.dgram_queue.push(Datagram { data: b"hello".to_vec(), src_addr: [127, 0, 0, 1], + src_addr6: [0; 16], + dst_addr: [127, 0, 0, 1], + dst_addr6: [0; 16], src_port: 12345, + src_sock_idx: None, + ipv6_tclass: 0, + src_pid: 400, + src_uid: 0, + src_gid: 0, + ancillary_fds: Vec::new(), }); let mut tcp = SocketInfo::new(SocketDomain::Inet, SocketType::Stream, 0); tcp.oob_byte = Some(0xAB); diff --git a/crates/kernel/src/socket.rs b/crates/kernel/src/socket.rs index fa5acd042..f8d989fc8 100644 --- a/crates/kernel/src/socket.rs +++ b/crates/kernel/src/socket.rs @@ -91,7 +91,20 @@ pub enum SocketState { pub struct Datagram { pub data: Vec, pub src_addr: [u8; 4], + pub src_addr6: [u8; 16], + pub dst_addr: [u8; 4], + pub dst_addr6: [u8; 16], pub src_port: u16, + pub src_sock_idx: Option, + /// IPv6 traffic class associated with this datagram. + pub ipv6_tclass: u32, + /// Sender credentials captured when the datagram was queued. AF_UNIX + /// SO_PASSCRED reports these with SCM_CREDENTIALS. + pub src_pid: u32, + pub src_uid: u32, + pub src_gid: u32, + /// Ancillary file descriptors sent with this datagram via SCM_RIGHTS. + pub ancillary_fds: Vec, } /// One AF_INET UDP endpoint bound in the in-kernel virtual network. @@ -104,6 +117,18 @@ pub struct UdpEndpoint { pub reuse_addr: bool, } +/// IPv4 multicast group state for an AF_INET datagram socket. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Ipv4MulticastMembership { + pub group: [u8; 4], + /// Interface address used for matching local delivery. 0.0.0.0 means the + /// kernel default interface. 127.0.0.1 represents loopback. 
+ pub interface_addr: [u8; 4], + pub any_source: bool, + pub blocked_sources: Vec<[u8; 4]>, + pub included_sources: Vec<[u8; 4]>, +} + struct UdpEndpointTable(UnsafeCell>>); unsafe impl Sync for UdpEndpointTable {} @@ -264,12 +289,25 @@ pub struct SocketInfo { pub host_net_handle: Option, /// Stored socket options as (level, optname, value) tuples. pub options: Vec<(u32, u32, u32)>, + /// SO_LINGER state. This is a structured option (`struct linger`), so it + /// is kept separately from integer-valued socket options. + pub linger_onoff: i32, + pub linger_seconds: i32, + /// SO_BINDTODEVICE binds a socket to a named virtual network interface. + pub bind_device: Option>, + /// TCP_CONGESTION algorithm name for this socket. Kandelo's virtual TCP + /// stack currently exposes the standard Linux default, "cubic". + pub tcp_congestion: Vec, /// Bound IPv4 address (for AF_INET sockets). pub bind_addr: [u8; 4], + /// Bound IPv6 address (for AF_INET6 sockets). + pub bind_addr6: [u8; 16], /// Bound port (for AF_INET sockets). pub bind_port: u16, /// Peer IPv4 address (for connected AF_INET sockets). pub peer_addr: [u8; 4], + /// Peer IPv6 address (for connected AF_INET6 sockets). + pub peer_addr6: [u8; 16], /// Peer port (for connected AF_INET sockets). pub peer_port: u16, /// Pending connection socket indices (for listening sockets). @@ -287,6 +325,11 @@ pub struct SocketInfo { pub accept_wake_idx: Option, /// Received UDP datagrams (for DGRAM sockets). pub dgram_queue: Vec, + /// Joined IPv4 multicast groups and source filters. + pub ipv4_multicast_memberships: Vec, + /// Received netlink datagrams. Netlink sockets are datagram-like and are + /// used by musl for route/interface enumeration. + pub netlink_queue: Vec>, /// Whether recv/send pipe indices refer to the global pipe table. Kept in /// serialized state for compatibility; runtime socket buffers are global. 
pub global_pipes: bool, @@ -318,14 +361,22 @@ impl SocketInfo { shut_wr: false, host_net_handle: None, options: Vec::new(), + linger_onoff: 0, + linger_seconds: 0, + bind_device: None, + tcp_congestion: b"cubic".to_vec(), bind_addr: [0; 4], + bind_addr6: [0; 16], bind_port: 0, peer_addr: [0; 4], + peer_addr6: [0; 16], peer_port: 0, listen_backlog: Vec::new(), shared_backlog_idx: None, accept_wake_idx: None, dgram_queue: Vec::new(), + ipv4_multicast_memberships: Vec::new(), + netlink_queue: Vec::new(), global_pipes: true, oob_byte: None, recv_timeout_us: 0, @@ -391,14 +442,22 @@ impl Clone for SocketInfo { shut_wr: self.shut_wr, host_net_handle: self.host_net_handle, options: self.options.clone(), + linger_onoff: self.linger_onoff, + linger_seconds: self.linger_seconds, + bind_device: self.bind_device.clone(), + tcp_congestion: self.tcp_congestion.clone(), bind_addr: self.bind_addr, + bind_addr6: self.bind_addr6, bind_port: self.bind_port, peer_addr: self.peer_addr, + peer_addr6: self.peer_addr6, peer_port: self.peer_port, listen_backlog: Vec::new(), // consume-once: don't double-accept shared_backlog_idx: self.shared_backlog_idx, accept_wake_idx: self.accept_wake_idx, dgram_queue: Vec::new(), // consume-once: don't double-deliver + ipv4_multicast_memberships: self.ipv4_multicast_memberships.clone(), + netlink_queue: Vec::new(), // consume-once: don't double-deliver global_pipes: self.global_pipes, oob_byte: None, // consume-once: don't double-deliver recv_timeout_us: self.recv_timeout_us, diff --git a/crates/kernel/src/syscalls.rs b/crates/kernel/src/syscalls.rs index 0a727d567..aad3036cd 100644 --- a/crates/kernel/src/syscalls.rs +++ b/crates/kernel/src/syscalls.rs @@ -2053,8 +2053,10 @@ pub fn sys_close(proc: &mut Process, host: &mut dyn HostIO, fd: i32) -> Result<( crate::socket::udp_unregister(proc.pid, sock_idx); let _ = host.host_udp_unbind(sock_idx as i32); } - if sock.domain == crate::socket::SocketDomain::Inet - && sock.sock_type == 
crate::socket::SocketType::Stream + if matches!( + sock.domain, + crate::socket::SocketDomain::Inet | crate::socket::SocketDomain::Inet6 + ) && sock.sock_type == crate::socket::SocketType::Stream { crate::socket::tcp_unregister(proc.pid, sock_idx); } @@ -6158,6 +6160,26 @@ fn parse_sockaddr_in(addr: &[u8]) -> Result<([u8; 4], u16), Errno> { )) } +fn parse_sockaddr_in6(addr: &[u8]) -> Result<([u8; 16], u16), Errno> { + use wasm_posix_shared::socket::AF_INET6; + + // struct sockaddr_in6: + // sa_family_t sin6_family (2, little-endian on wasm32) + // in_port_t sin6_port (2, network byte order) + // uint32_t sin6_flowinfo + // struct in6_addr sin6_addr + // uint32_t sin6_scope_id + if addr.len() < 28 { + return Err(Errno::EINVAL); + } + if sockaddr_family(addr)? as u32 != AF_INET6 { + return Err(Errno::EAFNOSUPPORT); + } + let mut ip = [0u8; 16]; + ip.copy_from_slice(&addr[8..24]); + Ok((ip, u16::from_be_bytes([addr[2], addr[3]]))) +} + fn write_sockaddr_in(buf: &mut [u8], addr: [u8; 4], port: u16) -> usize { let mut sa = [0u8; 16]; sa[0] = 2; // AF_INET, little-endian @@ -6174,10 +6196,43 @@ fn write_sockaddr_in(buf: &mut [u8], addr: [u8; 4], port: u16) -> usize { 16 } +fn write_sockaddr_in6(buf: &mut [u8], addr: [u8; 16], port: u16) -> usize { + let mut sa = [0u8; 28]; + sa[0] = 10; // AF_INET6, little-endian + sa[1] = 0; + let port_be = port.to_be_bytes(); + sa[2] = port_be[0]; + sa[3] = port_be[1]; + // flowinfo remains zero + sa[8..24].copy_from_slice(&addr); + // scope_id remains zero + let n = buf.len().min(28); + buf[..n].copy_from_slice(&sa[..n]); + 28 +} + fn is_loopback_addr(addr: [u8; 4]) -> bool { addr[0] == 127 } +fn is_loopback_addr6(addr: [u8; 16]) -> bool { + addr == [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1] +} + +fn is_unspecified_addr6(addr: [u8; 16]) -> bool { + addr == [0; 16] +} + +fn ipv6_v6only(sock: &crate::socket::SocketInfo) -> bool { + let value = sock + .get_option( + wasm_posix_shared::socket::IPPROTO_IPV6, + 
wasm_posix_shared::socket::IPV6_V6ONLY, + ) + .unwrap_or(0); + value != 0 +} + fn is_supported_udp_bind_addr(addr: [u8; 4]) -> bool { addr == [0, 0, 0, 0] || is_loopback_addr(addr) @@ -6186,13 +6241,31 @@ fn is_supported_udp_bind_addr(addr: [u8; 4]) -> bool { } fn is_supported_udp_route_addr(addr: [u8; 4]) -> bool { - is_loopback_addr(addr) || is_virtual_network_addr(addr) + addr == [0, 0, 0, 0] + || is_loopback_addr(addr) + || is_virtual_network_addr(addr) + || is_ipv4_multicast_addr(addr) } fn is_virtual_network_addr(addr: [u8; 4]) -> bool { addr[0] == 10 && addr[1] == 88 } +fn is_ipv4_multicast_addr(addr: [u8; 4]) -> bool { + (224..=239).contains(&addr[0]) +} + +fn udp_canonical_dst_addr(dst_addr: [u8; 4]) -> [u8; 4] { + // Linux treats UDP connect/sendto to INADDR_ANY as a route to local + // loopback (getpeername() reports 127.0.0.1). Preserve that generic + // socket behavior instead of reporting ENETUNREACH. + if dst_addr == [0, 0, 0, 0] { + [127, 0, 0, 1] + } else { + dst_addr + } +} + fn udp_route_local_addr(dst_addr: [u8; 4]) -> [u8; 4] { if is_loopback_addr(dst_addr) { [127, 0, 0, 1] @@ -6201,6 +6274,26 @@ fn udp_route_local_addr(dst_addr: [u8; 4]) -> [u8; 4] { } } +#[cfg_attr(not(target_arch = "wasm32"), allow(dead_code))] +pub(crate) fn ipv4_multicast_interface_from_index(ifindex: u32) -> Result<[u8; 4], Errno> { + match ifindex { + 0 => Ok([0, 0, 0, 0]), + // Kandelo exposes a Linux-like loopback interface as index 1. 
+ 1 => Ok([127, 0, 0, 1]), + _ => Err(Errno::ENODEV), + } +} + +fn ipv4_multicast_interface_matches(interface_addr: [u8; 4], src_addr: [u8; 4]) -> bool { + if interface_addr == [0, 0, 0, 0] { + true + } else if is_loopback_addr(interface_addr) { + is_loopback_addr(src_addr) + } else { + interface_addr == src_addr + } +} + fn udp_reuse_addr(sock: &crate::socket::SocketInfo) -> bool { use wasm_posix_shared::socket::{SO_REUSEADDR, SOL_SOCKET}; @@ -6322,6 +6415,151 @@ fn udp_take_socket_error(proc: &mut Process, sock_idx: usize) -> Result<(), Errn Err(Errno::from_u32(err).unwrap_or(Errno::EIO)) } +fn ipv4_multicast_membership_mut( + sock: &mut crate::socket::SocketInfo, + group: [u8; 4], + interface_addr: [u8; 4], +) -> &mut crate::socket::Ipv4MulticastMembership { + if let Some(idx) = sock + .ipv4_multicast_memberships + .iter() + .position(|m| m.group == group && m.interface_addr == interface_addr) + { + return &mut sock.ipv4_multicast_memberships[idx]; + } + sock.ipv4_multicast_memberships + .push(crate::socket::Ipv4MulticastMembership { + group, + interface_addr, + any_source: false, + blocked_sources: Vec::new(), + included_sources: Vec::new(), + }); + sock.ipv4_multicast_memberships + .last_mut() + .expect("membership was just pushed") +} + +fn ipv4_multicast_leave_if_empty(sock: &mut crate::socket::SocketInfo, idx: usize) { + if let Some(m) = sock.ipv4_multicast_memberships.get(idx) { + if !m.any_source && m.included_sources.is_empty() { + sock.ipv4_multicast_memberships.remove(idx); + } + } +} + +/// Apply an IPv4 multicast membership/source-filter socket option. +/// +/// Kandelo models the POSIX/Linux observable contract for local UDP +/// multicast: joining a group on loopback lets datagrams sent to that group +/// and interface be received by sockets bound to the destination port, source +/// filters suppress or include sources, and sends to groups with no local +/// listeners still succeed like UDP datagrams. 
+pub fn sys_setsockopt_ipv4_multicast( + proc: &mut Process, + fd: i32, + optname: u32, + group: [u8; 4], + interface_addr: [u8; 4], + source: Option<[u8; 4]>, +) -> Result<(), Errno> { + use crate::socket::{SocketDomain, SocketType}; + use wasm_posix_shared::socket::*; + + if !is_ipv4_multicast_addr(group) { + return Err(Errno::EINVAL); + } + + let entry = proc.fd_table.get(fd)?; + let ofd = proc.ofd_table.get(entry.ofd_ref.0).ok_or(Errno::EBADF)?; + if ofd.file_type != FileType::Socket { + return Err(Errno::ENOTSOCK); + } + let sock_idx = (-(ofd.host_handle + 1)) as usize; + let sock = proc.sockets.get_mut(sock_idx).ok_or(Errno::EBADF)?; + if sock.domain != SocketDomain::Inet || sock.sock_type != SocketType::Dgram { + return Err(Errno::ENOPROTOOPT); + } + + match optname { + IP_ADD_MEMBERSHIP | MCAST_JOIN_GROUP => { + let membership = ipv4_multicast_membership_mut(sock, group, interface_addr); + membership.any_source = true; + sock.set_option(IPPROTO_IP, optname, 1); + Ok(()) + } + IP_DROP_MEMBERSHIP | MCAST_LEAVE_GROUP => { + sock.ipv4_multicast_memberships + .retain(|m| !(m.group == group && m.interface_addr == interface_addr)); + sock.set_option(IPPROTO_IP, optname, 1); + Ok(()) + } + IP_BLOCK_SOURCE | MCAST_BLOCK_SOURCE => { + let source = source.ok_or(Errno::EINVAL)?; + let membership = ipv4_multicast_membership_mut(sock, group, interface_addr); + if !membership.blocked_sources.contains(&source) { + membership.blocked_sources.push(source); + } + sock.set_option(IPPROTO_IP, optname, 1); + Ok(()) + } + IP_UNBLOCK_SOURCE | MCAST_UNBLOCK_SOURCE => { + let source = source.ok_or(Errno::EINVAL)?; + if let Some(membership) = sock + .ipv4_multicast_memberships + .iter_mut() + .find(|m| m.group == group && m.interface_addr == interface_addr) + { + membership.blocked_sources.retain(|s| *s != source); + } + sock.set_option(IPPROTO_IP, optname, 1); + Ok(()) + } + IP_ADD_SOURCE_MEMBERSHIP | MCAST_JOIN_SOURCE_GROUP => { + let source = source.ok_or(Errno::EINVAL)?; + let 
membership = ipv4_multicast_membership_mut(sock, group, interface_addr); + if !membership.included_sources.contains(&source) { + membership.included_sources.push(source); + } + sock.set_option(IPPROTO_IP, optname, 1); + Ok(()) + } + IP_DROP_SOURCE_MEMBERSHIP | MCAST_LEAVE_SOURCE_GROUP => { + let source = source.ok_or(Errno::EINVAL)?; + if let Some(idx) = sock + .ipv4_multicast_memberships + .iter() + .position(|m| m.group == group && m.interface_addr == interface_addr) + { + sock.ipv4_multicast_memberships[idx] + .included_sources + .retain(|s| *s != source); + ipv4_multicast_leave_if_empty(sock, idx); + } + sock.set_option(IPPROTO_IP, optname, 1); + Ok(()) + } + _ => Err(Errno::ENOPROTOOPT), + } +} + +fn udp_socket_accepts_multicast_datagram( + sock: &crate::socket::SocketInfo, + group: [u8; 4], + src_addr: [u8; 4], + src_port: u16, +) -> bool { + if !udp_socket_accepts_datagram(sock, src_addr, src_port) { + return false; + } + sock.ipv4_multicast_memberships.iter().any(|membership| { + membership.group == group + && ipv4_multicast_interface_matches(membership.interface_addr, src_addr) + && ((membership.any_source && !membership.blocked_sources.contains(&src_addr)) + || membership.included_sources.contains(&src_addr)) + }) +} + fn udp_purge_unaccepted_datagrams(proc: &mut Process, sock_idx: usize) -> Result<(), Errno> { let (peer_addr, peer_port, connected) = { let sock = proc.sockets.get(sock_idx).ok_or(Errno::EBADF)?; @@ -6350,6 +6588,7 @@ fn udp_send_datagram( ) -> Result { use crate::socket::{Datagram, SocketState}; + let dst_addr = udp_canonical_dst_addr(dst_addr); let (state, shut_wr) = { let sock = proc.sockets.get(sock_idx).ok_or(Errno::EBADF)?; (sock.state, sock.shut_wr) @@ -6371,6 +6610,48 @@ fn udp_send_datagram( let sock = proc.sockets.get(sock_idx).ok_or(Errno::EBADF)?; (sock.bind_addr, sock.bind_port) }; + let src_addr = if src_addr == [0, 0, 0, 0] && is_loopback_addr(dst_addr) { + udp_route_local_addr(dst_addr) + } else { + src_addr + }; + if 
is_ipv4_multicast_addr(dst_addr) { + let datagram = Datagram { + data: buf.to_vec(), + src_addr, + src_addr6: [0; 16], + dst_addr, + dst_addr6: [0; 16], + src_port, + src_sock_idx: Some(sock_idx), + ipv6_tclass: 0, + src_pid: proc.pid, + src_uid: proc.uid, + src_gid: proc.gid, + ancillary_fds: Vec::new(), + }; + + let endpoints = crate::socket::udp_lookup(dst_addr, dst_port); + for endpoint in endpoints { + if endpoint.pid != proc.pid { + continue; + } + let accepts = proc + .sockets + .get(endpoint.sock_idx) + .map(|sock| { + udp_socket_accepts_multicast_datagram(sock, dst_addr, src_addr, src_port) + }) + .unwrap_or(false); + if !accepts { + continue; + } + if let Some(target) = proc.sockets.get_mut(endpoint.sock_idx) { + udp_queue_datagram(target, datagram.clone()); + } + } + return Ok(buf.len()); + } if !is_loopback_addr(dst_addr) { return match host.host_udp_send(&src_addr, src_port, &dst_addr, dst_port, buf) { Ok(n) => Ok(n), @@ -6387,7 +6668,16 @@ fn udp_send_datagram( let datagram = Datagram { data: buf.to_vec(), src_addr, + src_addr6: [0; 16], + dst_addr, + dst_addr6: [0; 16], src_port, + src_sock_idx: Some(sock_idx), + ipv6_tclass: 0, + src_pid: proc.pid, + src_uid: proc.uid, + src_gid: proc.gid, + ancillary_fds: Vec::new(), }; let mut delivered = false; @@ -6420,6 +6710,235 @@ fn udp_send_datagram( Ok(buf.len()) } +fn unix_dgram_send_to_sock( + proc: &mut Process, + src_sock_idx: usize, + dst_sock_idx: usize, + buf: &[u8], +) -> Result { + use crate::socket::Datagram; + + let shut_wr = proc + .sockets + .get(src_sock_idx) + .ok_or(Errno::EBADF)? 
+ .shut_wr; + if shut_wr { + return Err(Errno::EPIPE); + } + + let datagram = Datagram { + data: buf.to_vec(), + src_addr: [0; 4], + src_addr6: [0; 16], + dst_addr: [0; 4], + dst_addr6: [0; 16], + src_port: 0, + src_sock_idx: Some(src_sock_idx), + ipv6_tclass: 0, + src_pid: proc.pid, + src_uid: proc.uid, + src_gid: proc.gid, + ancillary_fds: Vec::new(), + }; + let target = proc.sockets.get_mut(dst_sock_idx).ok_or(Errno::ECONNREFUSED)?; + udp_queue_datagram(target, datagram); + Ok(buf.len()) +} + +fn udp6_local_bind_conflicts( + proc: &Process, + sock_idx: usize, + addr: [u8; 16], + port: u16, + reuse_addr: bool, +) -> bool { + use crate::socket::{SocketDomain, SocketType}; + + for idx in 0..proc.sockets.len() { + let Some(sock) = proc.sockets.get(idx) else { + continue; + }; + if idx != sock_idx + && sock.domain == SocketDomain::Inet6 + && sock.sock_type == SocketType::Dgram + && sock.bind_port == port + && (is_unspecified_addr6(sock.bind_addr6) + || is_unspecified_addr6(addr) + || sock.bind_addr6 == addr) + && !(sock.get_option( + wasm_posix_shared::socket::SOL_SOCKET, + wasm_posix_shared::socket::SO_REUSEADDR, + ) != Some(0) + && reuse_addr) + { + return true; + } + } + false +} + +fn udp6_bind_socket( + proc: &mut Process, + sock_idx: usize, + addr: [u8; 16], + port: u16, +) -> Result<(), Errno> { + use crate::socket::SocketState; + + if !(is_loopback_addr6(addr) || is_unspecified_addr6(addr)) { + return Err(Errno::EADDRNOTAVAIL); + } + + let assigned_port = if port == 0 { + let p = proc.next_ephemeral_port; + proc.next_ephemeral_port = proc.next_ephemeral_port.wrapping_add(1); + if proc.next_ephemeral_port == 0 { + proc.next_ephemeral_port = 49152; + } + p + } else { + port + }; + + let reuse_addr = proc + .sockets + .get(sock_idx) + .and_then(|sock| { + sock.get_option( + wasm_posix_shared::socket::SOL_SOCKET, + wasm_posix_shared::socket::SO_REUSEADDR, + ) + }) + .unwrap_or(0) + != 0; + if udp6_local_bind_conflicts(proc, sock_idx, addr, assigned_port, 
reuse_addr) { + return Err(Errno::EADDRINUSE); + } + + let sock = proc.sockets.get_mut(sock_idx).ok_or(Errno::EBADF)?; + sock.bind_addr6 = addr; + sock.bind_port = assigned_port; + if sock.state == SocketState::Unbound { + sock.state = SocketState::Bound; + } + Ok(()) +} + +fn udp6_ensure_bound( + proc: &mut Process, + sock_idx: usize, + addr: [u8; 16], +) -> Result<(), Errno> { + let already_bound = proc.sockets.get(sock_idx).ok_or(Errno::EBADF)?.bind_port != 0; + if already_bound { + return Ok(()); + } + udp6_bind_socket(proc, sock_idx, addr, 0) +} + +fn udp6_socket_accepts_datagram( + sock: &crate::socket::SocketInfo, + src_addr: [u8; 16], + src_port: u16, +) -> bool { + use crate::socket::SocketState; + + if sock.state == SocketState::Connected { + return sock.peer_addr6 == src_addr && sock.peer_port == src_port; + } + true +} + +fn udp6_send_datagram( + proc: &mut Process, + sock_idx: usize, + buf: &[u8], + dst_addr: [u8; 16], + dst_port: u16, +) -> Result { + use crate::socket::{Datagram, SocketDomain, SocketState, SocketType}; + + let dst_addr = if is_unspecified_addr6(dst_addr) { + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1] + } else { + dst_addr + }; + if !is_loopback_addr6(dst_addr) { + return Err(Errno::ENETUNREACH); + } + + let (state, shut_wr) = { + let sock = proc.sockets.get(sock_idx).ok_or(Errno::EBADF)?; + (sock.state, sock.shut_wr) + }; + if shut_wr { + return Err(Errno::EPIPE); + } + if state == SocketState::Connected { + udp_take_socket_error(proc, sock_idx)?; + } + + let loopback = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]; + let auto_bind_addr = if state == SocketState::Connected { + loopback + } else { + [0; 16] + }; + udp6_ensure_bound(proc, sock_idx, auto_bind_addr)?; + + let (src_addr, src_port) = { + let sock = proc.sockets.get(sock_idx).ok_or(Errno::EBADF)?; + (sock.bind_addr6, sock.bind_port) + }; + let datagram = Datagram { + data: buf.to_vec(), + src_addr: [0; 4], + src_addr6: src_addr, + dst_addr: [0; 4], + dst_addr6: 
dst_addr, + src_port, + src_sock_idx: Some(sock_idx), + ipv6_tclass: 0, + src_pid: proc.pid, + src_uid: proc.uid, + src_gid: proc.gid, + ancillary_fds: Vec::new(), + }; + + let mut delivered = false; + let sock_count = proc.sockets.len(); + for idx in 0..sock_count { + let accepts = proc + .sockets + .get(idx) + .map(|sock| { + sock.domain == SocketDomain::Inet6 + && sock.sock_type == SocketType::Dgram + && sock.bind_port == dst_port + && (is_unspecified_addr6(sock.bind_addr6) || sock.bind_addr6 == dst_addr) + && udp6_socket_accepts_datagram(sock, src_addr, src_port) + }) + .unwrap_or(false); + if !accepts { + continue; + } + if let Some(target) = proc.sockets.get_mut(idx) { + udp_queue_datagram(target, datagram.clone()); + delivered = true; + break; + } + } + + if !delivered && state == SocketState::Connected { + if let Some(sock) = proc.sockets.get_mut(sock_idx) { + sock.connect_error = Errno::ECONNREFUSED as u32; + } + } + + Ok(buf.len()) +} + pub fn inject_udp_datagram_into( proc: &mut Process, dst_addr: [u8; 4], @@ -6433,7 +6952,16 @@ pub fn inject_udp_datagram_into( let datagram = Datagram { data: data.to_vec(), src_addr, + src_addr6: [0; 16], + dst_addr, + dst_addr6: [0; 16], src_port, + src_sock_idx: None, + ipv6_tclass: 0, + src_pid: 0, + src_uid: 0, + src_gid: 0, + ancillary_fds: Vec::new(), }; let endpoints = crate::socket::udp_lookup(dst_addr, dst_port); for endpoint in endpoints { @@ -6475,17 +7003,14 @@ pub fn sys_getsockname(proc: &Process, fd: i32, buf: &mut [u8]) -> Result Ok(write_sockaddr_in(buf, sock.bind_addr, sock.bind_port)), - SocketDomain::Inet6 => { - if buf.len() >= 2 { - buf[0] = 10; // AF_INET6 - buf[1] = 0; - } - Ok(2) - } + SocketDomain::Inet6 => Ok(write_sockaddr_in6(buf, sock.bind_addr6, sock.bind_port)), SocketDomain::Unix => { if let Some(ref path) = sock.bind_path { - // sockaddr_un: family(2) + path (null-terminated) - let total_len = 2 + path.len() + 1; // +1 for null terminator + // sockaddr_un: family(2) + path. 
Filesystem paths are + // null-terminated; Linux abstract namespace paths start with + // NUL and use the addrlen as their length (no terminator). + let abstract_unix = path.first().copied() == Some(0); + let total_len = 2 + path.len() + if abstract_unix { 0 } else { 1 }; let n = buf.len().min(total_len); if n >= 1 { buf[0] = 1; @@ -6497,8 +7022,8 @@ pub fn sys_getsockname(proc: &Process, fd: i32, buf: &mut [u8]) -> Result 0 { buf[2..2 + path_copy].copy_from_slice(&path[..path_copy]); } - // Null terminate if room - if n > 2 + path_copy { + // Null terminate filesystem paths if room. + if !abstract_unix && n > 2 + path_copy { buf[2 + path_copy] = 0; } Ok(total_len) @@ -6543,13 +7068,7 @@ pub fn sys_getpeername(proc: &Process, fd: i32, buf: &mut [u8]) -> Result Ok(write_sockaddr_in(buf, sock.peer_addr, sock.peer_port)), - SocketDomain::Inet6 => { - if buf.len() >= 2 { - buf[0] = 10; // AF_INET6 - buf[1] = 0; - } - Ok(2) - } + SocketDomain::Inet6 => Ok(write_sockaddr_in6(buf, sock.peer_addr6, sock.peer_port)), SocketDomain::Unix => { if buf.len() >= 2 { buf[0] = 1; // AF_UNIX @@ -6640,16 +7159,31 @@ pub fn sys_send( let sock_idx = (-(ofd.host_handle + 1)) as usize; let sock = proc.sockets.get(sock_idx).ok_or(Errno::EBADF)?; if sock.sock_type == SocketType::Dgram { - if sock.domain == SocketDomain::Inet { - if sock.state != SocketState::Connected { - return Err(Errno::EDESTADDRREQ); + match sock.domain { + SocketDomain::Inet => { + if sock.state != SocketState::Connected { + return Err(Errno::EDESTADDRREQ); + } + let dst_addr = sock.peer_addr; + let dst_port = sock.peer_port; + return udp_send_datagram(proc, host, sock_idx, buf, dst_addr, dst_port); } - let dst_addr = sock.peer_addr; - let dst_port = sock.peer_port; - return udp_send_datagram(proc, host, sock_idx, buf, dst_addr, dst_port); - } - if sock.domain == SocketDomain::Unix && sock.state == SocketState::Connected { - return Ok(buf.len()); + SocketDomain::Inet6 => { + if sock.state != SocketState::Connected { + 
return Err(Errno::EDESTADDRREQ); + } + let dst_addr = sock.peer_addr6; + let dst_port = sock.peer_port; + return udp6_send_datagram(proc, sock_idx, buf, dst_addr, dst_port); + } + SocketDomain::Unix if sock.state == SocketState::Connected => { + let peer_idx = sock.peer_idx; + if let Some(peer_idx) = peer_idx { + return unix_dgram_send_to_sock(proc, sock_idx, peer_idx, buf); + } + return Ok(buf.len()); + } + SocketDomain::Unix => {} } } if sock.state != SocketState::Connected { @@ -6721,7 +7255,12 @@ pub fn sys_recv( } let sock_idx = (-(ofd.host_handle + 1)) as usize; let sock = proc.sockets.get(sock_idx).ok_or(Errno::EBADF)?; - if sock.sock_type == SocketType::Dgram && sock.domain == SocketDomain::Inet { + if sock.sock_type == SocketType::Dgram + && matches!( + sock.domain, + SocketDomain::Inet | SocketDomain::Inet6 | SocketDomain::Unix + ) + { let (n, _) = sys_recvfrom(proc, host, fd, buf, flags, &mut [])?; return Ok(n); } @@ -6825,18 +7364,44 @@ pub fn sys_getsockopt(proc: &mut Process, fd: i32, level: u32, optname: u32) -> 0 }), SO_RCVBUF | SO_SNDBUF => Ok(DEFAULT_PIPE_CAPACITY as u32), - SO_REUSEADDR | SO_KEEPALIVE | SO_LINGER | SO_BROADCAST => { + SO_REUSEADDR | SO_REUSEPORT | SO_KEEPALIVE | SO_BROADCAST | SO_PASSCRED + | SO_ATTACH_REUSEPORT_CBPF | SO_ZEROCOPY => { Ok(sock.get_option(level, optname).unwrap_or(0)) } + // SO_LINGER and SO_BINDTODEVICE are structured/string-valued and + // handled by dedicated wasm ABI wrappers. 
+ SO_LINGER | SO_BINDTODEVICE => Err(Errno::ENOPROTOOPT), // SO_RCVTIMEO/SO_SNDTIMEO handled by sys_getsockopt_timeout _ => Err(Errno::ENOPROTOOPT), }, + IPPROTO_IP => match optname { + IP_TOS | IP_PKTINFO | IP_MTU_DISCOVER | IP_MULTICAST_IF | IP_MULTICAST_TTL + | IP_MULTICAST_LOOP | IP_MULTICAST_ALL | MCAST_JOIN_GROUP | MCAST_LEAVE_GROUP + | MCAST_BLOCK_SOURCE | MCAST_UNBLOCK_SOURCE | MCAST_JOIN_SOURCE_GROUP + | MCAST_LEAVE_SOURCE_GROUP => { + Ok(sock.get_option(level, optname).unwrap_or_else(|| match optname { + IP_MULTICAST_TTL | IP_MULTICAST_LOOP => 1, + _ => 0, + })) + } + IP_MTU => Ok(1500), + _ => Err(Errno::ENOPROTOOPT), + }, + IPPROTO_IPV6 => match optname { + IPV6_MULTICAST_IF | IPV6_MULTICAST_HOPS | IPV6_MULTICAST_LOOP + | IPV6_RECVPKTINFO | IPV6_RECVTCLASS | IPV6_DONTFRAG | IPV6_TCLASS + | IPV6_V6ONLY => { + Ok(sock.get_option(level, optname).unwrap_or(0)) + } + _ => Err(Errno::ENOPROTOOPT), + }, IPPROTO_TCP => match optname { TCP_NODELAY | TCP_CORK | TCP_KEEPIDLE | TCP_KEEPINTVL | TCP_KEEPCNT | TCP_DEFER_ACCEPT | TCP_QUICKACK | TCP_USER_TIMEOUT => { Ok(sock.get_option(level, optname).unwrap_or(0)) } - // TCP_INFO handled separately by sys_getsockopt_tcp_info + // TCP_INFO and TCP_CONGESTION handled separately. + TCP_CONGESTION => Err(Errno::ENOPROTOOPT), _ => Err(Errno::ENOPROTOOPT), }, _ => Err(Errno::ENOPROTOOPT), @@ -6922,6 +7487,121 @@ pub fn sys_getsockopt_timeout(proc: &Process, fd: i32, optname: u32) -> Result Result<(i32, i32), Errno> { + let entry = proc.fd_table.get(fd)?; + let ofd = proc.ofd_table.get(entry.ofd_ref.0).ok_or(Errno::EBADF)?; + if ofd.file_type != FileType::Socket { + return Err(Errno::ENOTSOCK); + } + let sock_idx = (-(ofd.host_handle + 1)) as usize; + let sock = proc.sockets.get(sock_idx).ok_or(Errno::EBADF)?; + Ok((sock.linger_onoff, sock.linger_seconds)) +} + +/// Set SO_LINGER's structured state. 
+pub fn sys_setsockopt_linger( + proc: &mut Process, + fd: i32, + l_onoff: i32, + l_linger: i32, +) -> Result<(), Errno> { + let entry = proc.fd_table.get(fd)?; + let ofd = proc.ofd_table.get(entry.ofd_ref.0).ok_or(Errno::EBADF)?; + if ofd.file_type != FileType::Socket { + return Err(Errno::ENOTSOCK); + } + let sock_idx = (-(ofd.host_handle + 1)) as usize; + let sock = proc.sockets.get_mut(sock_idx).ok_or(Errno::EBADF)?; + sock.linger_onoff = l_onoff; + sock.linger_seconds = l_linger; + sock.set_option( + wasm_posix_shared::socket::SOL_SOCKET, + wasm_posix_shared::socket::SO_LINGER, + if l_onoff != 0 { 1 } else { 0 }, + ); + Ok(()) +} + +/// Set SO_BINDTODEVICE to a named virtual interface. +pub fn sys_setsockopt_bindtodevice( + proc: &mut Process, + fd: i32, + device: &[u8], +) -> Result<(), Errno> { + let entry = proc.fd_table.get(fd)?; + let ofd = proc.ofd_table.get(entry.ofd_ref.0).ok_or(Errno::EBADF)?; + if ofd.file_type != FileType::Socket { + return Err(Errno::ENOTSOCK); + } + let name = device.split(|&b| b == 0).next().unwrap_or(device); + if name != b"lo" && name != b"eth0" { + return Err(Errno::ENODEV); + } + let sock_idx = (-(ofd.host_handle + 1)) as usize; + let sock = proc.sockets.get_mut(sock_idx).ok_or(Errno::EBADF)?; + sock.bind_device = Some(name.to_vec()); + sock.set_option( + wasm_posix_shared::socket::SOL_SOCKET, + wasm_posix_shared::socket::SO_BINDTODEVICE, + 1, + ); + Ok(()) +} + +/// Get SO_BINDTODEVICE's bound interface name. +pub fn sys_getsockopt_bindtodevice(proc: &Process, fd: i32) -> Result, Errno> { + let entry = proc.fd_table.get(fd)?; + let ofd = proc.ofd_table.get(entry.ofd_ref.0).ok_or(Errno::EBADF)?; + if ofd.file_type != FileType::Socket { + return Err(Errno::ENOTSOCK); + } + let sock_idx = (-(ofd.host_handle + 1)) as usize; + let sock = proc.sockets.get(sock_idx).ok_or(Errno::EBADF)?; + Ok(sock.bind_device.clone().unwrap_or_default()) +} + +/// Get TCP_CONGESTION's algorithm name. 
+pub fn sys_getsockopt_tcp_congestion(proc: &Process, fd: i32) -> Result, Errno> { + let entry = proc.fd_table.get(fd)?; + let ofd = proc.ofd_table.get(entry.ofd_ref.0).ok_or(Errno::EBADF)?; + if ofd.file_type != FileType::Socket { + return Err(Errno::ENOTSOCK); + } + let sock_idx = (-(ofd.host_handle + 1)) as usize; + let sock = proc.sockets.get(sock_idx).ok_or(Errno::EBADF)?; + Ok(sock.tcp_congestion.clone()) +} + +/// Set TCP_CONGESTION's algorithm name. +pub fn sys_setsockopt_tcp_congestion( + proc: &mut Process, + fd: i32, + name: &[u8], +) -> Result<(), Errno> { + let entry = proc.fd_table.get(fd)?; + let ofd = proc.ofd_table.get(entry.ofd_ref.0).ok_or(Errno::EBADF)?; + if ofd.file_type != FileType::Socket { + return Err(Errno::ENOTSOCK); + } + let name = name.split(|&b| b == 0).next().unwrap_or(name); + if name.is_empty() { + return Err(Errno::ENOENT); + } + if name != b"cubic" && name != b"reno" && name != b"newreno" { + return Err(Errno::ENOENT); + } + let sock_idx = (-(ofd.host_handle + 1)) as usize; + let sock = proc.sockets.get_mut(sock_idx).ok_or(Errno::EBADF)?; + sock.tcp_congestion = name.to_vec(); + sock.set_option( + wasm_posix_shared::socket::IPPROTO_TCP, + wasm_posix_shared::socket::TCP_CONGESTION, + 1, + ); + Ok(()) +} + /// Set socket option value. 
pub fn sys_setsockopt( proc: &mut Process, @@ -6943,11 +7623,43 @@ pub fn sys_setsockopt( match level { SOL_SOCKET => match optname { - SO_REUSEADDR | SO_KEEPALIVE | SO_RCVBUF | SO_SNDBUF | SO_LINGER | SO_BROADCAST => { + SO_REUSEADDR | SO_REUSEPORT | SO_KEEPALIVE | SO_RCVBUF | SO_SNDBUF + | SO_BROADCAST | SO_PASSCRED | SO_ATTACH_REUSEPORT_CBPF | SO_ZEROCOPY => { + sock.set_option(level, optname, value); + Ok(()) + } + SO_LINGER | SO_BINDTODEVICE => Err(Errno::ENOPROTOOPT), + // SO_RCVTIMEO/SO_SNDTIMEO handled by sys_setsockopt_timeout + _ => Err(Errno::ENOPROTOOPT), + }, + IPPROTO_IP => match optname { + IP_TOS | IP_PKTINFO | IP_MTU_DISCOVER | IP_MULTICAST_IF | IP_MULTICAST_TTL + | IP_MULTICAST_LOOP | IP_MULTICAST_ALL | MCAST_JOIN_GROUP | MCAST_LEAVE_GROUP + | MCAST_BLOCK_SOURCE | MCAST_UNBLOCK_SOURCE | MCAST_JOIN_SOURCE_GROUP + | MCAST_LEAVE_SOURCE_GROUP => { + sock.set_option(level, optname, value); + Ok(()) + } + IP_MTU => Err(Errno::ENOPROTOOPT), + _ => Err(Errno::ENOPROTOOPT), + }, + IPPROTO_IPV6 => match optname { + IPV6_V6ONLY => { + if sock.domain != crate::socket::SocketDomain::Inet6 { + return Err(Errno::ENOPROTOOPT); + } + if sock.bind_port != 0 { + return Err(Errno::EINVAL); + } + sock.set_option(level, optname, if value != 0 { 1 } else { 0 }); + Ok(()) + } + IPV6_MULTICAST_IF | IPV6_MULTICAST_HOPS | IPV6_MULTICAST_LOOP + | IPV6_PKTINFO | IPV6_RECVPKTINFO | IPV6_RECVTCLASS | IPV6_DONTFRAG + | IPV6_TCLASS => { sock.set_option(level, optname, value); Ok(()) } - // SO_RCVTIMEO/SO_SNDTIMEO handled by sys_setsockopt_timeout _ => Err(Errno::ENOPROTOOPT), }, IPPROTO_TCP => match optname { @@ -6956,6 +7668,7 @@ pub fn sys_setsockopt( sock.set_option(level, optname, value); Ok(()) } + TCP_CONGESTION => Err(Errno::ENOPROTOOPT), _ => Err(Errno::ENOPROTOOPT), }, _ => Err(Errno::ENOPROTOOPT), @@ -7046,6 +7759,40 @@ pub fn sys_bind( sock.state = SocketState::Bound; Ok(()) } + SocketDomain::Inet6 => { + let (ip, port) = parse_sockaddr_in6(addr)?; + if sock.sock_type 
== SocketType::Dgram { + return udp6_bind_socket(proc, sock_idx, ip, port); + } + if !(is_loopback_addr6(ip) || is_unspecified_addr6(ip)) { + return Err(Errno::EADDRNOTAVAIL); + } + + let assigned_port = if port == 0 { + let p = proc.next_ephemeral_port; + proc.next_ephemeral_port = proc.next_ephemeral_port.wrapping_add(1); + if proc.next_ephemeral_port == 0 { + proc.next_ephemeral_port = 49152; + } + p + } else { + port + }; + + // Linux defaults AF_INET6 sockets to dual-stack unless + // IPV6_V6ONLY is enabled. A wildcard IPv6 stream bind therefore + // also occupies the IPv4 port space and must conflict with an + // AF_INET bind to the same port. Specific ::1 binds stay IPv6-only + // and can coexist with 127.0.0.1. + if is_unspecified_addr6(ip) && !ipv6_v6only(sock) { + crate::socket::tcp_register(proc.pid, sock_idx, [0, 0, 0, 0], assigned_port)?; + } + let sock = proc.sockets.get_mut(sock_idx).ok_or(Errno::EBADF)?; + sock.bind_addr6 = ip; + sock.bind_port = assigned_port; + sock.state = SocketState::Bound; + Ok(()) + } SocketDomain::Unix => { // sockaddr_un: family(2) + sun_path (null-terminated, up to 108 bytes) if addr.len() < 3 { @@ -7053,15 +7800,28 @@ pub fn sys_bind( } // Extract path: starts at offset 2, null-terminated let path_bytes = &addr[2..]; - let path_end = path_bytes - .iter() - .position(|&b| b == 0) - .unwrap_or(path_bytes.len()); - if path_end == 0 { - return Err(Errno::EINVAL); - } - let sun_path = &path_bytes[..path_end]; - let resolved = crate::path::resolve_path(sun_path, &proc.cwd); + let (resolved, abstract_unix) = if path_bytes.first().copied() == Some(0) { + if path_bytes.len() < 2 { + return Err(Errno::EINVAL); + } + // Linux abstract namespace sockets are identified by the raw + // bytes after sun_family, including the leading NUL. No + // filesystem inode is created and embedded/trailing NUL bytes + // are part of the address. 
+ (path_bytes.to_vec(), true) + } else { + let path_end = path_bytes + .iter() + .position(|&b| b == 0) + .unwrap_or(path_bytes.len()); + if path_end == 0 { + return Err(Errno::EINVAL); + } + ( + crate::path::resolve_path(&path_bytes[..path_end], &proc.cwd), + false, + ) + }; // POSIX: bind() must create a filesystem inode at sun_path so // chmod/stat/ls find a node there. Do that first via host O_CREAT| @@ -7071,22 +7831,26 @@ pub fn sys_bind( // here after the package-management rebase dropped it; the same // code lives at the merge base but didn't survive into the // rebased branch.) - use wasm_posix_shared::flags::{O_CREAT, O_EXCL, O_WRONLY}; - check_open_permissions(proc, host, &resolved, O_CREAT | O_EXCL | O_WRONLY)?; - let h = match host.host_open(&resolved, O_CREAT | O_EXCL | O_WRONLY, 0o600) { - Ok(h) => h, - Err(Errno::EEXIST) => return Err(Errno::EADDRINUSE), - Err(e) => return Err(e), - }; - host.host_chown(&resolved, proc.euid, proc.egid)?; - let _ = host.host_close(h); + if !abstract_unix { + use wasm_posix_shared::flags::{O_CREAT, O_EXCL, O_WRONLY}; + check_open_permissions(proc, host, &resolved, O_CREAT | O_EXCL | O_WRONLY)?; + let h = match host.host_open(&resolved, O_CREAT | O_EXCL | O_WRONLY, 0o600) { + Ok(h) => h, + Err(Errno::EEXIST) => return Err(Errno::EADDRINUSE), + Err(e) => return Err(e), + }; + host.host_chown(&resolved, proc.euid, proc.egid)?; + let _ = host.host_close(h); + } // Register in global Unix socket registry. If a stale entry exists // (host had no inode but registry did — shouldn't happen normally) // unwind the host inode so we don't leak. 
let registry = unsafe { crate::unix_socket::global_unix_socket_registry() }; if !registry.register(resolved.clone(), proc.pid, sock_idx) { - let _ = host.host_unlink(&resolved); + if !abstract_unix { + let _ = host.host_unlink(&resolved); + } return Err(Errno::EADDRINUSE); } @@ -7095,7 +7859,6 @@ pub fn sys_bind( sock.state = SocketState::Bound; Ok(()) } - SocketDomain::Inet6 => Err(Errno::EADDRNOTAVAIL), } } @@ -7116,13 +7879,65 @@ pub fn sys_listen( return Err(Errno::ENOTSOCK); } let sock_idx = (-(ofd.host_handle + 1)) as usize; - let sock = proc.sockets.get_mut(sock_idx).ok_or(Errno::EBADF)?; - if sock.sock_type != SocketType::Stream { + let (domain, sock_type, state, v6only) = { + let sock = proc.sockets.get(sock_idx).ok_or(Errno::EBADF)?; + (sock.domain, sock.sock_type, sock.state, ipv6_v6only(sock)) + }; + if sock_type != SocketType::Stream { return Err(Errno::EOPNOTSUPP); } - if sock.state != SocketState::Bound && sock.state != SocketState::Listening { + if state == SocketState::Unbound { + // Linux auto-binds unbound INET stream sockets on listen(2). This is + // observable through getsockname() and lets standard socket option + // probes listen without an explicit bind. + match domain { + SocketDomain::Inet | SocketDomain::Inet6 => { + let assigned_port = { + let p = proc.next_ephemeral_port; + proc.next_ephemeral_port = proc.next_ephemeral_port.wrapping_add(1); + if proc.next_ephemeral_port == 0 { + proc.next_ephemeral_port = 49152; + } + p + }; + if domain == SocketDomain::Inet + || (domain == SocketDomain::Inet6 && !v6only) + { + // listen(2) on an unbound INET stream socket auto-binds + // to the wildcard address. Linux's default AF_INET6 + // wildcard listener is dual-stack, so reserve the IPv4 + // wildcard port as well unless IPV6_V6ONLY was set before + // listen(). 
+ crate::socket::tcp_register(proc.pid, sock_idx, [0, 0, 0, 0], assigned_port)?; + } + let sock = proc.sockets.get_mut(sock_idx).ok_or(Errno::EBADF)?; + sock.bind_addr = [0, 0, 0, 0]; + sock.bind_addr6 = [0; 16]; + sock.bind_port = assigned_port; + sock.state = SocketState::Bound; + } + SocketDomain::Unix => return Err(Errno::EINVAL), + } + } else if state != SocketState::Bound && state != SocketState::Listening { return Err(Errno::EINVAL); } + + let sock = proc.sockets.get_mut(sock_idx).ok_or(Errno::EBADF)?; + if let Some(value) = sock.get_option( + wasm_posix_shared::socket::IPPROTO_TCP, + wasm_posix_shared::socket::TCP_DEFER_ACCEPT, + ) { + if value > 0 { + // Linux rounds TCP_DEFER_ACCEPT to an internal retransmission + // timeout. Preserve the observable contract that getsockopt() + // after listen() returns a value greater than the requested one. + sock.set_option( + wasm_posix_shared::socket::IPPROTO_TCP, + wasm_posix_shared::socket::TCP_DEFER_ACCEPT, + value.saturating_add(1), + ); + } + } sock.state = SocketState::Listening; if sock.accept_wake_idx.is_none() { sock.accept_wake_idx = Some(crate::wakeup::alloc_accept_wake_idx()); @@ -7132,16 +7947,31 @@ pub fn sys_listen( // children will inherit. This way every process sharing this listener // pulls from the same queue (POSIX semantics) — see socket.rs. let domain = sock.domain; - if domain == SocketDomain::Inet && sock.shared_backlog_idx.is_none() { + if matches!( + domain, + SocketDomain::Inet | SocketDomain::Inet6 + ) + && sock.shared_backlog_idx.is_none() + { let backlog_idx = unsafe { crate::socket::shared_listener_backlog_table().alloc() }; sock.shared_backlog_idx = Some(backlog_idx); } - // Notify the host for AF_INET sockets so it can open a real TCP server + // Notify the host so it can open a real TCP server. The bridge transport is + // IPv4 today; AF_INET6 loopback listeners are registered on the IPv4 + // loopback transport while the guest-facing socket remains AF_INET6. 
This + // gives cross-process ::1 loopback the same accept/backlog semantics as + // 127.0.0.1 without exposing host-network details to the guest. let port = sock.bind_port; let addr = sock.bind_addr; - if domain == SocketDomain::Inet { - let _ = host.host_net_listen(fd, port, &addr); + match domain { + SocketDomain::Inet => { + let _ = host.host_net_listen(fd, port, &addr); + } + SocketDomain::Inet6 => { + let _ = host.host_net_listen(fd, port, &[127, 0, 0, 1]); + } + SocketDomain::Unix => {} } Ok(()) } @@ -7158,6 +7988,11 @@ pub fn sys_accept(proc: &mut Process, _host: &mut dyn HostIO, fd: i32) -> Result if ofd.file_type != FileType::Socket { return Err(Errno::ENOTSOCK); } + // Kandelo's POSIX socket ABI follows platforms where an accepted socket + // preserves the listening socket's nonblocking status. Keep the accepted + // OFD status flags in sync with that contract so stream metadata and + // read/write behavior agree. + let accepted_status_flags = O_RDWR | (ofd.status_flags & O_NONBLOCK); let sock_idx = (-(ofd.host_handle + 1)) as usize; let sock = proc.sockets.get(sock_idx).ok_or(Errno::EBADF)?; @@ -7189,7 +8024,7 @@ pub fn sys_accept(proc: &mut Process, _host: &mut dyn HostIO, fd: i32) -> Result let host_handle = -((accepted_sock_idx as i64) + 1); let ofd_idx = proc.ofd_table.create( FileType::Socket, - O_RDWR, + accepted_status_flags, host_handle, b"/dev/socket".to_vec(), ); @@ -7213,7 +8048,7 @@ pub fn sys_accept(proc: &mut Process, _host: &mut dyn HostIO, fd: i32) -> Result let host_handle = -((accepted_sock_idx as i64) + 1); let ofd_idx = proc.ofd_table.create( FileType::Socket, - O_RDWR, + accepted_status_flags, host_handle, b"/dev/socket".to_vec(), ); @@ -7266,8 +8101,9 @@ pub fn sys_connect( return Ok(()); } - let (ip, port) = parse_sockaddr_in(addr)?; - if ip == [0, 0, 0, 0] && port == 0 { + let (raw_ip, port) = parse_sockaddr_in(addr)?; + let ip = udp_canonical_dst_addr(raw_ip); + if raw_ip == [0, 0, 0, 0] && port == 0 { return 
Err(Errno::EADDRNOTAVAIL); } if ip == [255, 255, 255, 255] { @@ -7298,6 +8134,116 @@ pub fn sys_connect( if sock.state == SocketState::Connected { return Err(Errno::EISCONN); } + + if sock.domain == SocketDomain::Inet6 { + if sock.sock_type == SocketType::Dgram { + let (raw_ip6, port) = parse_sockaddr_in6(addr)?; + if is_unspecified_addr6(raw_ip6) && port == 0 { + return Err(Errno::EADDRNOTAVAIL); + } + let ip6 = if is_unspecified_addr6(raw_ip6) { + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1] + } else { + raw_ip6 + }; + if !is_loopback_addr6(ip6) { + return Err(Errno::ENETUNREACH); + } + udp6_ensure_bound( + proc, + sock_idx, + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], + )?; + let sock = proc.sockets.get_mut(sock_idx).ok_or(Errno::EBADF)?; + sock.peer_addr6 = ip6; + sock.peer_port = port; + sock.state = SocketState::Connected; + return Ok(()); + } + if sock.sock_type != SocketType::Stream { + return Err(Errno::EOPNOTSUPP); + } + let (ip6, port) = parse_sockaddr_in6(addr)?; + if !(is_loopback_addr6(ip6) || is_unspecified_addr6(ip6)) { + return Err(Errno::EADDRNOTAVAIL); + } + + // AF_INET6 support is currently local-loopback. That is enough + // to model standard bind/listen/getsockname behavior and makes + // unsupported remote IPv6 fail deterministically instead of + // being mis-parsed as IPv4. 
+ let mut listener_idx = None; + let sock_count = proc.sockets.len(); + for i in 0..sock_count { + if let Some(s) = proc.sockets.get(i) { + if s.domain == SocketDomain::Inet6 + && s.state == SocketState::Listening + && s.bind_port == port + && s.sock_type == SocketType::Stream + && (is_unspecified_addr6(s.bind_addr6) || s.bind_addr6 == ip6) + { + listener_idx = Some(i); + break; + } + } + } + let listener_idx = listener_idx.ok_or(Errno::ECONNREFUSED)?; + + let (pipe_a_idx, pipe_b_idx) = + proc.alloc_pipe_pair(PipeBuffer::new(65536), PipeBuffer::new(65536)); + + let client_sock = proc.sockets.get(sock_idx).ok_or(Errno::EBADF)?; + let client_addr6 = if is_unspecified_addr6(client_sock.bind_addr6) { + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1] + } else { + client_sock.bind_addr6 + }; + let mut client_port = client_sock.bind_port; + if client_port == 0 { + client_port = proc.next_ephemeral_port; + proc.next_ephemeral_port = proc.next_ephemeral_port.wrapping_add(1); + if proc.next_ephemeral_port == 0 { + proc.next_ephemeral_port = 49152; + } + } + + let listener = proc.sockets.get(listener_idx).ok_or(Errno::EBADF)?; + let mut accepted_sock = SocketInfo::new(SocketDomain::Inet6, SocketType::Stream, 0); + accepted_sock.state = SocketState::Connected; + accepted_sock.recv_buf_idx = Some(pipe_a_idx); + accepted_sock.send_buf_idx = Some(pipe_b_idx); + accepted_sock.bind_addr6 = listener.bind_addr6; + accepted_sock.bind_port = listener.bind_port; + accepted_sock.peer_addr6 = client_addr6; + accepted_sock.peer_port = client_port; + let accepted_idx = proc.sockets.alloc(accepted_sock); + + let listener = proc.sockets.get_mut(listener_idx).ok_or(Errno::EBADF)?; + listener.listen_backlog.push(accepted_idx); + let accept_wake_idx = listener.accept_wake_idx; + + let client = proc.sockets.get_mut(sock_idx).ok_or(Errno::EBADF)?; + client.send_buf_idx = Some(pipe_a_idx); + client.recv_buf_idx = Some(pipe_b_idx); + client.state = SocketState::Connected; + client.peer_addr6 = ip6; 
+ client.peer_port = port; + client.peer_idx = Some(accepted_idx); + if client.bind_port == 0 { + client.bind_port = client_port; + client.bind_addr6 = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]; + } + + let accepted = proc.sockets.get_mut(accepted_idx).ok_or(Errno::EBADF)?; + accepted.peer_idx = Some(sock_idx); + + if let Some(idx) = accept_wake_idx { + crate::wakeup::push_accept(idx); + } + + return Ok(()); + } + // Parse sockaddr_in: family(2) + port(2 big-endian) + addr(4) if addr.len() < 8 { return Err(Errno::EINVAL); @@ -7450,8 +8396,34 @@ pub fn sys_connect( SocketDomain::Unix => { let sock = proc.sockets.get(sock_idx).ok_or(Errno::EBADF)?; if sock.sock_type == SocketType::Dgram { - // SOCK_DGRAM connect on AF_UNIX succeeds as a bit-bucket (syslog pattern) + let peer_idx = if addr.len() >= 3 { + let path_bytes = &addr[2..]; + let resolved = if path_bytes.first().copied() == Some(0) { + if path_bytes.len() < 2 { + return Err(Errno::EINVAL); + } + path_bytes.to_vec() + } else { + let path_end = path_bytes + .iter() + .position(|&b| b == 0) + .unwrap_or(path_bytes.len()); + if path_end == 0 { + return Err(Errno::EINVAL); + } + crate::path::resolve_path(&path_bytes[..path_end], &proc.cwd) + }; + let registry = unsafe { crate::unix_socket::global_unix_socket_registry() }; + registry.lookup(&resolved).map(|entry| entry.sock_idx) + } else { + None + }; + + // Preserve the existing syslog-friendly behavior for absent + // datagram peers (connected sends become a bit bucket), but + // deliver to registered AF_UNIX datagram sockets when present. 
let sock = proc.sockets.get_mut(sock_idx).ok_or(Errno::EBADF)?; + sock.peer_idx = peer_idx; sock.state = SocketState::Connected; return Ok(()); } @@ -7464,14 +8436,21 @@ pub fn sys_connect( return Err(Errno::EINVAL); } let path_bytes = &addr[2..]; - let path_end = path_bytes - .iter() - .position(|&b| b == 0) - .unwrap_or(path_bytes.len()); - if path_end == 0 { - return Err(Errno::EINVAL); - } - let resolved = crate::path::resolve_path(&path_bytes[..path_end], &proc.cwd); + let resolved = if path_bytes.first().copied() == Some(0) { + if path_bytes.len() < 2 { + return Err(Errno::EINVAL); + } + path_bytes.to_vec() + } else { + let path_end = path_bytes + .iter() + .position(|&b| b == 0) + .unwrap_or(path_bytes.len()); + if path_end == 0 { + return Err(Errno::EINVAL); + } + crate::path::resolve_path(&path_bytes[..path_end], &proc.cwd) + }; // Look up the path in the global Unix socket registry let registry = unsafe { crate::unix_socket::global_unix_socket_registry() }; @@ -7572,14 +8551,6 @@ pub fn sys_sendto( let sock_idx = (-(ofd.host_handle + 1)) as usize; let sock = proc.sockets.get(sock_idx).ok_or(Errno::EBADF)?; - // AF_UNIX DGRAM connected sockets: bit-bucket (syslog pattern via send→sendto) - if sock.domain == SocketDomain::Unix - && sock.sock_type == SocketType::Dgram - && sock.state == SocketState::Connected - { - return Ok(buf.len()); - } - if addr.is_empty() { if sock.state == SocketState::Connected { return sys_send(proc, _host, fd, buf, _flags); @@ -7587,12 +8558,47 @@ pub fn sys_sendto( return Err(Errno::EDESTADDRREQ); } - if sock.domain != SocketDomain::Inet || sock.sock_type != SocketType::Dgram { + if sock.sock_type != SocketType::Dgram { return Err(Errno::EOPNOTSUPP); } - let (dst_ip, dst_port) = parse_sockaddr_in(addr)?; - udp_send_datagram(proc, _host, sock_idx, buf, dst_ip, dst_port) + match sock.domain { + SocketDomain::Inet => { + let (dst_ip, dst_port) = parse_sockaddr_in(addr)?; + udp_send_datagram(proc, _host, sock_idx, buf, dst_ip, 
dst_port) + } + SocketDomain::Inet6 => { + let (dst_ip, dst_port) = parse_sockaddr_in6(addr)?; + udp6_send_datagram(proc, sock_idx, buf, dst_ip, dst_port) + } + SocketDomain::Unix => { + if addr.len() < 3 { + return Err(Errno::EINVAL); + } + let path_bytes = &addr[2..]; + let resolved = if path_bytes.first().copied() == Some(0) { + if path_bytes.len() < 2 { + return Err(Errno::EINVAL); + } + path_bytes.to_vec() + } else { + let path_end = path_bytes + .iter() + .position(|&b| b == 0) + .unwrap_or(path_bytes.len()); + if path_end == 0 { + return Err(Errno::EINVAL); + } + crate::path::resolve_path(&path_bytes[..path_end], &proc.cwd) + }; + let registry = unsafe { crate::unix_socket::global_unix_socket_registry() }; + let peer_idx = registry + .lookup(&resolved) + .map(|entry| entry.sock_idx) + .ok_or(Errno::ECONNREFUSED)?; + unix_dgram_send_to_sock(proc, sock_idx, peer_idx, buf) + } + } } /// Receive a message from a socket with sender address. @@ -7622,7 +8628,10 @@ pub fn sys_recvfrom( let n = sys_recv(proc, _host, fd, buf, _flags)?; return Ok((n, 0)); } - if sock.domain != SocketDomain::Inet { + if !matches!( + sock.domain, + SocketDomain::Inet | SocketDomain::Inet6 | SocketDomain::Unix + ) { return Err(Errno::EOPNOTSUPP); } if sock.shut_rd { @@ -7631,10 +8640,15 @@ pub fn sys_recvfrom( let sock = proc.sockets.get_mut(sock_idx).ok_or(Errno::EBADF)?; let datagram_idx = sock.dgram_queue.iter().position(|d| { - if sock.state == SocketState::Connected { - d.src_addr == sock.peer_addr && d.src_port == sock.peer_port - } else { - true + if sock.state != SocketState::Connected { + return true; + } + match sock.domain { + SocketDomain::Inet => d.src_addr == sock.peer_addr && d.src_port == sock.peer_port, + SocketDomain::Inet6 => { + d.src_addr6 == sock.peer_addr6 && d.src_port == sock.peer_port + } + SocketDomain::Unix => sock.peer_idx.map_or(true, |peer| d.src_sock_idx == Some(peer)), } }); if datagram_idx.is_none() { @@ -7656,10 +8670,22 @@ pub fn sys_recvfrom( let 
copy_len = buf.len().min(datagram.data.len()); buf[..copy_len].copy_from_slice(&datagram.data[..copy_len]); - // Write sender sockaddr_in to addr_buf + // Write sender sockaddr to addr_buf let mut addr_written = 0; if !addr_buf.is_empty() { - addr_written = write_sockaddr_in(addr_buf, datagram.src_addr, datagram.src_port); + addr_written = match sock.domain { + SocketDomain::Inet => write_sockaddr_in(addr_buf, datagram.src_addr, datagram.src_port), + SocketDomain::Inet6 => { + write_sockaddr_in6(addr_buf, datagram.src_addr6, datagram.src_port) + } + SocketDomain::Unix => { + if addr_buf.len() >= 2 { + addr_buf[0] = 1; + addr_buf[1] = 0; + } + 2 + } + }; } Ok((copy_len, addr_written)) @@ -13650,6 +14676,31 @@ mod tests { assert_eq!(val, AF_UNIX); } + #[test] + fn test_getsockopt_bindtodevice_defaults_to_empty_name() { + let mut proc = Process::new(1); + let mut host = MockHostIO::new(); + use wasm_posix_shared::socket::*; + let fd = sys_socket(&mut proc, &mut host, AF_INET, SOCK_DGRAM, 0).unwrap(); + + let name = sys_getsockopt_bindtodevice(&proc, fd).unwrap(); + + assert!(name.is_empty()); + } + + #[test] + fn test_getsockopt_bindtodevice_returns_explicit_binding() { + let mut proc = Process::new(1); + let mut host = MockHostIO::new(); + use wasm_posix_shared::socket::*; + let fd = sys_socket(&mut proc, &mut host, AF_INET, SOCK_DGRAM, 0).unwrap(); + + sys_setsockopt_bindtodevice(&mut proc, fd, b"lo\0").unwrap(); + let name = sys_getsockopt_bindtodevice(&proc, fd).unwrap(); + + assert_eq!(name, b"lo"); + } + #[test] fn test_getsockopt_not_socket() { let mut proc = Process::new(1); @@ -13675,9 +14726,9 @@ mod tests { let mut host = MockHostIO::new(); use wasm_posix_shared::socket::*; let fd = sys_socket(&mut proc, &mut host, AF_UNIX, SOCK_STREAM, 0).unwrap(); - assert!(sys_setsockopt(&mut proc, fd, SOL_SOCKET, SO_LINGER, 5).is_ok()); - let val = sys_getsockopt(&mut proc, fd, SOL_SOCKET, SO_LINGER).unwrap(); - assert_eq!(val, 5); + sys_setsockopt_linger(&mut proc, fd, 
1, 5).unwrap(); + let val = sys_getsockopt_linger(&proc, fd).unwrap(); + assert_eq!(val, (1, 5)); } #[test] @@ -16731,6 +17782,63 @@ mod tests { registry.cleanup_process(9022); } + #[test] + fn test_abstract_unix_socket_bind_is_not_filesystem_backed() { + let _lock = UNIX_REGISTRY_LOCK.lock().unwrap(); + let mut proc = Process::new(1); + let mut host = MockHostIO::new(); + proc.pid = 9023; + let fd = sys_socket(&mut proc, &mut host, 1, 1, 0).unwrap(); + let mut addr = [0u8; 16]; + addr[0] = 1; // AF_UNIX + addr[2] = 0; // Linux abstract namespace marker + addr[3..8].copy_from_slice(b"abs01"); + let addrlen = 8; + + sys_bind(&mut proc, &mut host, fd, &addr[..addrlen]).unwrap(); + assert_eq!( + host.next_handle, 100, + "abstract AF_UNIX bind must not create a host filesystem inode", + ); + + let mut name = [0u8; 32]; + let n = sys_getsockname(&proc, fd, &mut name).unwrap(); + assert_eq!(n, addrlen); + assert_eq!(&name[..addrlen], &addr[..addrlen]); + + unsafe { crate::unix_socket::global_unix_socket_registry() }.cleanup_process(9023); + } + + #[test] + fn test_abstract_unix_socket_same_process_connect() { + let _lock = UNIX_REGISTRY_LOCK.lock().unwrap(); + let mut proc = Process::new(1); + let mut host = MockHostIO::new(); + proc.pid = 9024; + let server_fd = sys_socket(&mut proc, &mut host, 1, 1, 0).unwrap(); + let mut addr = [0u8; 16]; + addr[0] = 1; // AF_UNIX + addr[2] = 0; // abstract namespace + addr[3..9].copy_from_slice(b"abs002"); + let addrlen = 9; + sys_bind(&mut proc, &mut host, server_fd, &addr[..addrlen]).unwrap(); + sys_listen(&mut proc, &mut host, server_fd, 5).unwrap(); + + let client_fd = sys_socket(&mut proc, &mut host, 1, 1, 0).unwrap(); + sys_connect(&mut proc, &mut host, client_fd, &addr[..addrlen]).unwrap(); + let accepted_fd = sys_accept(&mut proc, &mut host, server_fd).unwrap(); + + assert_eq!( + sys_write(&mut proc, &mut host, client_fd, b"abstract").unwrap(), + 8, + ); + let mut buf = [0u8; 16]; + let n = sys_read(&mut proc, &mut host, 
accepted_fd, &mut buf).unwrap(); + assert_eq!(&buf[..n], b"abstract"); + + unsafe { crate::unix_socket::global_unix_socket_registry() }.cleanup_process(9024); + } + #[test] fn test_clock_getres_per_process_cpu_clock() { let proc = Process::new(1); @@ -17572,6 +18680,41 @@ mod tests { assert_eq!(&buf2[..n2], b"reply"); } + #[test] + fn test_accept_preserves_listener_nonblock_status() { + let mut proc = Process::new(1); + let mut host = MockHostIO::new(); + use wasm_posix_shared::fcntl_cmd::F_SETFL; + use wasm_posix_shared::socket::*; + + let server_fd = sys_socket(&mut proc, &mut host, AF_INET, SOCK_STREAM, 0).unwrap(); + sys_fcntl(&mut proc, server_fd, F_SETFL, O_NONBLOCK).unwrap(); + let mut addr = [0u8; 16]; + addr[0] = 2; + addr[2] = 0x23; + addr[3] = 0x8d; // port 9101 + sys_bind(&mut proc, &mut host, server_fd, &addr).unwrap(); + sys_listen(&mut proc, &mut host, server_fd, 5).unwrap(); + + let client_fd = sys_socket(&mut proc, &mut host, AF_INET, SOCK_STREAM, 0).unwrap(); + let mut connect_addr = [0u8; 16]; + connect_addr[0] = 2; + connect_addr[2] = 0x23; + connect_addr[3] = 0x8d; + connect_addr[4] = 127; + connect_addr[7] = 1; + sys_connect(&mut proc, &mut host, client_fd, &connect_addr).unwrap(); + + let accepted_fd = sys_accept(&mut proc, &mut host, server_fd).unwrap(); + let entry = proc.fd_table.get(accepted_fd).unwrap(); + let ofd = proc.ofd_table.get(entry.ofd_ref.0).unwrap(); + assert_ne!( + ofd.status_flags & O_NONBLOCK, + 0, + "accepted socket should preserve listener nonblocking status", + ); + } + #[test] fn test_udp_loopback() { let mut proc = Process::new(1); @@ -17619,6 +18762,327 @@ mod tests { assert_eq!(from_addr[0], 2); // AF_INET } + #[test] + fn test_ipv4_multicast_loopback_membership_and_filters() { + let mut proc = Process::new(1); + let mut host = MockHostIO::new(); + use wasm_posix_shared::socket::*; + + let recv_fd = sys_socket(&mut proc, &mut host, AF_INET, SOCK_DGRAM, 0).unwrap(); + let mut bind_any = [0u8; 16]; + bind_any[0] = 2; + 
sys_bind(&mut proc, &mut host, recv_fd, &bind_any).unwrap(); + let mut gsa_buf = [0u8; 16]; + sys_getsockname(&proc, recv_fd, &mut gsa_buf).unwrap(); + let port = u16::from_be_bytes([gsa_buf[2], gsa_buf[3]]); + + let send_fd = sys_socket(&mut proc, &mut host, AF_INET, SOCK_DGRAM, 0).unwrap(); + let mut bind_loopback = [0u8; 16]; + bind_loopback[0] = 2; + bind_loopback[4] = 127; + bind_loopback[7] = 1; + sys_bind(&mut proc, &mut host, send_fd, &bind_loopback).unwrap(); + + let group = [224, 0, 0, 23]; + let lo = [127, 0, 0, 1]; + sys_setsockopt_ipv4_multicast( + &mut proc, + recv_fd, + MCAST_JOIN_GROUP, + group, + lo, + None, + ) + .unwrap(); + + let mut dest = [0u8; 16]; + dest[0] = 2; + dest[2..4].copy_from_slice(&port.to_be_bytes()); + dest[4..8].copy_from_slice(&group); + assert_eq!( + sys_sendto(&mut proc, &mut host, send_fd, b"initial", 0, &dest).unwrap(), + 7 + ); + + let mut buf = [0u8; 32]; + let mut from = [0u8; 16]; + let (n, _) = + sys_recvfrom(&mut proc, &mut host, recv_fd, &mut buf, 0, &mut from).unwrap(); + assert_eq!(&buf[..n], b"initial"); + assert_eq!(&from[4..8], &lo); + + sys_setsockopt_ipv4_multicast( + &mut proc, + recv_fd, + MCAST_BLOCK_SOURCE, + group, + lo, + Some(lo), + ) + .unwrap(); + assert_eq!( + sys_sendto(&mut proc, &mut host, send_fd, b"blocked", 0, &dest).unwrap(), + 7 + ); + let recv_idx = { + let entry = proc.fd_table.get(recv_fd).unwrap(); + let ofd = proc.ofd_table.get(entry.ofd_ref.0).unwrap(); + (-(ofd.host_handle + 1)) as usize + }; + assert!( + proc.sockets + .get(recv_idx) + .unwrap() + .dgram_queue + .is_empty(), + "blocked multicast source should not enqueue a datagram" + ); + + sys_setsockopt_ipv4_multicast( + &mut proc, + recv_fd, + MCAST_UNBLOCK_SOURCE, + group, + lo, + Some(lo), + ) + .unwrap(); + assert_eq!( + sys_sendto(&mut proc, &mut host, send_fd, b"unblocked", 0, &dest).unwrap(), + 9 + ); + let (n, _) = + sys_recvfrom(&mut proc, &mut host, recv_fd, &mut buf, 0, &mut from).unwrap(); + assert_eq!(&buf[..n], 
b"unblocked"); + + sys_setsockopt_ipv4_multicast( + &mut proc, + recv_fd, + MCAST_LEAVE_GROUP, + group, + lo, + None, + ) + .unwrap(); + assert_eq!( + sys_sendto(&mut proc, &mut host, send_fd, b"ignored", 0, &dest).unwrap(), + 7 + ); + assert!( + proc.sockets + .get(recv_idx) + .unwrap() + .dgram_queue + .is_empty(), + "leaving a multicast group should stop group delivery" + ); + } + + #[test] + fn test_ipv4_multicast_source_membership_and_interface_match() { + let mut proc = Process::new(1); + let mut host = MockHostIO::new(); + use wasm_posix_shared::socket::*; + + let recv_fd = sys_socket(&mut proc, &mut host, AF_INET, SOCK_DGRAM, 0).unwrap(); + let mut bind_any = [0u8; 16]; + bind_any[0] = 2; + bind_any[2] = 0x45; + bind_any[3] = 0x67; + sys_bind(&mut proc, &mut host, recv_fd, &bind_any).unwrap(); + + let loop_sender = sys_socket(&mut proc, &mut host, AF_INET, SOCK_DGRAM, 0).unwrap(); + let mut bind_loopback = [0u8; 16]; + bind_loopback[0] = 2; + bind_loopback[4] = 127; + bind_loopback[7] = 1; + sys_bind(&mut proc, &mut host, loop_sender, &bind_loopback).unwrap(); + + let default_sender = sys_socket(&mut proc, &mut host, AF_INET, SOCK_DGRAM, 0).unwrap(); + let group = [224, 0, 0, 23]; + let lo = [127, 0, 0, 1]; + sys_setsockopt_ipv4_multicast( + &mut proc, + recv_fd, + MCAST_JOIN_SOURCE_GROUP, + group, + lo, + Some(lo), + ) + .unwrap(); + + let mut dest = [0u8; 16]; + dest[0] = 2; + dest[2] = 0x45; + dest[3] = 0x67; + dest[4..8].copy_from_slice(&group); + + // The receiver joined the group on loopback only. An unbound sender + // uses the default interface and must not satisfy that membership, + // but the UDP multicast send itself still succeeds. 
+ assert_eq!( + sys_sendto( + &mut proc, + &mut host, + default_sender, + b"default-iface", + 0, + &dest + ) + .unwrap(), + 13 + ); + let recv_idx = { + let entry = proc.fd_table.get(recv_fd).unwrap(); + let ofd = proc.ofd_table.get(entry.ofd_ref.0).unwrap(); + (-(ofd.host_handle + 1)) as usize + }; + assert!( + proc.sockets + .get(recv_idx) + .unwrap() + .dgram_queue + .is_empty() + ); + + assert_eq!( + sys_sendto(&mut proc, &mut host, loop_sender, b"source-match", 0, &dest).unwrap(), + 12 + ); + let mut buf = [0u8; 32]; + let mut from = [0u8; 16]; + let (n, _) = + sys_recvfrom(&mut proc, &mut host, recv_fd, &mut buf, 0, &mut from).unwrap(); + assert_eq!(&buf[..n], b"source-match"); + + sys_setsockopt_ipv4_multicast( + &mut proc, + recv_fd, + MCAST_LEAVE_SOURCE_GROUP, + group, + lo, + Some(lo), + ) + .unwrap(); + assert_eq!( + sys_sendto(&mut proc, &mut host, loop_sender, b"left-source", 0, &dest).unwrap(), + 11 + ); + assert!( + proc.sockets + .get(recv_idx) + .unwrap() + .dgram_queue + .is_empty() + ); + } + + #[test] + fn test_udp_connect_to_inaddr_any_routes_to_loopback() { + let mut proc = Process::new(1); + let mut host = MockHostIO::new(); + use wasm_posix_shared::socket::*; + + let fd = sys_socket(&mut proc, &mut host, AF_INET, SOCK_DGRAM, 0).unwrap(); + let mut addr = [0u8; 16]; + addr[0] = 2; // AF_INET + addr[3] = 80; // port 80, network byte order + + sys_connect(&mut proc, &mut host, fd, &addr).unwrap(); + + let entry = proc.fd_table.get(fd).unwrap(); + let ofd = proc.ofd_table.get(entry.ofd_ref.0).unwrap(); + let sock_idx = (-(ofd.host_handle + 1)) as usize; + let sock = proc.sockets.get(sock_idx).unwrap(); + assert_eq!(sock.peer_addr, [127, 0, 0, 1]); + assert_eq!(sock.bind_addr, [127, 0, 0, 1]); + } + + #[test] + fn test_inet6_udp_loopback_datagram_delivery() { + let mut proc = Process::new(1); + let mut host = MockHostIO::new(); + use wasm_posix_shared::socket::*; + + let server_fd = sys_socket(&mut proc, &mut host, AF_INET6, SOCK_DGRAM, 
0).unwrap(); + let mut server_addr = [0u8; 28]; + server_addr[0] = 10; // AF_INET6 + server_addr[2] = 0x56; + server_addr[3] = 0xce; // port 22222 + server_addr[23] = 1; // ::1 + sys_bind(&mut proc, &mut host, server_fd, &server_addr).unwrap(); + + let client_fd = sys_socket(&mut proc, &mut host, AF_INET6, SOCK_DGRAM, 0).unwrap(); + sys_connect(&mut proc, &mut host, client_fd, &server_addr).unwrap(); + assert_eq!(sys_write(&mut proc, &mut host, client_fd, b"udp6").unwrap(), 4); + + let mut buf = [0u8; 8]; + let n = sys_read(&mut proc, &mut host, server_fd, &mut buf).unwrap(); + assert_eq!(&buf[..n], b"udp6"); + } + + #[test] + fn test_unix_dgram_loopback_datagram_delivery() { + let _lock = UNIX_REGISTRY_LOCK.lock().unwrap(); + let mut proc = Process::new(1); + let mut host = MockHostIO::new(); + use wasm_posix_shared::socket::*; + + proc.pid = 9025; + let server_fd = sys_socket(&mut proc, &mut host, AF_UNIX, SOCK_DGRAM, 0).unwrap(); + let mut addr = [0u8; 64]; + addr[0] = 1; // AF_UNIX + let path = b"/tmp/udg-loop.sock"; + addr[2..2 + path.len()].copy_from_slice(path); + let addrlen = 2 + path.len() + 1; + sys_bind(&mut proc, &mut host, server_fd, &addr[..addrlen]).unwrap(); + + let client_fd = sys_socket(&mut proc, &mut host, AF_UNIX, SOCK_DGRAM, 0).unwrap(); + sys_connect(&mut proc, &mut host, client_fd, &addr[..addrlen]).unwrap(); + assert_eq!( + sys_write(&mut proc, &mut host, client_fd, b"unix-dgram").unwrap(), + 10, + ); + + let mut buf = [0u8; 16]; + let n = sys_read(&mut proc, &mut host, server_fd, &mut buf).unwrap(); + assert_eq!(&buf[..n], b"unix-dgram"); + + unsafe { crate::unix_socket::global_unix_socket_registry() }.cleanup_process(9025); + } + + #[test] + fn test_inet6_loopback_bind_getsockname_and_connect_refused() { + let mut proc = Process::new(1); + let mut host = MockHostIO::new(); + use wasm_posix_shared::socket::*; + + let fd = sys_socket(&mut proc, &mut host, AF_INET6, SOCK_STREAM, 0).unwrap(); + let mut addr = [0u8; 28]; + addr[0] = 10; // 
AF_INET6 + addr[2] = 0x05; + addr[3] = 0x39; // port 1337 + addr[23] = 1; // ::1 + + sys_bind(&mut proc, &mut host, fd, &addr).unwrap(); + sys_listen(&mut proc, &mut host, fd, 5).unwrap(); + + let mut name = [0u8; 28]; + let n = sys_getsockname(&proc, fd, &mut name).unwrap(); + assert_eq!(n, 28); + assert_eq!(&name[..28], &addr); + + let client_fd = sys_socket(&mut proc, &mut host, AF_INET6, SOCK_STREAM, 0).unwrap(); + let mut refused = [0u8; 28]; + refused[0] = 10; + refused[3] = 1; // ::1:1, no listener + refused[23] = 1; + assert_eq!( + sys_connect(&mut proc, &mut host, client_fd, &refused).unwrap_err(), + Errno::ECONNREFUSED, + ); + } + // ── Threading tests ────────────────────────────────────────────── #[test] diff --git a/crates/kernel/src/wasm_api.rs b/crates/kernel/src/wasm_api.rs index 40d7c2473..ff7c0d18a 100644 --- a/crates/kernel/src/wasm_api.rs +++ b/crates/kernel/src/wasm_api.rs @@ -7079,8 +7079,7 @@ pub extern "C" fn kernel_accept4( addrlen_buf.copy_from_slice(&2u32.to_le_bytes()); } } - _ => { - // Existing AF_INET logic + crate::socket::SocketDomain::Inet => { let mut sa = [0u8; 16]; sa[0] = 2; // AF_INET let port_be = sock.peer_port.to_be_bytes(); @@ -7096,6 +7095,19 @@ pub extern "C" fn kernel_accept4( addr_buf.copy_from_slice(&sa[..n]); addrlen_buf.copy_from_slice(&16u32.to_le_bytes()); } + crate::socket::SocketDomain::Inet6 => { + let mut sa = [0u8; 28]; + sa[0] = 10; // AF_INET6 + let port_be = sock.peer_port.to_be_bytes(); + sa[2] = port_be[0]; + sa[3] = port_be[1]; + sa[8..24].copy_from_slice(&sock.peer_addr6); + let n = max_len.min(28); + let addr_buf = + unsafe { slice::from_raw_parts_mut(addr_ptr, n) }; + addr_buf.copy_from_slice(&sa[..n]); + addrlen_buf.copy_from_slice(&28u32.to_le_bytes()); + } } } } @@ -7300,14 +7312,21 @@ fn cross_process_unix_connect(proc: &mut Process, fd: i32, addr: &[u8]) -> Resul return Err(Errno::EINVAL); } let path_bytes = &addr[2..]; - let path_end = path_bytes - .iter() - .position(|&b| b == 0) - 
.unwrap_or(path_bytes.len());
-    if path_end == 0 {
-        return Err(Errno::ECONNREFUSED);
-    }
-    let resolved = crate::path::resolve_path(&path_bytes[..path_end], &proc.cwd);
+    let resolved = if path_bytes.first().copied() == Some(0) {
+        if path_bytes.len() < 2 {
+            return Err(Errno::ECONNREFUSED);
+        }
+        path_bytes.to_vec()
+    } else {
+        let path_end = path_bytes
+            .iter()
+            .position(|&b| b == 0)
+            .unwrap_or(path_bytes.len());
+        if path_end == 0 {
+            return Err(Errno::ECONNREFUSED);
+        }
+        crate::path::resolve_path(&path_bytes[..path_end], &proc.cwd)
+    };
 
     // Look up in global registry
     let registry = unsafe { crate::unix_socket::global_unix_socket_registry() };
@@ -7455,6 +7474,129 @@ pub extern "C" fn kernel_getsockopt(
         return result;
     }
 
+    // Handle struct linger (SO_LINGER).
+    // Copies min(*optlen, 8) bytes of { l_onoff, l_linger } (two little-endian
+    // 4-byte integers) to optval and reports the full size, 8, through optlen.
+    if level == SOL_SOCKET && optname == SO_LINGER {
+        let result = match syscalls::sys_getsockopt_linger(proc, fd) {
+            Ok((l_onoff, l_linger)) => {
+                // A null optlen is treated as "room for the whole struct".
+                let avail = if !optlen_ptr.is_null() {
+                    unsafe { *optlen_ptr as usize }
+                } else {
+                    8
+                };
+                let write_len = avail.min(8);
+                if write_len > 0 && optval_ptr.is_null() {
+                    // Nowhere to copy the value: fail with EFAULT, like the null-optval
+                    // checks in kernel_setsockopt, rather than slicing a null pointer.
+                    -(Errno::EFAULT as i32)
+                } else {
+                    let mut tmp = [0u8; 8];
+                    tmp[0..4].copy_from_slice(&l_onoff.to_le_bytes());
+                    tmp[4..8].copy_from_slice(&l_linger.to_le_bytes());
+                    if write_len > 0 {
+                        let out = unsafe { slice::from_raw_parts_mut(optval_ptr, write_len) };
+                        out.copy_from_slice(&tmp[..write_len]);
+                    }
+                    if !optlen_ptr.is_null() {
+                        unsafe {
+                            *optlen_ptr = 8;
+                        }
+                    }
+                    0
+                }
+            }
+            Err(e) => -(e as i32),
+        };
+        let mut host = WasmHostIO;
+        deliver_pending_signals(proc, &mut host);
+        return result;
+    }
+
+    // Handle string-valued SO_BINDTODEVICE.
+    // An empty name reports length 0; otherwise the name plus a trailing NUL
+    // is copied, truncated to the caller's buffer length.
+    if level == SOL_SOCKET && optname == SO_BINDTODEVICE {
+        let result = match syscalls::sys_getsockopt_bindtodevice(proc, fd) {
+            Ok(name) => {
+                let needed = if name.is_empty() { 0 } else { name.len() + 1 };
+                let avail = if !optlen_ptr.is_null() {
+                    unsafe { *optlen_ptr as usize }
+                } else {
+                    needed
+                };
+                let write_len = avail.min(needed);
+                if write_len > 0 && optval_ptr.is_null() {
+                    // Nowhere to copy the name: fail with EFAULT rather than
+                    // building a slice from a null pointer.
+                    -(Errno::EFAULT as i32)
+                } else {
+                    if write_len > 0 {
+                        let out = unsafe { slice::from_raw_parts_mut(optval_ptr, write_len) };
+                        let name_copy = write_len.min(name.len());
+                        out[..name_copy].copy_from_slice(&name[..name_copy]);
+                        // The NUL terminator is only written when it fits.
+                        if write_len > name_copy {
+                            out[name_copy] = 0;
+                        }
+                    }
+                    if !optlen_ptr.is_null() {
+                        unsafe {
+                            *optlen_ptr = needed as u32;
+                        }
+                    }
+                    0
+                }
+            }
+            Err(e) => -(e as i32),
+        };
+        let mut host = WasmHostIO;
+        deliver_pending_signals(proc, &mut host);
+        return result;
+    }
+
+    // Handle string-valued TCP_CONGESTION.
+    // The algorithm name plus a trailing NUL is copied, truncated to the
+    // caller's buffer length; optlen receives the untruncated length.
+    if level == IPPROTO_TCP && optname == TCP_CONGESTION {
+        let result = match syscalls::sys_getsockopt_tcp_congestion(proc, fd) {
+            Ok(name) => {
+                let avail = if !optlen_ptr.is_null() {
+                    unsafe { *optlen_ptr as usize }
+                } else {
+                    name.len() + 1
+                };
+                let write_len = avail.min(name.len() + 1);
+                if write_len > 0 && optval_ptr.is_null() {
+                    // Nowhere to copy the name: fail with EFAULT rather than
+                    // building a slice from a null pointer.
+                    -(Errno::EFAULT as i32)
+                } else {
+                    if write_len > 0 {
+                        let out = unsafe { slice::from_raw_parts_mut(optval_ptr, write_len) };
+                        let name_copy = write_len.min(name.len());
+                        out[..name_copy].copy_from_slice(&name[..name_copy]);
+                        // The NUL terminator is only written when it fits.
+                        if write_len > name_copy {
+                            out[name_copy] = 0;
+                        }
+                    }
+                    if !optlen_ptr.is_null() {
+                        unsafe {
+                            *optlen_ptr = (name.len() + 1) as u32;
+                        }
+                    }
+                    0
+                }
+            }
+            Err(e) => -(e as i32),
+        };
+        let mut host = WasmHostIO;
+        deliver_pending_signals(proc, &mut host);
+        return result;
+    }
+
     // Handle struct timeval options (SO_RCVTIMEO, SO_SNDTIMEO)
     if level == SOL_SOCKET && (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO) {
         let result = match syscalls::sys_getsockopt_timeout(proc, fd, optname) {
@@ -7538,6 +7680,167 @@ pub extern "C" fn kernel_setsockopt(
         return result;
     }
 
+    // Handle struct linger (SO_LINGER).
+    if level == SOL_SOCKET && optname == SO_LINGER {
+        if optval_ptr.is_null() || optlen < 8 { // short optlen => EINVAL, null optval => EFAULT
+            let mut host = WasmHostIO;
+            deliver_pending_signals(proc, &mut host);
+            return -((if optlen < 8 { Errno::EINVAL } else { Errno::EFAULT }) as i32);
+        }
+        let buf = unsafe { slice::from_raw_parts(optval_ptr, 8) };
+        let l_onoff = i32::from_le_bytes(buf[0..4].try_into().unwrap());
+        let l_linger = i32::from_le_bytes(buf[4..8].try_into().unwrap());
+        let result = match syscalls::sys_setsockopt_linger(proc, fd, l_onoff, l_linger) {
+            Ok(()) => 0,
+            Err(e) => -(e as i32),
+        };
+        let mut host = WasmHostIO;
+        deliver_pending_signals(proc, &mut host);
+        return result;
+    }
+
+    // Handle string-valued SO_BINDTODEVICE.
+    if level == SOL_SOCKET && optname == SO_BINDTODEVICE {
+        if optval_ptr.is_null() {
+            let mut host = WasmHostIO;
+            deliver_pending_signals(proc, &mut host);
+            return -(Errno::EFAULT as i32);
+        }
+        let buf = unsafe { slice::from_raw_parts(optval_ptr, optlen as usize) };
+        let result = match syscalls::sys_setsockopt_bindtodevice(proc, fd, buf) {
+            Ok(()) => 0,
+            Err(e) => -(e as i32),
+        };
+        let mut host = WasmHostIO;
+        deliver_pending_signals(proc, &mut host);
+        return result;
+    }
+
+    // Handle string-valued TCP_CONGESTION.
+    if level == IPPROTO_TCP && optname == TCP_CONGESTION {
+        if optval_ptr.is_null() {
+            let mut host = WasmHostIO;
+            deliver_pending_signals(proc, &mut host);
+            return -(Errno::EFAULT as i32);
+        }
+        let buf = unsafe { slice::from_raw_parts(optval_ptr, optlen as usize) };
+        let result = match syscalls::sys_setsockopt_tcp_congestion(proc, fd, buf) {
+            Ok(()) => 0,
+            Err(e) => -(e as i32),
+        };
+        let mut host = WasmHostIO;
+        deliver_pending_signals(proc, &mut host);
+        return result;
+    }
+
+    // Handle IPv4 multicast membership/source-filter options. These options
+    // carry struct ip_mreq/ip_mreq_source/group_req/group_source_req payloads,
+    // not plain integers, so the wrapper must parse the guest ABI buffer.
+ if level == IPPROTO_IP + && matches!( + optname, + IP_ADD_MEMBERSHIP + | IP_DROP_MEMBERSHIP + | IP_BLOCK_SOURCE + | IP_UNBLOCK_SOURCE + | IP_ADD_SOURCE_MEMBERSHIP + | IP_DROP_SOURCE_MEMBERSHIP + | MCAST_JOIN_GROUP + | MCAST_LEAVE_GROUP + | MCAST_BLOCK_SOURCE + | MCAST_UNBLOCK_SOURCE + | MCAST_JOIN_SOURCE_GROUP + | MCAST_LEAVE_SOURCE_GROUP + ) + { + if optval_ptr.is_null() { + let mut host = WasmHostIO; + deliver_pending_signals(proc, &mut host); + return -(Errno::EFAULT as i32); + } + let buf = unsafe { slice::from_raw_parts(optval_ptr, optlen as usize) }; + + let parse_sockaddr_in_at = |offset: usize| -> Result<[u8; 4], Errno> { + if buf.len() < offset + 8 { + return Err(Errno::EINVAL); + } + let family = u16::from_le_bytes([buf[offset], buf[offset + 1]]); + if family as u32 != AF_INET { + return Err(Errno::EAFNOSUPPORT); + } + Ok([ + buf[offset + 4], + buf[offset + 5], + buf[offset + 6], + buf[offset + 7], + ]) + }; + + let parse_ifindex_at = |offset: usize| -> Result<[u8; 4], Errno> { + if buf.len() < offset + 4 { + return Err(Errno::EINVAL); + } + let ifindex = u32::from_le_bytes(buf[offset..offset + 4].try_into().unwrap()); + syscalls::ipv4_multicast_interface_from_index(ifindex) + }; + + let parsed = (|| -> Result<([u8; 4], [u8; 4], Option<[u8; 4]>), Errno> { + match optname { + IP_ADD_MEMBERSHIP | IP_DROP_MEMBERSHIP => { + if buf.len() < 8 { + Err(Errno::EINVAL) + } else { + Ok(( + [buf[0], buf[1], buf[2], buf[3]], + [buf[4], buf[5], buf[6], buf[7]], + None, + )) + } + } + IP_BLOCK_SOURCE | IP_UNBLOCK_SOURCE | IP_ADD_SOURCE_MEMBERSHIP + | IP_DROP_SOURCE_MEMBERSHIP => { + if buf.len() < 12 { + Err(Errno::EINVAL) + } else { + Ok(( + [buf[0], buf[1], buf[2], buf[3]], + [buf[4], buf[5], buf[6], buf[7]], + Some([buf[8], buf[9], buf[10], buf[11]]), + )) + } + } + MCAST_JOIN_GROUP | MCAST_LEAVE_GROUP => Ok(( + parse_sockaddr_in_at(4)?, + parse_ifindex_at(0)?, + None, + )), + MCAST_BLOCK_SOURCE | MCAST_UNBLOCK_SOURCE | MCAST_JOIN_SOURCE_GROUP + | 
MCAST_LEAVE_SOURCE_GROUP => Ok(( + parse_sockaddr_in_at(4)?, + parse_ifindex_at(0)?, + Some(parse_sockaddr_in_at(132)?), + )), + _ => unreachable!(), + } + })(); + + let result = match parsed.and_then(|(group, interface_addr, source)| { + syscalls::sys_setsockopt_ipv4_multicast( + proc, + fd, + optname, + group, + interface_addr, + source, + ) + }) { + Ok(()) => 0, + Err(e) => -(e as i32), + }; + let mut host = WasmHostIO; + deliver_pending_signals(proc, &mut host); + return result; + } // Read first 4 bytes as u32 value (covers most int-valued options) let optval = if !optval_ptr.is_null() && optlen >= 4 { let buf = unsafe { slice::from_raw_parts(optval_ptr, 4) }; diff --git a/crates/shared/src/lib.rs b/crates/shared/src/lib.rs index 544a50c24..950098fd4 100644 --- a/crates/shared/src/lib.rs +++ b/crates/shared/src/lib.rs @@ -358,6 +358,7 @@ pub enum Errno { EBUSY = 16, EEXIST = 17, EXDEV = 18, + ENODEV = 19, ENOTDIR = 20, EISDIR = 21, EINVAL = 22, @@ -422,6 +423,7 @@ impl Errno { 16 => Some(Errno::EBUSY), 17 => Some(Errno::EEXIST), 18 => Some(Errno::EXDEV), + 19 => Some(Errno::ENODEV), 20 => Some(Errno::ENOTDIR), 21 => Some(Errno::EISDIR), 22 => Some(Errno::EINVAL), @@ -560,6 +562,7 @@ pub mod socket { pub const SOCK_CLOEXEC: u32 = 0o2000000; pub const SOL_SOCKET: u32 = 1; pub const SCM_RIGHTS: u32 = 1; + pub const SCM_CREDENTIALS: u32 = 2; pub const SO_REUSEADDR: u32 = 2; pub const SO_ERROR: u32 = 4; pub const SO_KEEPALIVE: u32 = 9; @@ -568,15 +571,45 @@ pub mod socket { pub const SO_TYPE: u32 = 3; pub const SO_DOMAIN: u32 = 39; pub const SO_ACCEPTCONN: u32 = 30; + pub const SO_REUSEPORT: u32 = 15; + pub const SO_PASSCRED: u32 = 16; pub const SHUT_RD: u32 = 0; pub const SHUT_WR: u32 = 1; pub const SHUT_RDWR: u32 = 2; pub const SO_BROADCAST: u32 = 6; pub const SO_LINGER: u32 = 13; + pub const SO_BINDTODEVICE: u32 = 25; + pub const SO_ATTACH_REUSEPORT_CBPF: u32 = 51; + pub const SO_ZEROCOPY: u32 = 60; // time64 values used by musl on wasm32 (where __LONG_MAX == 
0x7fffffff) pub const SO_RCVTIMEO: u32 = 66; pub const SO_SNDTIMEO: u32 = 67; + pub const IPPROTO_IP: u32 = 0; pub const IPPROTO_TCP: u32 = 6; + pub const IPPROTO_UDP: u32 = 17; + pub const IPPROTO_IPV6: u32 = 41; + pub const IP_TOS: u32 = 1; + pub const IP_PKTINFO: u32 = 8; + pub const IP_MTU_DISCOVER: u32 = 10; + pub const IP_MTU: u32 = 14; + pub const IP_MULTICAST_IF: u32 = 32; + pub const IP_MULTICAST_TTL: u32 = 33; + pub const IP_MULTICAST_LOOP: u32 = 34; + pub const IP_ADD_MEMBERSHIP: u32 = 35; + pub const IP_DROP_MEMBERSHIP: u32 = 36; + pub const IP_UNBLOCK_SOURCE: u32 = 37; + pub const IP_BLOCK_SOURCE: u32 = 38; + pub const IP_ADD_SOURCE_MEMBERSHIP: u32 = 39; + pub const IP_DROP_SOURCE_MEMBERSHIP: u32 = 40; + pub const IP_MSFILTER: u32 = 41; + pub const MCAST_JOIN_GROUP: u32 = 42; + pub const MCAST_BLOCK_SOURCE: u32 = 43; + pub const MCAST_UNBLOCK_SOURCE: u32 = 44; + pub const MCAST_LEAVE_GROUP: u32 = 45; + pub const MCAST_JOIN_SOURCE_GROUP: u32 = 46; + pub const MCAST_LEAVE_SOURCE_GROUP: u32 = 47; + pub const MCAST_MSFILTER: u32 = 48; + pub const IP_MULTICAST_ALL: u32 = 49; pub const TCP_NODELAY: u32 = 1; pub const TCP_CORK: u32 = 3; pub const TCP_KEEPIDLE: u32 = 4; @@ -585,7 +618,17 @@ pub mod socket { pub const TCP_DEFER_ACCEPT: u32 = 9; pub const TCP_INFO: u32 = 11; pub const TCP_QUICKACK: u32 = 12; + pub const TCP_CONGESTION: u32 = 13; pub const TCP_USER_TIMEOUT: u32 = 18; + pub const IPV6_MULTICAST_IF: u32 = 17; + pub const IPV6_MULTICAST_HOPS: u32 = 18; + pub const IPV6_MULTICAST_LOOP: u32 = 19; + pub const IPV6_V6ONLY: u32 = 26; + pub const IPV6_RECVPKTINFO: u32 = 49; + pub const IPV6_PKTINFO: u32 = 50; + pub const IPV6_DONTFRAG: u32 = 62; + pub const IPV6_RECVTCLASS: u32 = 66; + pub const IPV6_TCLASS: u32 = 67; pub const MSG_OOB: u32 = 1; pub const MSG_PEEK: u32 = 2; pub const MSG_DONTWAIT: u32 = 64; diff --git a/host/src/networking/fetch-backend.ts b/host/src/networking/fetch-backend.ts index 717c3850d..07bd51881 100644 --- 
a/host/src/networking/fetch-backend.ts +++ b/host/src/networking/fetch-backend.ts @@ -6,6 +6,62 @@ export class EagainError extends Error { constructor() { super("EAGAIN"); } } +function nameNotFoundError(hostname: string): Error & { errno: number } { + return Object.assign(new Error(`ENOENT: ${hostname}`), { errno: 2 }); +} + +/** + * Browser networking uses synthetic addresses for DNS names because the host + * fetch API performs the real lookup later. Numeric IPv4 names are different: + * getaddrinfo must treat them as address literals, and malformed numeric + * literals must fail instead of being reinterpreted as DNS names. + */ +export function parseNumericIpv4Hostname(hostname: string): Uint8Array | null { + if (!/^\d+(?:\.\d+)+$/.test(hostname)) return null; + + const parts = hostname.split("."); + if (parts.length > 4) throw nameNotFoundError(hostname); + + const widths = parts.length === 2 + ? [8n, 24n] + : parts.length === 3 + ? [8n, 8n, 16n] + : [8n, 8n, 8n, 8n]; + + let packed = 0n; + for (let i = 0; i < parts.length; i++) { + const value = BigInt(parts[i]); + const width = widths[i]; + if (value > ((1n << width) - 1n)) { + throw nameNotFoundError(hostname); + } + packed = (packed << width) | value; + } + + return new Uint8Array([ + Number((packed >> 24n) & 0xffn), + Number((packed >> 16n) & 0xffn), + Number((packed >> 8n) & 0xffn), + Number(packed & 0xffn), + ]); +} + +export function validateSyntheticDnsHostname(hostname: string): void { + // The browser backends synthesize addresses for DNS names and let fetch() + // perform the real network lookup later. Do not synthesize addresses for + // names that a POSIX resolver would reject before DNS, such as empty labels + // or labels longer than the DNS 63-octet limit. + const absoluteName = hostname.endsWith(".") ? 
hostname.slice(0, -1) : hostname; + if (absoluteName.length === 0 || absoluteName.length > 253) { + throw nameNotFoundError(hostname); + } + for (const label of absoluteName.split(".")) { + if (label.length === 0 || label.length > 63) { + throw nameNotFoundError(hostname); + } + } +} + const POLLIN = 0x0001; const POLLOUT = 0x0004; const POLLERR = 0x0008; @@ -227,6 +283,10 @@ export class FetchNetworkBackend implements NetworkIO { } getaddrinfo(hostname: string): Uint8Array { + const literalIp = parseNumericIpv4Hostname(hostname); + if (literalIp) return literalIp; + validateSyntheticDnsHostname(hostname); + // In the browser, return a synthetic IP. // The actual connection uses the Host header, not this IP. // Use a deterministic hash to generate a fake IP in the 10.x.x.x range. diff --git a/host/src/networking/tls-network-backend.ts b/host/src/networking/tls-network-backend.ts index 93ab029de..9a8745ba0 100644 --- a/host/src/networking/tls-network-backend.ts +++ b/host/src/networking/tls-network-backend.ts @@ -14,7 +14,7 @@ */ import type { NetworkIO } from "../types"; -import { EagainError } from "./fetch-backend"; +import { EagainError, parseNumericIpv4Hostname, validateSyntheticDnsHostname } from "./fetch-backend"; import { TLS_1_2_Connection } from "../../../packages/registry/openssl/src/tls/1_2/connection"; import { generateCertificate, @@ -208,6 +208,10 @@ export class TlsNetworkBackend implements NetworkIO { // ---- NetworkIO implementation ---- getaddrinfo(hostname: string): Uint8Array { + const literalIp = parseNumericIpv4Hostname(hostname); + if (literalIp) return literalIp; + validateSyntheticDnsHostname(hostname); + const ip = this.syntheticIp(hostname); const ipStr = this.ipKey(ip); this.hostnameMap.set(ipStr, hostname); diff --git a/host/test/fetch-backend.test.ts b/host/test/fetch-backend.test.ts index 09745f1c3..f149e5f84 100644 --- a/host/test/fetch-backend.test.ts +++ b/host/test/fetch-backend.test.ts @@ -65,6 +65,24 @@ 
describe("FetchNetworkBackend", () => { const addr2 = backend.getaddrinfo("example.com"); expect(addr1).toEqual(addr2); }); + + it("returns numeric IPv4 literals without synthesizing a DNS address", () => { + const backend = new FetchNetworkBackend(); + expect(Array.from(backend.getaddrinfo("127.0.0.1"))).toEqual([127, 0, 0, 1]); + expect(Array.from(backend.getaddrinfo("1.2.3"))).toEqual([1, 2, 0, 3]); + }); + + it("rejects malformed numeric IPv4 literals", () => { + const backend = new FetchNetworkBackend(); + expect(() => backend.getaddrinfo("9999.9999.9999.9999")).toThrow("ENOENT"); + expect(() => backend.getaddrinfo("1.2.3.256")).toThrow("ENOENT"); + }); + + it("rejects syntactically invalid DNS names", () => { + const backend = new FetchNetworkBackend(); + expect(() => backend.getaddrinfo(".toto.toto.toto")).toThrow("ENOENT"); + expect(() => backend.getaddrinfo(`www.${"x".repeat(100)}.com`)).toThrow("ENOENT"); + }); }); describe("connect", () => { @@ -133,6 +151,26 @@ describe("FetchNetworkBackend", () => { }); describe("TlsNetworkBackend HTTP proxy path", () => { + describe("getaddrinfo", () => { + it("returns numeric IPv4 literals without synthesizing a DNS address", () => { + const backend = new TlsNetworkBackend(); + expect(Array.from(backend.getaddrinfo("127.0.0.1"))).toEqual([127, 0, 0, 1]); + expect(Array.from(backend.getaddrinfo("1.2.3"))).toEqual([1, 2, 0, 3]); + }); + + it("rejects malformed numeric IPv4 literals", () => { + const backend = new TlsNetworkBackend(); + expect(() => backend.getaddrinfo("9999.9999.9999.9999")).toThrow("ENOENT"); + expect(() => backend.getaddrinfo("1.2.3.256")).toThrow("ENOENT"); + }); + + it("rejects syntactically invalid DNS names", () => { + const backend = new TlsNetworkBackend(); + expect(() => backend.getaddrinfo(".toto.toto.toto")).toThrow("ENOENT"); + expect(() => backend.getaddrinfo(`www.${"x".repeat(100)}.com`)).toThrow("ENOENT"); + }); + }); + it("resets response state for keep-alive HTTP requests", async () => 
{ let resolveSecond!: (response: Response) => void; const secondResponse = new Promise((resolve) => {