From aa1f788cf34a9c1483316e7d1b9bfa2e0076b374 Mon Sep 17 00:00:00 2001 From: Frando Date: Mon, 29 Jun 2026 11:44:36 +0200 Subject: [PATCH 1/7] tests(iroh): add regression test for transient windows errors --- iroh/src/endpoint.rs | 56 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/iroh/src/endpoint.rs b/iroh/src/endpoint.rs index ec89514ae18..bd9ace6e474 100644 --- a/iroh/src/endpoint.rs +++ b/iroh/src/endpoint.rs @@ -2922,6 +2922,62 @@ mod tests { Ok(()) } + /// Regression test: Don't fail connections with dead relays on Windows. + /// + /// A single client connecting to a single server over a usable direct path + /// must succeed even when both are configured with an unreachable home relay + /// (a loopback URL with nothing listening). The dead relay should be irrelevant: + /// the direct path works and the connection comes up in milliseconds. + /// + /// This was broken on Windows because QaD sends over the same socket to the dead + /// relay, and the socket would return recv errors on the next recv to report ICMP + /// errors for the previous send. We now skip over these errors, implemented in + /// https://github.com/n0-computer/net-tools/pull/166, so this no longer fails. + #[tokio::test] + async fn endpoint_unreachable_relay_direct_connect_succeeds() -> Result { + // An unreachable relay Nothing listens on 127.0.0.1:1, and its QADv4 probe target + // at 127.0.0.1:7842 is closed too, so probing it draws the ICMP port-unreachable + // that is emitted from the Windows socket on recv. + let dead_relay: RelayUrl = "https://127.0.0.1:1".parse().expect("valid relay url"); + + let bind_endpoint = async || { + Endpoint::builder(presets::Minimal) + // Use the broken relay to trigger the ICMP errors from the QaD sends. 
+ .relay_mode(RelayMode::Custom(RelayMap::from_iter([dead_relay.clone()]))) + .ca_tls_config(CaTlsConfig::insecure_skip_verify()) + .alpns(vec![TEST_ALPN.to_vec()]) + // Bind on IPv4 only to ensure a single socket to not have spurious polls. + .bind_addr((Ipv4Addr::LOCALHOST, 0)) + .expect("valid addr") + .bind() + .await + }; + + let server = bind_endpoint().await?; + let server_addr = server.addr().with_relay_url(dead_relay.clone()); + let client = bind_endpoint().await?; + + // Server accepts the incoming connection and holds it open until the test ends. + let accept = tokio::spawn(async move { + let incoming = server.accept().await.anyerr()?; + let conn = incoming.await.anyerr()?; + conn.closed().await; + server.close().await; + n0_error::Ok(()) + }); + + // The connect must complete over the direct loopback path despite the dead relay. + let _conn = tokio::time::timeout( + Duration::from_secs(10), + client.connect(server_addr, TEST_ALPN), + ) + .await + .expect("connection should succeed")?; + client.close().await; + accept.await.anyerr()??; + Ok(()) + } + #[tokio::test] #[traced_test] async fn test_direct_addresses_no_qad_relay() -> Result { From b207f8129b2df7efe4dd12a533f30f034cf62550 Mon Sep 17 00:00:00 2001 From: Frando Date: Tue, 30 Jun 2026 13:29:35 +0200 Subject: [PATCH 2/7] test: use a bind-then-close port for the dead relay QAD probe Instead of relying on the hard-coded default QUIC port (7842) being unused, claim an ephemeral UDP port and close it. This is a more robust way to get a closed port for the QADv4 probe to draw the ICMP port-unreachable error the Windows recv bug needs. 
--- iroh/src/endpoint.rs | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/iroh/src/endpoint.rs b/iroh/src/endpoint.rs index 091f3003be9..6bf802e19a4 100644 --- a/iroh/src/endpoint.rs +++ b/iroh/src/endpoint.rs @@ -2004,7 +2004,7 @@ mod tests { use iroh_base::{EndpointAddr, EndpointId, RelayUrl, SecretKey, TransportAddr}; use iroh_dns::endpoint_info::UserData; - use iroh_relay::{RelayConfig, server::Access, tls::CaTlsConfig}; + use iroh_relay::{RelayConfig, RelayQuicConfig, server::Access, tls::CaTlsConfig}; use n0_error::{AnyError as Error, Result, StdResultExt}; use n0_future::{BufferedStreamExt, StreamExt, future::now_or_never, stream, time}; use n0_tracing_test::traced_test; @@ -2937,15 +2937,28 @@ mod tests { /// https://github.com/n0-computer/net-tools/pull/166, so this no longer fails. #[tokio::test] async fn endpoint_unreachable_relay_direct_connect_succeeds() -> Result { - // An unreachable relay Nothing listens on 127.0.0.1:1, and its QADv4 probe target - // at 127.0.0.1:7842 is closed too, so probing it draws the ICMP port-unreachable - // that is emitted from the Windows socket on recv. + // Nothing listens on the relay url (127.0.0.1:1), so the relay is unreachable. let dead_relay: RelayUrl = "https://127.0.0.1:1".parse().expect("valid relay url"); + // The QADv4 probe must hit a closed UDP port so it draws the ICMP port-unreachable + // that the Windows socket reports back on its next recv. Bind a socket to claim an + // ephemeral port, read its address, then close it: the port is now free, so nothing + // answers the probe. There's nothing stopping the kernel from re-using the port + // right away, but on most machines that's unlikely. This beats hoping that a + // hard-coded port happens to be unused. 
+ let closed_quic_port = { + let sock = std::net::UdpSocket::bind((Ipv4Addr::LOCALHOST, 0)).expect("bind"); + sock.local_addr().expect("local addr").port() + }; + let dead_relay_config = + RelayConfig::new(dead_relay.clone(), Some(RelayQuicConfig::new(closed_quic_port))); + let bind_endpoint = async || { Endpoint::builder(presets::Minimal) // Use the broken relay to trigger the ICMP errors from the QaD sends. - .relay_mode(RelayMode::Custom(RelayMap::from_iter([dead_relay.clone()]))) + .relay_mode(RelayMode::Custom(RelayMap::from_iter([ + dead_relay_config.clone(), + ]))) .ca_tls_config(CaTlsConfig::insecure_skip_verify()) .alpns(vec![TEST_ALPN.to_vec()]) // Bind on IPv4 only to ensure a single socket to not have spurious polls. From ea3a2c9f3036d534be9c1dcbc1f0f07d5d9436c1 Mon Sep 17 00:00:00 2001 From: Frando Date: Tue, 30 Jun 2026 13:48:24 +0200 Subject: [PATCH 3/7] test: bind-then-close port for the dead relay url too Derive the relay url port from a bind-then-close ephemeral port as well, so neither the relay url nor the QADv4 probe relies on a hard-coded port happening to be unused. --- iroh/src/endpoint.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/iroh/src/endpoint.rs b/iroh/src/endpoint.rs index 6bf802e19a4..033acd07a31 100644 --- a/iroh/src/endpoint.rs +++ b/iroh/src/endpoint.rs @@ -2937,21 +2937,21 @@ mod tests { /// https://github.com/n0-computer/net-tools/pull/166, so this no longer fails. #[tokio::test] async fn endpoint_unreachable_relay_direct_connect_succeeds() -> Result { - // Nothing listens on the relay url (127.0.0.1:1), so the relay is unreachable. - let dead_relay: RelayUrl = "https://127.0.0.1:1".parse().expect("valid relay url"); - - // The QADv4 probe must hit a closed UDP port so it draws the ICMP port-unreachable - // that the Windows socket reports back on its next recv. 
Bind a socket to claim an - // ephemeral port, read its address, then close it: the port is now free, so nothing - // answers the probe. There's nothing stopping the kernel from re-using the port - // right away, but on most machines that's unlikely. This beats hoping that a - // hard-coded port happens to be unused. - let closed_quic_port = { + // Both the relay url and its QADv4 probe must hit closed ports: the relay must be + // unreachable, and the probe must draw the ICMP port-unreachable that the Windows + // socket reports back on its next recv. Bind a socket to claim an ephemeral port, + // read its address, then close it: the port is now free, so nothing answers. + // There's nothing stopping the kernel from re-using a port right away, but on most + // machines that's unlikely. This beats hoping that a hard-coded port is unused. + let closed_port = || { let sock = std::net::UdpSocket::bind((Ipv4Addr::LOCALHOST, 0)).expect("bind"); sock.local_addr().expect("local addr").port() }; + let dead_relay: RelayUrl = format!("https://127.0.0.1:{}", closed_port()) + .parse() + .expect("valid relay url"); let dead_relay_config = - RelayConfig::new(dead_relay.clone(), Some(RelayQuicConfig::new(closed_quic_port))); + RelayConfig::new(dead_relay.clone(), Some(RelayQuicConfig::new(closed_port()))); let bind_endpoint = async || { Endpoint::builder(presets::Minimal) From b87b85a8b7d3cea06372bcc00699e1a74f72f4c0 Mon Sep 17 00:00:00 2001 From: Frando Date: Tue, 30 Jun 2026 14:09:56 +0200 Subject: [PATCH 4/7] test: claim the dead relay url port with a TCP listener The relay url is dialed over TCP (HTTPS), so a freed UDP port says nothing about it. Claim the url port with a TCP listener and the QADv4 probe port with a UDP socket. 
--- iroh/src/endpoint.rs | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/iroh/src/endpoint.rs b/iroh/src/endpoint.rs index 033acd07a31..4f849dd85b9 100644 --- a/iroh/src/endpoint.rs +++ b/iroh/src/endpoint.rs @@ -2937,21 +2937,24 @@ mod tests { /// https://github.com/n0-computer/net-tools/pull/166, so this no longer fails. #[tokio::test] async fn endpoint_unreachable_relay_direct_connect_succeeds() -> Result { - // Both the relay url and its QADv4 probe must hit closed ports: the relay must be - // unreachable, and the probe must draw the ICMP port-unreachable that the Windows - // socket reports back on its next recv. Bind a socket to claim an ephemeral port, - // read its address, then close it: the port is now free, so nothing answers. - // There's nothing stopping the kernel from re-using a port right away, but on most - // machines that's unlikely. This beats hoping that a hard-coded port is unused. - let closed_port = || { + // The relay url and its QADv4 probe must both hit closed ports, so the relay is + // unreachable and the probe draws the ICMP port-unreachable the Windows socket + // reports on its next recv. Claim an ephemeral port, then close it: it's now free, + // so nothing answers. The url is dialed over TCP (HTTPS), the probe over UDP, so + // claim each with the matching socket type. 
+ let closed_tcp_port = { + let sock = std::net::TcpListener::bind((Ipv4Addr::LOCALHOST, 0)).expect("bind"); + sock.local_addr().expect("local addr").port() + }; + let closed_udp_port = { let sock = std::net::UdpSocket::bind((Ipv4Addr::LOCALHOST, 0)).expect("bind"); sock.local_addr().expect("local addr").port() }; - let dead_relay: RelayUrl = format!("https://127.0.0.1:{}", closed_port()) + let dead_relay: RelayUrl = format!("https://127.0.0.1:{closed_tcp_port}") .parse() .expect("valid relay url"); let dead_relay_config = - RelayConfig::new(dead_relay.clone(), Some(RelayQuicConfig::new(closed_port()))); + RelayConfig::new(dead_relay.clone(), Some(RelayQuicConfig::new(closed_udp_port))); let bind_endpoint = async || { Endpoint::builder(presets::Minimal) From 48644ff77da71dcc7c9fe19dcab1454fac447200 Mon Sep 17 00:00:00 2001 From: Frando Date: Tue, 30 Jun 2026 14:25:22 +0200 Subject: [PATCH 5/7] test: note kernel port re-use caveat in comment --- iroh/src/endpoint.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/iroh/src/endpoint.rs b/iroh/src/endpoint.rs index 4f849dd85b9..f0223f17fb1 100644 --- a/iroh/src/endpoint.rs +++ b/iroh/src/endpoint.rs @@ -2940,8 +2940,9 @@ mod tests { // The relay url and its QADv4 probe must both hit closed ports, so the relay is // unreachable and the probe draws the ICMP port-unreachable the Windows socket // reports on its next recv. Claim an ephemeral port, then close it: it's now free, - // so nothing answers. The url is dialed over TCP (HTTPS), the probe over UDP, so - // claim each with the matching socket type. + // so nothing answers. There's nothing stopping the kernel from re-using a port + // right away, but on most machines that's unlikely. The url is dialed over TCP + // (HTTPS), the probe over UDP, so claim each with the matching socket type. 
let closed_tcp_port = { let sock = std::net::TcpListener::bind((Ipv4Addr::LOCALHOST, 0)).expect("bind"); sock.local_addr().expect("local addr").port() From bb59b78ba8402b693ed390fd8e1384879a733027 Mon Sep 17 00:00:00 2001 From: Frando Date: Wed, 1 Jul 2026 11:18:22 +0200 Subject: [PATCH 6/7] chore: fmt --- iroh/src/endpoint.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/iroh/src/endpoint.rs b/iroh/src/endpoint.rs index f0223f17fb1..13d708a242a 100644 --- a/iroh/src/endpoint.rs +++ b/iroh/src/endpoint.rs @@ -2954,14 +2954,16 @@ mod tests { let dead_relay: RelayUrl = format!("https://127.0.0.1:{closed_tcp_port}") .parse() .expect("valid relay url"); - let dead_relay_config = - RelayConfig::new(dead_relay.clone(), Some(RelayQuicConfig::new(closed_udp_port))); + let dead_relay_config = RelayConfig::new( + dead_relay.clone(), + Some(RelayQuicConfig::new(closed_udp_port)), + ); let bind_endpoint = async || { Endpoint::builder(presets::Minimal) // Use the broken relay to trigger the ICMP errors from the QaD sends. .relay_mode(RelayMode::Custom(RelayMap::from_iter([ - dead_relay_config.clone(), + dead_relay_config.clone() ]))) .ca_tls_config(CaTlsConfig::insecure_skip_verify()) .alpns(vec![TEST_ALPN.to_vec()]) From 2be920e58f7c31e9a50ba7932321615d84d47a42 Mon Sep 17 00:00:00 2001 From: Frando Date: Wed, 1 Jul 2026 11:19:42 +0200 Subject: [PATCH 7/7] chore: codespell unrelated --- iroh/src/endpoint.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iroh/src/endpoint.rs b/iroh/src/endpoint.rs index 13d708a242a..0a40fe546d6 100644 --- a/iroh/src/endpoint.rs +++ b/iroh/src/endpoint.rs @@ -2940,7 +2940,7 @@ mod tests { // The relay url and its QADv4 probe must both hit closed ports, so the relay is // unreachable and the probe draws the ICMP port-unreachable the Windows socket // reports on its next recv. Claim an ephemeral port, then close it: it's now free, - // so nothing answers. 
There's nothing stopping the kernel from re-using a port + // so nothing answers. There's nothing stopping the kernel from reusing a port // right away, but on most machines that's unlikely. The url is dialed over TCP // (HTTPS), the probe over UDP, so claim each with the matching socket type. let closed_tcp_port = {