From e0c698172826de10adffdc1877ae5accf7f05967 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20Kr=C3=BCger?= Date: Tue, 14 Apr 2026 10:07:56 +0200 Subject: [PATCH] fix(proto): Eventually stop resending PATH_CHALLENGEs on revalidations Also: Renames `PathTimer::PathOpenFailed` into `PathTimer::AbandonFromValidation`, now that it serves two purposes. --- noq-proto/src/connection/mod.rs | 57 ++++++++++++++++++++++++------- noq-proto/src/connection/paths.rs | 11 +++++- noq-proto/src/connection/qlog.rs | 4 ++- noq-proto/src/connection/timer.rs | 15 ++++---- 4 files changed, 67 insertions(+), 20 deletions(-) diff --git a/noq-proto/src/connection/mod.rs b/noq-proto/src/connection/mod.rs index 9975617f9..c06b6eefe 100644 --- a/noq-proto/src/connection/mod.rs +++ b/noq-proto/src/connection/mod.rs @@ -725,7 +725,7 @@ impl Connection { // these frames or their timing PathTimer::PathValidationFailed | PathTimer::PathChallengeLost - | PathTimer::PathOpenFailed => false, + | PathTimer::AbandonFromValidation => false, // These timers deal with the lifetime of the path. Now that the path is abandoned, // these are not relevant. PathTimer::PathKeepAlive | PathTimer::PathIdle => false, @@ -1949,8 +1949,6 @@ impl Connection { path_id: PathId, ) -> Option { let (prev_cid, prev_path) = self.paths.get_mut(&path_id)?.prev.as_mut()?; - // TODO (matheus23): We could use !prev_path.is_validating() here instead to - // (possibly) also re-send challenges when they get lost. if !prev_path.pending_on_path_challenge { return None; }; @@ -2445,7 +2443,7 @@ impl Connection { trace!("path challenge deemed lost"); path.data.pending_on_path_challenge = true; } - PathTimer::PathOpenFailed => { + PathTimer::AbandonFromValidation => { let Some(path) = self.paths.get_mut(&path_id) else { continue; }; @@ -4864,7 +4862,7 @@ impl Connection { self.timers .stop(Timer::PerPath(path_id, PathValidationFailed), qlog.clone()); self.timers - .stop(Timer::PerPath(path_id, PathOpenFailed), qlog.clone()); + .stop(Timer::PerPath(path_id, AbandonFromValidation), qlog.clone()); let next_challenge = path .data @@ -5922,13 +5920,26 @@ impl Connection { builder.write_frame(challenge, stats); builder.require_padding(); let pto = self.ack_frequency.max_ack_delay_for_pto() + path.rtt.pto_base(); - if path.open_status == paths::OpenStatus::Pending { - path.open_status = paths::OpenStatus::Sent; - self.timers.set( - Timer::PerPath(path_id, PathTimer::PathOpenFailed), - now + 3 * pto, - self.qlog.with_time(now), - ); + match path.open_status { + paths::OpenStatus::Sent | paths::OpenStatus::Informed => {} + paths::OpenStatus::Pending => { + path.open_status = paths::OpenStatus::Sent; + self.timers.set( + Timer::PerPath(path_id, PathTimer::AbandonFromValidation), + now + 3 * pto, + self.qlog.with_time(now), + ); + } + // The path open status was informed before, we just want to revalidate again. + // For that, we want to make sure we set the PathOpenFailed timer again. + paths::OpenStatus::Revalidating => { + path.open_status = paths::OpenStatus::Informed; + self.timers.set( + Timer::PerPath(path_id, PathTimer::AbandonFromValidation), + now + 3 * pto, + self.qlog.with_time(now), + ); + } } self.timers.set( @@ -6959,7 +6970,29 @@ impl Connection { if path_was_known { trace!(%path_id, %remote, "nat traversal: path existed for remote, revalidating"); if let Some(path) = self.paths.get_mut(&path_id) { + use paths::OpenStatus::*; + path.data.pending_on_path_challenge = true; + path.data.open_status = match path.data.open_status { + // If we just opened the path and have never sent a `PATH_CHALLENGE` yet, + // then we need to keep it at pending, to ensure that + // 1. The PathOpenFailed timer for stopping the PathChallengeLost retries will be set. + // 2. When validation eventually succeeds, then we inform the application layer about this path opening. + Pending => Pending, + // If we had already sent a path challenge in the past, but it hasn't been validated yet (and also not + // failed via the PathOpenFailed timer yet), then we need to go back to pending, to ensure we properly + // re-arm the `PathOpenFailed` timer again. + Sent => Pending, + // If we're already revalidating this path, but haven't sent a `PATH_CHALLENGE` yet, then we just keep + // that state. + Revalidating => Revalidating, + // If we've informed the application layer about the path opening in the past, but we now re-send + // PATH_CHALLENGEs for validation, then using this we ensure: + // 1. The PathOpenFailed timer for stopping the PathChallengeLost retries will be set. + // 2. When validation eventually succeeds, we *don't* inform the application layer about the path + // opening again. + Informed => Revalidating, + } } } Ok(Some((path_id, remote))) diff --git a/noq-proto/src/connection/paths.rs b/noq-proto/src/connection/paths.rs index 54534d3e2..73cb9c761 100644 --- a/noq-proto/src/connection/paths.rs +++ b/noq-proto/src/connection/paths.rs @@ -512,7 +512,10 @@ impl PathData { let prev_status = std::mem::replace(&mut self.open_status, OpenStatus::Informed); OnPathResponseReceived::OnPath { - was_open: prev_status == OpenStatus::Informed, + was_open: matches!( + prev_status, + OpenStatus::Informed | OpenStatus::Revalidating + ), } } // Response to an on-path PathChallenge that does not validate this path @@ -678,6 +681,12 @@ pub(super) enum OpenStatus { Sent, /// The application has been informed of this path. Informed, + /// The path was [`Self::Informed`] before, but we want to trigger path validation again. + /// + /// This is used to ensure we properly stop trying to re-send path challenges eventually, without + /// having to switch to [`Self::Pending`] when re-validating, as that would trigger another + /// application-level event about the path opening once validation succeeds. + Revalidating, } /// Congestion metrics as described in [`recovery_metrics_updated`]. diff --git a/noq-proto/src/connection/qlog.rs b/noq-proto/src/connection/qlog.rs index 3a6a96d1f..5c618aca0 100644 --- a/noq-proto/src/connection/qlog.rs +++ b/noq-proto/src/connection/qlog.rs @@ -322,7 +322,9 @@ impl QlogSink { PathTimer::PathIdle => Some(TimerType::custom("path_idle")), PathTimer::PathValidationFailed => Some(QlogTimerType::PathValidation.into()), PathTimer::PathChallengeLost => Some(TimerType::custom("path_challenge_lost")), - PathTimer::PathOpenFailed => Some(TimerType::custom("path_open")), + PathTimer::AbandonFromValidation => { + Some(TimerType::custom("abandon_from_validation")) + } PathTimer::PathKeepAlive => Some(TimerType::custom("path_keep_alive")), PathTimer::Pacing => Some(TimerType::custom("pacing")), PathTimer::MaxAckDelay => Some(QlogTimerType::Ack.into()), diff --git a/noq-proto/src/connection/timer.rs b/noq-proto/src/connection/timer.rs index 9b7ceb56f..4b2a47ee9 100644 --- a/noq-proto/src/connection/timer.rs +++ b/noq-proto/src/connection/timer.rs @@ -60,12 +60,15 @@ pub(crate) enum PathTimer { PathValidationFailed = 2, /// When to resend an on-path path challenge deemed lost PathChallengeLost = 3, - /// When we give up opening a path. + /// When to abandon a path due to failed validation. /// - /// When opening a path we validate it according to RFC9000 §8.2 as required by the - /// multipath spec. If validation fails we will abandon the path again. This timer fires - /// when we want to give up on this path validation of opening the path. - PathOpenFailed = 4, + /// There are two situations in which we give up a path from validation. + /// 1. When opening a path we validate it according to RFC9000 §8.2 as required by the + /// multipath spec. This timer is armed to time-bound that validation. + /// 2. When validating an already opened multipath path for various reasons in which we + /// expect the path to either work or not work and want to respond correspondingly in + /// a timely manner. + AbandonFromValidation = 4, /// When to send a `PING` frame to keep the path alive PathKeepAlive = 5, /// When pacing will allow us to send a packet @@ -82,7 +85,7 @@ impl PathTimer { Self::PathIdle, Self::PathValidationFailed, Self::PathChallengeLost, - Self::PathOpenFailed, + Self::AbandonFromValidation, Self::PathKeepAlive, Self::Pacing, Self::MaxAckDelay,