From c7ac0b929c9aa61fada4e522ba0abffc84e5a090 Mon Sep 17 00:00:00 2001 From: Shiju Date: Fri, 12 Jun 2026 03:11:51 +0530 Subject: [PATCH 1/3] fix(cli): verify forward listener before success Wait for a connectable local forward listener before reporting foreground forwarding success. Fail background forwarding when the forked SSH process cannot be tracked, probe the listener before writing the PID file, and terminate the tracked SSH process if the listener never opens. Document that forwarded URLs are printed only after listener health is proven. Signed-off-by: Shiju --- crates/openshell-cli/src/ssh.rs | 238 +++++++++++++++--- .../sandbox_create_lifecycle_integration.rs | 119 ++++++++- docs/sandboxes/manage-sandboxes.mdx | 2 + 3 files changed, 326 insertions(+), 33 deletions(-) diff --git a/crates/openshell-cli/src/ssh.rs b/crates/openshell-cli/src/ssh.rs index f5986a1d8..bdfadb5df 100644 --- a/crates/openshell-cli/src/ssh.rs +++ b/crates/openshell-cli/src/ssh.rs @@ -9,8 +9,8 @@ use miette::{IntoDiagnostic, Result, WrapErr}; use nix::sys::signal::{SaFlags, SigAction, SigHandler, SigSet, Signal, sigaction}; use openshell_core::ObjectId; use openshell_core::forward::{ - build_proxy_command, find_ssh_forward_pid, format_gateway_url, resolve_ssh_gateway, - shell_escape, validate_ssh_session_response, write_forward_pid, + ForwardSpec, build_proxy_command, find_ssh_forward_pid, format_gateway_url, + resolve_ssh_gateway, shell_escape, validate_ssh_session_response, write_forward_pid, }; use openshell_core::proto::{ CreateSshSessionRequest, GetSandboxRequest, SshRelayTarget, TcpForwardFrame, TcpForwardInit, @@ -25,10 +25,19 @@ use std::path::{Path, PathBuf}; use std::process::{Command, Stdio}; use std::time::Duration; use tokio::io::{AsyncReadExt, AsyncWriteExt}; -use tokio::process::Command as TokioCommand; +use tokio::net::TcpStream; +use tokio::process::{Child, Command as TokioCommand}; use tokio_stream::wrappers::ReceiverStream; -const FOREGROUND_FORWARD_STARTUP_GRACE_PERIOD: Duration = Duration::from_secs(2); +/// Time budget for a forward to become healthy after `ssh` starts: covers both +/// backgrounded-PID discovery and listener readiness, in foreground and +/// background. +const FORWARD_STARTUP_GRACE_PERIOD: Duration = Duration::from_secs(2); +/// Delay between listener/PID probes within the grace period. +const FORWARD_LISTENER_PROBE_INTERVAL: Duration = Duration::from_millis(50); +/// Per-attempt connect timeout, so one hung probe cannot consume the whole +/// grace period. +const FORWARD_LISTENER_CONNECT_TIMEOUT: Duration = Duration::from_millis(200); #[derive(Clone, Copy, Debug)] pub enum Editor { @@ -320,7 +329,7 @@ pub async fn sandbox_connect_editor( pub async fn sandbox_forward( server: &str, name: &str, - spec: &openshell_core::forward::ForwardSpec, + spec: &ForwardSpec, background: bool, tls: &TlsOptions, ) -> Result<()> { @@ -349,44 +358,167 @@ pub async fn sandbox_forward( let port = spec.port; - let status = if background { - command.status().await.into_diagnostic()? - } else { - let mut child = command.spawn().into_diagnostic()?; - if let Ok(status) = - tokio::time::timeout(FOREGROUND_FORWARD_STARTUP_GRACE_PERIOD, child.wait()).await + if background { + let status = command.status().await.into_diagnostic()?; + if !status.success() { + return Err(miette::miette!("ssh exited with status {status}")); + } + + // Background forwards must be both reachable and tracked. Without a PID + // file, `openshell forward stop/list` cannot manage the tunnel, so this + // path fails closed instead of printing a URL for an orphaned process. + let pid = wait_for_ssh_forward_pid(&session.sandbox_id, port) + .await + .ok_or_else(|| { + miette::miette!( + "could not discover backgrounded SSH process for sandbox {name} port {port}; \ + refusing to report an untracked forward" + ) + })?; + + if let Err(err) = wait_for_forward_listener(spec, FORWARD_STARTUP_GRACE_PERIOD) + .await + .wrap_err("ssh process started but local forward listener was not reachable") { - status.into_diagnostic()? - } else { - eprintln!("{}", foreground_forward_started_message(name, spec)); - child.wait().await.into_diagnostic()? + terminate_forward_pid(pid); + return Err(err); } + + write_forward_pid(name, port, pid, &session.sandbox_id, &spec.bind_addr)?; + return Ok(()); + } + + let status = { + let mut child = command.spawn().into_diagnostic()?; + if let Err(err) = wait_for_foreground_forward_start(&mut child, spec).await { + let _ = child.kill().await; + return Err(err); + } + eprintln!("{}", foreground_forward_started_message(name, spec)); + child.wait().await.into_diagnostic()? }; if !status.success() { return Err(miette::miette!("ssh exited with status {status}")); } - if background { - // SSH has forked — find its PID and record it. - if let Some(pid) = find_ssh_forward_pid(&session.sandbox_id, port) { - write_forward_pid(name, port, pid, &session.sandbox_id, &spec.bind_addr)?; - } else { - eprintln!( - "{} Could not discover backgrounded SSH process; \ - forward may be running but is not tracked", - "!".yellow(), - ); + Ok(()) +} + +/// Wait for the local listener, racing the probe against the `ssh` child +/// exiting. An early exit (e.g. `ExitOnForwardFailure=yes` tearing down the +/// session) means forwarding never came up, so it errors instead of waiting +/// out the grace period. +async fn wait_for_foreground_forward_start(child: &mut Child, spec: &ForwardSpec) -> Result<()> { + let listener = wait_for_forward_listener(spec, FORWARD_STARTUP_GRACE_PERIOD); + tokio::pin!(listener); + tokio::select! { + result = &mut listener => result, + status = child.wait() => { + let status = status.into_diagnostic()?; + if status.success() { + Err(miette::miette!( + "ssh exited before local forward listener opened on {}:{}", + forward_probe_host(spec), + spec.port, + )) + } else { + Err(miette::miette!( + "ssh exited with status {status} before local forward listener opened on {}:{}", + forward_probe_host(spec), + spec.port, + )) + } } } +} - Ok(()) +/// Poll for the backgrounded (`ssh -f`) forward's PID. `-f` forks after auth, +/// so the PID is unknown when `command.status()` returns and must be discovered +/// afterward. Returns `None` if it never appears within the grace period. +async fn wait_for_ssh_forward_pid(sandbox_id: &str, port: u16) -> Option { + let deadline = tokio::time::Instant::now() + FORWARD_STARTUP_GRACE_PERIOD; + loop { + if let Some(pid) = find_ssh_forward_pid(sandbox_id, port) { + return Some(pid); + } + if tokio::time::Instant::now() >= deadline { + return None; + } + tokio::time::sleep(FORWARD_LISTENER_PROBE_INTERVAL).await; + } } -fn foreground_forward_started_message( - name: &str, - spec: &openshell_core::forward::ForwardSpec, -) -> String { +/// Poll the local endpoint until a connect succeeds or `wait_for` elapses. The +/// last probe error is folded into the timeout diagnostic, so a failure reports +/// why the listener never opened, not just that it timed out. +async fn wait_for_forward_listener(spec: &ForwardSpec, wait_for: Duration) -> Result<()> { + let deadline = tokio::time::Instant::now() + wait_for; + loop { + let probe_error = match probe_forward_listener(spec).await { + Ok(()) => return Ok(()), + Err(err) => err, + }; + + if tokio::time::Instant::now() >= deadline { + return Err(miette::miette!( + "local forward listener did not open on {}:{} within {}ms: last probe failed with {probe_error}", + forward_probe_host(spec), + spec.port, + wait_for.as_millis(), + )); + } + + tokio::time::sleep(FORWARD_LISTENER_PROBE_INTERVAL).await; + } +} + +/// One bounded TCP connect to the forward endpoint. Returns a `String` error +/// rather than a `miette` diagnostic to stay cheap in the poll loop. The +/// connection only proves reachability and is dropped at once; SSH forwards +/// this throwaway connect to the sandbox-side target. +async fn probe_forward_listener(spec: &ForwardSpec) -> std::result::Result<(), String> { + match tokio::time::timeout( + FORWARD_LISTENER_CONNECT_TIMEOUT, + TcpStream::connect((forward_probe_host(spec), spec.port)), + ) + .await + { + Ok(Ok(stream)) => { + drop(stream); + Ok(()) + } + Ok(Err(err)) => Err(err.to_string()), + Err(_) => Err(format!( + "connect timed out after {}ms", + FORWARD_LISTENER_CONNECT_TIMEOUT.as_millis() + )), + } +} + +/// Resolve the bind address to a connectable host. Wildcard binds (`0.0.0.0`, +/// `::`, empty) are "any-address" listeners, not valid connect targets, so they +/// map to the matching loopback. Specific addresses are probed as-is. +fn forward_probe_host(spec: &ForwardSpec) -> &str { + match spec.bind_addr.as_str() { + "" | "0.0.0.0" => "127.0.0.1", + "::" => "::1", + host => host, + } +} + +/// Best-effort kill of a forward whose listener never came up. Failures are +/// ignored: the process may already be exiting, and the caller surfaces the +/// original listener error regardless. +fn terminate_forward_pid(pid: u32) { + let _ = Command::new("kill") + .arg(pid.to_string()) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status(); +} + +fn foreground_forward_started_message(name: &str, spec: &ForwardSpec) -> String { format!( "{} Forwarding port {} to sandbox {name}\n Access at: {}\n Press Ctrl+C to stop\n {}", "✓".green().bold(), @@ -1590,7 +1722,7 @@ mod tests { #[test] fn foreground_forward_started_message_includes_port_and_stop_hint() { - let spec = openshell_core::forward::ForwardSpec::new(8080); + let spec = ForwardSpec::new(8080); let message = foreground_forward_started_message("demo", &spec); assert!(message.contains("Forwarding port 8080 to sandbox demo")); assert!(message.contains("Access at: http://127.0.0.1:8080/")); @@ -1603,12 +1735,54 @@ mod tests { #[test] fn foreground_forward_started_message_custom_bind_addr() { - let spec = openshell_core::forward::ForwardSpec::parse("0.0.0.0:3000").unwrap(); + let spec = ForwardSpec::parse("0.0.0.0:3000").unwrap(); let message = foreground_forward_started_message("demo", &spec); assert!(message.contains("Forwarding port 3000 to sandbox demo")); assert!(message.contains("Access at: http://localhost:3000/")); } + #[test] + fn forward_probe_host_uses_connectable_loopback_for_wildcard_binds() { + let ipv4 = ForwardSpec::parse("0.0.0.0:3000").unwrap(); + let ipv6 = ForwardSpec::parse(":::3000").unwrap(); + let loopback = ForwardSpec::new(3000); + + assert_eq!(forward_probe_host(&ipv4), "127.0.0.1"); + assert_eq!(forward_probe_host(&ipv6), "::1"); + assert_eq!(forward_probe_host(&loopback), "127.0.0.1"); + } + + #[tokio::test] + async fn wait_for_forward_listener_accepts_ready_listener() { + let listener = tokio::net::TcpListener::bind(("127.0.0.1", 0)) + .await + .unwrap(); + let port = listener.local_addr().unwrap().port(); + let accept = tokio::spawn(async move { + let _ = listener.accept().await; + }); + let spec = ForwardSpec::new(port); + + wait_for_forward_listener(&spec, Duration::from_secs(1)) + .await + .unwrap(); + accept.await.unwrap(); + } + + #[tokio::test] + async fn wait_for_forward_listener_rejects_missing_listener() { + let listener = std::net::TcpListener::bind(("127.0.0.1", 0)).unwrap(); + let port = listener.local_addr().unwrap().port(); + drop(listener); + let spec = ForwardSpec::new(port); + + let err = wait_for_forward_listener(&spec, Duration::from_millis(20)) + .await + .unwrap_err(); + let text = format!("{err:?}"); + assert!(text.contains("local forward listener did not open")); + } + #[test] fn split_sandbox_path_separates_parent_and_basename() { assert_eq!( diff --git a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs index 7061614cb..2b61e9b52 100644 --- a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs +++ b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs @@ -715,6 +715,118 @@ fn install_fake_ssh(dir: &TempDir) -> std::path::PathBuf { ssh_path } +fn install_fake_forwarding_ssh(dir: &TempDir) -> std::path::PathBuf { + let ssh_path = dir.path().join("ssh"); + let pid_path = dir.path().join("fake-forward.pid"); + fs::write( + &ssh_path, + r#"#!/bin/sh +set -eu + +forward="" +sandbox_id="" +previous="" + +for arg in "$@"; do + if [ "$previous" = "-L" ]; then + forward="$arg" + previous="" + continue + fi + + if [ "$previous" = "-o" ]; then + case "$arg" in + ProxyCommand=*) + sandbox_id="$(printf '%s\n' "$arg" | sed -n 's/.*--sandbox-id \([^ ]*\).*/\1/p')" + ;; + esac + previous="" + continue + fi + + case "$arg" in + -L|-o) + previous="$arg" + ;; + esac +done + +if [ -z "$forward" ]; then + exit 0 +fi + +first="${forward%%:*}" +rest="${forward#*:}" +case "$first" in + ''|*[!0-9]*) + port="${rest%%:*}" + ;; + *) + port="$first" + ;; +esac + +if [ -z "$port" ] || [ -z "$sandbox_id" ]; then + exit 1 +fi + +nohup python3 -c ' +import signal +import socket +import sys + +port = int(sys.argv[1]) +sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) +sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) +sock.bind(("127.0.0.1", port)) +sock.listen(16) + +def stop(_signum, _frame): + sock.close() + raise SystemExit(0) + +signal.signal(signal.SIGTERM, stop) +signal.signal(signal.SIGINT, stop) +signal.signal(signal.SIGHUP, signal.SIG_IGN) + +while True: + conn, _addr = sock.accept() + conn.close() +' "$port" ssh ssh-proxy --sandbox-id "$sandbox_id" -L "$forward" >/dev/null 2>&1 & +echo $! > '@PID_PATH@' + +exit 0 +"# + .replace("@PID_PATH@", &pid_path.display().to_string()), + ) + .unwrap(); + let mut perms = fs::metadata(&ssh_path).unwrap().permissions(); + perms.set_mode(0o755); + fs::set_permissions(&ssh_path, perms).unwrap(); + + let pgrep_path = dir.path().join("pgrep"); + fs::write( + &pgrep_path, + format!( + r"#!/bin/sh +if [ -s '{}' ]; then + cat '{}' + exit 0 +fi +exit 1 +", + pid_path.display(), + pid_path.display() + ), + ) + .unwrap(); + let mut perms = fs::metadata(&pgrep_path).unwrap().permissions(); + perms.set_mode(0o755); + fs::set_permissions(&pgrep_path, perms).unwrap(); + + ssh_path +} + fn test_env(fake_ssh_dir: &TempDir, xdg_dir: &TempDir) -> EnvVarGuard { test_env_with(fake_ssh_dir, xdg_dir, &[]) } @@ -1302,7 +1414,7 @@ async fn sandbox_create_keeps_sandbox_with_forwarding() { let xdg_dir = tempfile::tempdir().unwrap(); let _env = test_env(&fake_ssh_dir, &xdg_dir); let tls = test_tls(&server); - install_fake_ssh(&fake_ssh_dir); + install_fake_forwarding_ssh(&fake_ssh_dir); let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); let forward_port = listener.local_addr().unwrap().port(); drop(listener); @@ -1334,6 +1446,11 @@ async fn sandbox_create_keeps_sandbox_with_forwarding() { .expect("sandbox create with forward should succeed"); assert!(deleted_names(&server).await.is_empty()); + let record = openshell_core::forward::read_forward_pid("persistent-forward", forward_port) + .expect("fake forward should be tracked"); + let _ = std::process::Command::new("kill") + .arg(record.pid.to_string()) + .status(); } #[tokio::test] diff --git a/docs/sandboxes/manage-sandboxes.mdx b/docs/sandboxes/manage-sandboxes.mdx index 1a54d0a06..dede9bba7 100644 --- a/docs/sandboxes/manage-sandboxes.mdx +++ b/docs/sandboxes/manage-sandboxes.mdx @@ -310,6 +310,8 @@ openshell forward start 8000 my-sandbox openshell forward start 8000 my-sandbox -d # run in background ``` +OpenShell prints the local URL only after the forward listener is reachable. Background forwards must be tracked locally so `openshell forward list` and `openshell forward stop` can manage them. + List and stop active forwards: ```shell From 301909f13c11d6dce811c7c0b8f005bf54a08774 Mon Sep 17 00:00:00 2001 From: Shiju Date: Fri, 12 Jun 2026 11:32:50 +0530 Subject: [PATCH 2/3] fix(cli): harden background forward cleanup Signed-off-by: Shiju --- crates/openshell-cli/src/ssh.rs | 24 ++- .../sandbox_create_lifecycle_integration.rs | 175 +++++++++++++++--- 2 files changed, 170 insertions(+), 29 deletions(-) diff --git a/crates/openshell-cli/src/ssh.rs b/crates/openshell-cli/src/ssh.rs index bdfadb5df..c70794347 100644 --- a/crates/openshell-cli/src/ssh.rs +++ b/crates/openshell-cli/src/ssh.rs @@ -7,6 +7,8 @@ use crate::tls::{TlsOptions, grpc_client}; use miette::{IntoDiagnostic, Result, WrapErr}; #[cfg(unix)] use nix::sys::signal::{SaFlags, SigAction, SigHandler, SigSet, Signal, sigaction}; +#[cfg(unix)] +use nix::unistd::Pid; use openshell_core::ObjectId; use openshell_core::forward::{ ForwardSpec, build_proxy_command, find_ssh_forward_pid, format_gateway_url, @@ -507,17 +509,25 @@ fn forward_probe_host(spec: &ForwardSpec) -> &str { } } -/// Best-effort kill of a forward whose listener never came up. Failures are -/// ignored: the process may already be exiting, and the caller surfaces the +/// Best-effort termination of a forward whose listener never came up. Failures +/// are ignored: the process may already be exiting, and the caller surfaces the /// original listener error regardless. +#[cfg(unix)] fn terminate_forward_pid(pid: u32) { - let _ = Command::new("kill") - .arg(pid.to_string()) - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .status(); + let Ok(raw_pid) = i32::try_from(pid) else { + return; + }; + if raw_pid <= 0 { + return; + } + + let _ = nix::sys::signal::kill(Pid::from_raw(raw_pid), Signal::SIGTERM); } +/// Non-Unix builds cannot manage OpenSSH process IDs with Unix signals. +#[cfg(not(unix))] +fn terminate_forward_pid(_pid: u32) {} + fn foreground_forward_started_message(name: &str, spec: &ForwardSpec) -> String { format!( "{} Forwarding port {} to sandbox {name}\n Access at: {}\n Press Ctrl+C to stop\n {}", diff --git a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs index 2b61e9b52..8223ff463 100644 --- a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs +++ b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs @@ -706,20 +706,45 @@ async fn run_server() -> TestServer { } } -fn install_fake_ssh(dir: &TempDir) -> std::path::PathBuf { - let ssh_path = dir.path().join("ssh"); - fs::write(&ssh_path, "#!/bin/sh\nexit 0\n").unwrap(); - let mut perms = fs::metadata(&ssh_path).unwrap().permissions(); +fn install_executable_script( + dir: &TempDir, + name: &str, + contents: impl AsRef<[u8]>, +) -> std::path::PathBuf { + let path = dir.path().join(name); + fs::write(&path, contents).unwrap(); + let mut perms = fs::metadata(&path).unwrap().permissions(); perms.set_mode(0o755); - fs::set_permissions(&ssh_path, perms).unwrap(); - ssh_path + fs::set_permissions(&path, perms).unwrap(); + path +} + +fn install_fake_ssh(dir: &TempDir) -> std::path::PathBuf { + install_executable_script(dir, "ssh", "#!/bin/sh\nexit 0\n") +} + +fn install_fake_pgrep_no_match(dir: &TempDir) -> std::path::PathBuf { + install_executable_script(dir, "pgrep", "#!/bin/sh\nexit 1\n") +} + +async fn wait_for_file(path: &std::path::Path, timeout: Duration) -> bool { + let deadline = Instant::now() + timeout; + loop { + if path.exists() { + return true; + } + if Instant::now() >= deadline { + return false; + } + tokio::time::sleep(Duration::from_millis(50)).await; + } } fn install_fake_forwarding_ssh(dir: &TempDir) -> std::path::PathBuf { - let ssh_path = dir.path().join("ssh"); let pid_path = dir.path().join("fake-forward.pid"); - fs::write( - &ssh_path, + let ssh_path = install_executable_script( + dir, + "ssh", r#"#!/bin/sh set -eu @@ -798,15 +823,11 @@ echo $! > '@PID_PATH@' exit 0 "# .replace("@PID_PATH@", &pid_path.display().to_string()), - ) - .unwrap(); - let mut perms = fs::metadata(&ssh_path).unwrap().permissions(); - perms.set_mode(0o755); - fs::set_permissions(&ssh_path, perms).unwrap(); + ); - let pgrep_path = dir.path().join("pgrep"); - fs::write( - &pgrep_path, + install_executable_script( + dir, + "pgrep", format!( r"#!/bin/sh if [ -s '{}' ]; then @@ -818,15 +839,66 @@ exit 1 pid_path.display(), pid_path.display() ), - ) - .unwrap(); - let mut perms = fs::metadata(&pgrep_path).unwrap().permissions(); - perms.set_mode(0o755); - fs::set_permissions(&pgrep_path, perms).unwrap(); + ); ssh_path } +fn install_fake_unreachable_forwarding_ssh(dir: &TempDir) -> std::path::PathBuf { + let pid_path = dir.path().join("fake-forward.pid"); + let terminated_path = dir.path().join("fake-forward.terminated"); + install_executable_script( + dir, + "ssh", + r#"#!/bin/sh +set -eu + +nohup python3 -c ' +import pathlib +import signal +import sys +import time + +terminated_path = pathlib.Path(sys.argv[1]) + +def stop(_signum, _frame): + terminated_path.write_text("terminated") + raise SystemExit(0) + +signal.signal(signal.SIGTERM, stop) +signal.signal(signal.SIGINT, stop) +signal.signal(signal.SIGHUP, signal.SIG_IGN) + +while True: + time.sleep(1) +' '@TERMINATED_PATH@' >/dev/null 2>&1 & +echo $! > '@PID_PATH@' + +exit 0 +"# + .replace("@TERMINATED_PATH@", &terminated_path.display().to_string()) + .replace("@PID_PATH@", &pid_path.display().to_string()), + ); + + install_executable_script( + dir, + "pgrep", + format!( + r"#!/bin/sh +if [ -s '{}' ]; then + cat '{}' + exit 0 +fi +exit 1 +", + pid_path.display(), + pid_path.display() + ), + ); + + terminated_path +} + fn test_env(fake_ssh_dir: &TempDir, xdg_dir: &TempDir) -> EnvVarGuard { test_env_with(fake_ssh_dir, xdg_dir, &[]) } @@ -1453,6 +1525,65 @@ async fn sandbox_create_keeps_sandbox_with_forwarding() { .status(); } +#[tokio::test] +async fn sandbox_forward_background_fails_when_pid_is_not_discoverable() { + let server = run_server().await; + let fake_ssh_dir = tempfile::tempdir().unwrap(); + let xdg_dir = tempfile::tempdir().unwrap(); + let _env = test_env(&fake_ssh_dir, &xdg_dir); + let tls = test_tls(&server); + install_fake_ssh(&fake_ssh_dir); + install_fake_pgrep_no_match(&fake_ssh_dir); + let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); + let forward_port = listener.local_addr().unwrap().port(); + drop(listener); + + let spec = openshell_core::forward::ForwardSpec::new(forward_port); + let err = run::sandbox_forward(&server.endpoint, "untracked-forward", &spec, true, &tls) + .await + .expect_err("background forward should fail without a discoverable SSH PID"); + let msg = format!("{err}"); + assert!( + msg.contains("could not discover backgrounded SSH process"), + "error should explain the missing tracked SSH process, got: {msg}", + ); + assert!( + openshell_core::forward::read_forward_pid("untracked-forward", forward_port).is_none(), + "untracked background forwards must not write a PID file", + ); +} + +#[tokio::test] +async fn sandbox_forward_background_terminates_discovered_pid_when_listener_never_opens() { + let server = run_server().await; + let fake_ssh_dir = tempfile::tempdir().unwrap(); + let xdg_dir = tempfile::tempdir().unwrap(); + let _env = test_env(&fake_ssh_dir, &xdg_dir); + let tls = test_tls(&server); + let terminated_path = install_fake_unreachable_forwarding_ssh(&fake_ssh_dir); + let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); + let forward_port = listener.local_addr().unwrap().port(); + drop(listener); + + let spec = openshell_core::forward::ForwardSpec::new(forward_port); + let err = run::sandbox_forward(&server.endpoint, "unreachable-forward", &spec, true, &tls) + .await + .expect_err("background forward should fail when the listener never opens"); + let msg = format!("{err}"); + assert!( + msg.contains("ssh process started but local forward listener was not reachable"), + "error should preserve listener startup context, got: {msg}", + ); + assert!( + openshell_core::forward::read_forward_pid("unreachable-forward", forward_port).is_none(), + "unreachable background forwards must not write a PID file", + ); + assert!( + wait_for_file(&terminated_path, Duration::from_secs(2)).await, + "discovered background SSH process should be terminated after listener failure", + ); +} + #[tokio::test] async fn sandbox_create_sends_environment_variables() { let server = run_server().await; From 4583b5d23c2fccab9b87eb78d3212633ae424cd2 Mon Sep 17 00:00:00 2001 From: Shiju Date: Fri, 12 Jun 2026 13:04:10 +0530 Subject: [PATCH 3/3] fix(cli): validate background forward cleanup pid Signed-off-by: Shiju --- crates/openshell-cli/src/ssh.rs | 76 +++++++-- .../sandbox_create_lifecycle_integration.rs | 150 +++++++++++++++--- 2 files changed, 194 insertions(+), 32 deletions(-) diff --git a/crates/openshell-cli/src/ssh.rs b/crates/openshell-cli/src/ssh.rs index c70794347..02ab914a5 100644 --- a/crates/openshell-cli/src/ssh.rs +++ b/crates/openshell-cli/src/ssh.rs @@ -10,6 +10,8 @@ use nix::sys::signal::{SaFlags, SigAction, SigHandler, SigSet, Signal, sigaction #[cfg(unix)] use nix::unistd::Pid; use openshell_core::ObjectId; +#[cfg(unix)] +use openshell_core::forward::pid_matches_forward; use openshell_core::forward::{ ForwardSpec, build_proxy_command, find_ssh_forward_pid, format_gateway_url, resolve_ssh_gateway, shell_escape, validate_ssh_session_response, write_forward_pid, @@ -31,11 +33,15 @@ use tokio::net::TcpStream; use tokio::process::{Child, Command as TokioCommand}; use tokio_stream::wrappers::ReceiverStream; -/// Time budget for a forward to become healthy after `ssh` starts: covers both -/// backgrounded-PID discovery and listener readiness, in foreground and -/// background. -const FORWARD_STARTUP_GRACE_PERIOD: Duration = Duration::from_secs(2); -/// Delay between listener/PID probes within the grace period. +/// Time budget for finding the OpenSSH background process after `ssh -f` +/// returns. PID discovery is separate from listener readiness so missing +/// process tracking still fails quickly. +const FORWARD_PID_DISCOVERY_TIMEOUT: Duration = Duration::from_secs(2); +/// Time budget for the local listener to become reachable after `ssh` starts. +/// This is a user-visible readiness deadline for both foreground and background +/// forwards, not a soft cleanup grace period. +const FORWARD_LISTENER_READINESS_TIMEOUT: Duration = Duration::from_secs(10); +/// Delay between listener/PID probes within the configured timeout. const FORWARD_LISTENER_PROBE_INTERVAL: Duration = Duration::from_millis(50); /// Per-attempt connect timeout, so one hung probe cannot consume the whole /// grace period. @@ -378,11 +384,11 @@ pub async fn sandbox_forward( ) })?; - if let Err(err) = wait_for_forward_listener(spec, FORWARD_STARTUP_GRACE_PERIOD) + if let Err(err) = wait_for_forward_listener(spec, FORWARD_LISTENER_READINESS_TIMEOUT) .await .wrap_err("ssh process started but local forward listener was not reachable") { - terminate_forward_pid(pid); + terminate_forward_pid(pid, port, &session.sandbox_id); return Err(err); } @@ -412,7 +418,7 @@ pub async fn sandbox_forward( /// session) means forwarding never came up, so it errors instead of waiting /// out the grace period. async fn wait_for_foreground_forward_start(child: &mut Child, spec: &ForwardSpec) -> Result<()> { - let listener = wait_for_forward_listener(spec, FORWARD_STARTUP_GRACE_PERIOD); + let listener = wait_for_forward_listener(spec, FORWARD_LISTENER_READINESS_TIMEOUT); tokio::pin!(listener); tokio::select! { result = &mut listener => result, @@ -439,7 +445,7 @@ async fn wait_for_foreground_forward_start(child: &mut Child, spec: &ForwardSpec /// so the PID is unknown when `command.status()` returns and must be discovered /// afterward. Returns `None` if it never appears within the grace period. async fn wait_for_ssh_forward_pid(sandbox_id: &str, port: u16) -> Option { - let deadline = tokio::time::Instant::now() + FORWARD_STARTUP_GRACE_PERIOD; + let deadline = tokio::time::Instant::now() + FORWARD_PID_DISCOVERY_TIMEOUT; loop { if let Some(pid) = find_ssh_forward_pid(sandbox_id, port) { return Some(pid); @@ -513,20 +519,26 @@ fn forward_probe_host(spec: &ForwardSpec) -> &str { /// are ignored: the process may already be exiting, and the caller surfaces the /// original listener error regardless. #[cfg(unix)] -fn terminate_forward_pid(pid: u32) { +fn terminate_forward_pid(pid: u32, port: u16, sandbox_id: &str) { let Ok(raw_pid) = i32::try_from(pid) else { return; }; if raw_pid <= 0 { return; } + if !pid_matches_forward(pid, port, Some(sandbox_id)) { + // The PID came from a process-table scan, not a file we own. Re-check + // immediately before signaling so a stale or spoofed match is left + // untouched instead of terminating an unrelated process. + return; + } let _ = nix::sys::signal::kill(Pid::from_raw(raw_pid), Signal::SIGTERM); } /// Non-Unix builds cannot manage OpenSSH process IDs with Unix signals. #[cfg(not(unix))] -fn terminate_forward_pid(_pid: u32) {} +fn terminate_forward_pid(_pid: u32, _port: u16, _sandbox_id: &str) {} fn foreground_forward_started_message(name: &str, spec: &ForwardSpec) -> String { format!( @@ -1793,6 +1805,48 @@ mod tests { assert!(text.contains("local forward listener did not open")); } + #[cfg(unix)] + #[test] + fn terminate_forward_pid_skips_process_that_no_longer_matches_forward() { + let dir = tempfile::tempdir().unwrap(); + let terminated_path = dir.path().join("terminated"); + let mut child = Command::new("python3") + .arg("-c") + .arg( + r#" +import pathlib +import signal +import sys +import time + +terminated_path = pathlib.Path(sys.argv[1]) + +def stop(_signum, _frame): + terminated_path.write_text("terminated") + raise SystemExit(0) + +signal.signal(signal.SIGTERM, stop) + +while True: + time.sleep(1) +"#, + ) + .arg(&terminated_path) + .spawn() + .unwrap(); + std::thread::sleep(Duration::from_millis(100)); + + terminate_forward_pid(child.id(), 43210, "id-spoofed-forward"); + std::thread::sleep(Duration::from_millis(200)); + + assert!( + !terminated_path.exists(), + "mismatched process should not receive SIGTERM" + ); + let _ = child.kill(); + let _ = child.wait(); + } + #[test] fn split_sandbox_path_separates_parent_and_basename() { assert_eq!( diff --git a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs index 8223ff463..ae23fa6aa 100644 --- a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs +++ b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs @@ -727,10 +727,27 @@ fn install_fake_pgrep_no_match(dir: &TempDir) -> std::path::PathBuf { install_executable_script(dir, "pgrep", "#!/bin/sh\nexit 1\n") } -async fn wait_for_file(path: &std::path::Path, timeout: Duration) -> bool { +async fn wait_for_process_exit(pid: u32, timeout: Duration) -> bool { let deadline = Instant::now() + timeout; loop { - if path.exists() { + let output = std::process::Command::new("ps") + .arg("-o") + .arg("stat=") + .arg("-p") + .arg(pid.to_string()) + .stderr(std::process::Stdio::null()) + .output(); + let alive = output.is_ok_and(|output| { + if !output.status.success() { + return false; + } + let stat = String::from_utf8_lossy(&output.stdout); + // Linux can leave the orphaned fake forward as a short-lived zombie + // until the container's init process reaps it. A zombie has already + // exited, so it satisfies this cleanup assertion. + !stat.trim_start().starts_with('Z') + }); + if !alive { return true; } if Instant::now() >= deadline { @@ -844,39 +861,87 @@ exit 1 ssh_path } -fn install_fake_unreachable_forwarding_ssh(dir: &TempDir) -> std::path::PathBuf { +struct FakeUnreachableForward { + log_path: std::path::PathBuf, + pid_path: std::path::PathBuf, +} + +fn install_fake_unreachable_forwarding_ssh(dir: &TempDir) -> FakeUnreachableForward { + let log_path = dir.path().join("fake-forward.log"); let pid_path = dir.path().join("fake-forward.pid"); - let terminated_path = dir.path().join("fake-forward.terminated"); + let ready_path = dir.path().join("fake-forward.ready"); install_executable_script( dir, "ssh", r#"#!/bin/sh set -eu -nohup python3 -c ' +forward="" +sandbox_id="" +previous="" + +for arg in "$@"; do + if [ "$previous" = "-L" ]; then + forward="$arg" + previous="" + continue + fi + + if [ "$previous" = "-o" ]; then + case "$arg" in + ProxyCommand=*) + sandbox_id="$(printf '%s\n' "$arg" | sed -n 's/.*--sandbox-id \([^ ]*\).*/\1/p')" + ;; + esac + previous="" + continue + fi + + case "$arg" in + -L|-o) + previous="$arg" + ;; + esac +done + +if [ -z "$forward" ] || [ -z "$sandbox_id" ]; then + exit 1 +fi + +trap '' HUP +python3 -c ' import pathlib import signal import sys import time -terminated_path = pathlib.Path(sys.argv[1]) - -def stop(_signum, _frame): - terminated_path.write_text("terminated") - raise SystemExit(0) +ready_path = pathlib.Path(sys.argv[1]) -signal.signal(signal.SIGTERM, stop) -signal.signal(signal.SIGINT, stop) signal.signal(signal.SIGHUP, signal.SIG_IGN) +ready_path.write_text("ready") + while True: time.sleep(1) -' '@TERMINATED_PATH@' >/dev/null 2>&1 & -echo $! > '@PID_PATH@' +' '@READY_PATH@' ssh ssh-proxy --sandbox-id "$sandbox_id" -L "$forward" >'@LOG_PATH@' 2>&1 & +pid="$!" +i=0 +while [ "$i" -lt 100 ]; do + if [ -e '@READY_PATH@' ]; then + break + fi + i=$((i + 1)) + sleep 0.05 +done +if [ ! -e '@READY_PATH@' ]; then + exit 1 +fi +echo "$pid" > '@PID_PATH@' exit 0 "# - .replace("@TERMINATED_PATH@", &terminated_path.display().to_string()) + .replace("@LOG_PATH@", &log_path.display().to_string()) + .replace("@READY_PATH@", &ready_path.display().to_string()) .replace("@PID_PATH@", &pid_path.display().to_string()), ); @@ -896,7 +961,7 @@ exit 1 ), ); - terminated_path + FakeUnreachableForward { log_path, pid_path } } fn test_env(fake_ssh_dir: &TempDir, xdg_dir: &TempDir) -> EnvVarGuard { @@ -1553,6 +1618,29 @@ async fn sandbox_forward_background_fails_when_pid_is_not_discoverable() { ); } +#[tokio::test] +async fn sandbox_forward_foreground_fails_when_ssh_exits_before_listener_opens() { + let server = run_server().await; + let fake_ssh_dir = tempfile::tempdir().unwrap(); + let xdg_dir = tempfile::tempdir().unwrap(); + let _env = test_env(&fake_ssh_dir, &xdg_dir); + let tls = test_tls(&server); + install_fake_ssh(&fake_ssh_dir); + let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); + let forward_port = listener.local_addr().unwrap().port(); + drop(listener); + + let spec = openshell_core::forward::ForwardSpec::new(forward_port); + let err = run::sandbox_forward(&server.endpoint, "foreground-forward", &spec, false, &tls) + .await + .expect_err("foreground forward should fail when ssh exits before listener readiness"); + let msg = format!("{err}"); + assert!( + msg.contains("ssh exited before local forward listener opened"), + "error should explain that ssh exited before listener readiness, got: {msg}", + ); +} + #[tokio::test] async fn sandbox_forward_background_terminates_discovered_pid_when_listener_never_opens() { let server = run_server().await; @@ -1560,7 +1648,7 @@ async fn sandbox_forward_background_terminates_discovered_pid_when_listener_neve let xdg_dir = tempfile::tempdir().unwrap(); let _env = test_env(&fake_ssh_dir, &xdg_dir); let tls = test_tls(&server); - let terminated_path = install_fake_unreachable_forwarding_ssh(&fake_ssh_dir); + let fake_forward = install_fake_unreachable_forwarding_ssh(&fake_ssh_dir); let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); let forward_port = listener.local_addr().unwrap().port(); drop(listener); @@ -1578,10 +1666,30 @@ async fn sandbox_forward_background_terminates_discovered_pid_when_listener_neve openshell_core::forward::read_forward_pid("unreachable-forward", forward_port).is_none(), "unreachable background forwards must not write a PID file", ); - assert!( - wait_for_file(&terminated_path, Duration::from_secs(2)).await, - "discovered background SSH process should be terminated after listener failure", - ); + let pid = fs::read_to_string(&fake_forward.pid_path) + .expect("fake forward should record a PID") + .trim() + .parse::() + .expect("fake forward PID should be numeric"); + if !wait_for_process_exit(pid, Duration::from_secs(2)).await { + let log = fs::read_to_string(&fake_forward.log_path).unwrap_or_default(); + let command = std::process::Command::new("ps") + .arg("-ww") + .arg("-o") + .arg("command=") + .arg("-p") + .arg(pid.to_string()) + .output() + .ok() + .map(|output| String::from_utf8_lossy(&output.stdout).to_string()) + .unwrap_or_default(); + panic!( + "discovered background SSH process should exit after listener failure cleanup; pid={}, command={}, log={}", + pid, + command.trim(), + log.trim(), + ); + } } #[tokio::test]