diff --git a/.oxfmtrc.json b/.oxfmtrc.json index 87ba4177..d85079db 100644 --- a/.oxfmtrc.json +++ b/.oxfmtrc.json @@ -24,6 +24,7 @@ "**/playwright-report/", "**/test-results/", "**/coverage/", + "apps/desktop/test/smb-servers/.compose/", "apps/website/src/pages/pricing.astro" ] } diff --git a/Cargo.lock b/Cargo.lock index d7e7a46a..f32c4f8b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -243,7 +243,7 @@ dependencies = [ "objc2-foundation 0.3.2", "parking_lot", "percent-encoding", - "windows-sys 0.52.0", + "windows-sys 0.60.2", "wl-clipboard-rs", "x11rb", ] @@ -1975,7 +1975,7 @@ dependencies = [ "libc", "option-ext", "redox_users 0.5.2", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -2248,7 +2248,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -2985,7 +2985,7 @@ dependencies = [ "gobject-sys 0.21.5", "libc", "system-deps 7.0.8", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -4786,7 +4786,7 @@ checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" dependencies = [ "hermit-abi", "libc", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -4864,7 +4864,7 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -5720,7 +5720,7 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "680998035259dcfcafe653688bf2aa6d3e2dc05e98be6ab46afb089dc84f1df8" dependencies = [ - "proc-macro-crate 1.3.1", + "proc-macro-crate 3.5.0", "proc-macro2", "quote", "syn 2.0.117", @@ -6240,7 +6240,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7d8fae84b431384b68627d0f9b3b1245fcf9f46f6c0e3dc902e9dce64edd1967" dependencies = [ "libc", - "windows-sys 0.45.0", + "windows-sys 0.61.2", ] [[package]] @@ -6995,7 +6995,7 @@ dependencies = [ "once_cell", "socket2", "tracing", - "windows-sys 0.52.0", + "windows-sys 0.60.2", ] [[package]] @@ -7458,7 +7458,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -7516,7 +7516,7 @@ dependencies = [ "security-framework", "security-framework-sys", "webpki-root-certs", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -8064,9 +8064,9 @@ checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "smb2" -version = "0.8.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac712ad8fd6e9712cb44782bb4d084afa4081cdce296b39fbaade28e0cfd0a1c" +checksum = "f584485452a389b86e87b15740b3645a08c1a81f599cf3f5a346f1604180faa4" dependencies = [ "aes 0.9.0-rc.4", "aes-gcm 0.11.0-rc.3", @@ -8991,7 +8991,7 @@ dependencies = [ "getrandom 0.4.2", "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -10169,7 +10169,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.61.2", ] [[package]] diff --git a/apps/desktop/src-tauri/Cargo.toml b/apps/desktop/src-tauri/Cargo.toml index cad0344d..3de5e42c 100644 --- a/apps/desktop/src-tauri/Cargo.toml +++ b/apps/desktop/src-tauri/Cargo.toml @@ -170,7 +170,7 @@ mdns-sd = { version = "0.19", features = ["logging"] } # SMB2/3 protocol client for share enumeration (pure Rust, pipelined I/O). # When bumping: also re-vendor the test containers per # apps/desktop/test/smb-servers/.compose/VENDORED.md -smb2 = "0.8.0" +smb2 = "0.9.1" # NFD normalization for APFS collation and SMB path normalization unicode-normalization = "0.1" diff --git a/apps/desktop/src-tauri/src/commands/ui.rs b/apps/desktop/src-tauri/src/commands/ui.rs index 9f3744a2..3afd86a7 100644 --- a/apps/desktop/src-tauri/src/commands/ui.rs +++ b/apps/desktop/src-tauri/src/commands/ui.rs @@ -118,6 +118,24 @@ pub fn show_breadcrumb_context_menu(window: Window, shortcut: Str #[tauri::command] #[specta::specta] pub fn show_main_window(window: Window) -> Result<(), String> { + // E2E: on macOS, use `orderFront:` instead of `makeKeyAndOrderFront:` so the + // window appears without stealing focus from whatever the user is currently + // working in. `window.show()` calls the latter on macOS, which always grabs + // OS focus. Linux/Windows test runs happen in headless containers, so the + // standard show is fine there. + #[cfg(target_os = "macos")] + if crate::test_mode::is_e2e_mode() { + use objc2::msg_send; + use objc2::runtime::AnyObject; + let ns_window = window.ns_window().map_err(|e| e.to_string())? as *mut AnyObject; + if ns_window.is_null() { + return Err("NSWindow pointer is null".into()); + } + unsafe { + let _: () = msg_send![ns_window, orderFront: std::ptr::null_mut::()]; + } + return Ok(()); + } window.show().map_err(|e| e.to_string()) } diff --git a/apps/desktop/src-tauri/src/file_system/volume/CLAUDE.md b/apps/desktop/src-tauri/src/file_system/volume/CLAUDE.md index a6ed10ac..a25783d0 100644 --- a/apps/desktop/src-tauri/src/file_system/volume/CLAUDE.md +++ b/apps/desktop/src-tauri/src/file_system/volume/CLAUDE.md @@ -128,7 +128,7 @@ When adding a new volume, add a column for it and fill in each row. The matrix d Reads and writes have different shapes because the consumer relationship is different: - **Reads** return a `VolumeReadStream` that an external caller polls. The download handle has to live past the function call and cross async contexts. That's where the lifetime/ownership gymnastics below come from. -- **Writes** consume a stream (or a local file) inside the method itself. The chunk loop is the consumer, so there's nothing to hand off. Just hold the session lock for the duration, pull chunks from the source, push them into the backend's chunk-by-chunk writer. `SmbVolume::import_single_file_with_progress` and `SmbVolume::write_from_stream` are the reference implementations: open the smb2 `FileWriter`, loop `write_chunk`, call `finish()` on success or `abort()` on cancel. No task spawn, no channel, no self-referential struct. +- **Writes** consume a stream (or a local file) inside the method itself. The chunk loop is the consumer, so there's nothing to hand off. For backends with a `'static` writer (smb2 0.9's owned `FileWriter`, mtp-rs's `upload_stream`), drive the writer directly on a cloned session handle — no lock held across I/O. For backends whose writer borrows from the session, hold the session lock for the chunk loop's duration. `SmbVolume::write_from_stream` is the reference implementation: clone the session once, open the smb2 `FileWriter` on the clone, loop `write_chunk`, call `finish()` on success or `abort()` on cancel. No task spawn, no channel, no self-referential struct, no client mutex held while WRITEs are in flight. The rest of this section is about **read-side** lifetime handling. Which pattern to pick depends on whether your protocol SDK's download handle is `'static` or borrowed. @@ -387,7 +387,7 @@ spawned detached task. This is safe because the stream always lives in an async **Why**: `SmbVolume` now handles listing updates via `notify_mutation` using its own smb2 `get_metadata`. The old `std::fs`-based synthetic diff path (`emit_synthetic_entry_diff`) is redundant and goes through the slow OS mount. Returning `false` skips it. **Decision**: `SmbVolume` splits session storage: `Arc>>` + `Arc>>>` -**Why**: Phase 4 Fix 2 unblocks concurrency on the hot copy path. Previously the session lived in one `Mutex>`, which the streaming-read producer and the compound read/write fast-paths held for the entire transfer, serializing every concurrent copy through the mutex. With smb2 0.7.x, `Connection` is `Clone` (cheap `Arc::clone`, all clones multiplex frames over one SMB session). Splitting the Tree out lets us briefly lock the client, clone its `Connection`, and release the lock, then drive `Tree::download` / `Tree::read_file_compound` / `Tree::write_file_compound` on the cloned `Connection` with no lock held. N concurrent copies on one `SmbVolume` now pipeline N operations over the single session instead of queuing on the mutex. Tree lives in a `RwLock` because we only take read locks in the hot path (cloning an `Arc`) and only write on disconnect. The legacy large-file streaming writer (`FileWriter<'a>`) still takes the client mutex for its duration because its lifetime borrows `&'a mut Connection` from the `SmbClient`; that's documented as a Gotcha below. Large files are rare in the hot path, the compound fast-path covers every small write. +**Why**: Phase 4 Fix 2 unblocks concurrency on the hot copy path. Previously the session lived in one `Mutex>`, which the streaming-read producer and the compound read/write fast-paths held for the entire transfer, serializing every concurrent copy through the mutex. With smb2 0.7.x, `Connection` is `Clone` (cheap `Arc::clone`, all clones multiplex frames over one SMB session). Splitting the Tree out lets us briefly lock the client, clone its `Connection`, and release the lock, then drive `Tree::download` / `Tree::read_file_compound` / `Tree::write_file_compound` on the cloned `Connection` with no lock held. N concurrent copies on one `SmbVolume` now pipeline N operations over the single session instead of queuing on the mutex. Tree lives in a `RwLock` because we only take read locks in the hot path (cloning an `Arc`) and only write on disconnect. Since smb2 0.9, the streaming-write path also uses this clone-and-release shape (see the `write_from_stream` Decision below), so the client mutex is no longer held across any I/O. **Decision**: `SmbVolume::local_path()` returns `None` **Why**: `local_path()` is checked in `volume_copy.rs` to decide whether to use native OS copy APIs. If SmbVolume returned `Some(mount_path)`, copies would go through the slow OS mount, which is exactly what we're trying to avoid. `root()` still returns the mount path for frontend path resolution. @@ -410,8 +410,11 @@ spawned detached task. This is safe because the stream always lives in an async **Gotcha**: Watcher filenames are NFC (from server) but macOS mount paths are NFD **Why**: SMB servers return NFC-normalized filenames. macOS filesystem paths use NFD. The watcher NFD-normalizes filenames before constructing display paths used for cache lookups. -**Gotcha**: SMB write streaming fallback still holds the client mutex for the whole upload -**Why**: `SmbClient::create_file_writer` / `Tree::create_file_writer` both return `FileWriter<'a>` which borrows `&'a mut Connection`. We can't release the client mutex while the writer is alive, so the streaming (large-file) write path serializes concurrent writes on one `SmbVolume`. This is acceptable because the compound fast-path in `write_from_stream` handles every file ≤ `max_write_size` (typically 1 MB on QNAP, the SMB2 spec ceiling is 8 MB) without touching the client mutex for the actual write. Large files are rare in the hot copy path; if this ever becomes a bottleneck, the fix is a future `smb2` release that exposes a `FileWriter` built from a cloned `Connection` + `Arc` (both owned inside the writer) rather than borrowing. +**Gotcha (no longer applicable as of smb2 0.9)**: SMB write streaming fallback used to hold the client mutex for the whole upload +**Why**: Historically `FileWriter<'a>` borrowed `&'a mut Connection` from the `SmbClient`, so `write_from_stream` had to hold the client mutex for the duration of the streaming write. Under sustained concurrent pressure this two-phase pattern (brief `clone_session` fast-path probe → drop → long mutex-guarded streaming fallback) deadlocked. smb2 0.9 rebuilt `FileWriter` to own its `Connection` and `Arc`, removing the borrow. The regression is pinned by `smb_integration_concurrent_streaming_writes_no_deadlock`. See the new Decision below (`write_from_stream` uses a cloned Connection + Arc) for the current design. + +**Decision**: `write_from_stream` uses a cloned `Connection` + `Arc` via smb2 0.9's owned `FileWriter` +**Why**: smb2 0.9 made `FileWriter` own its `Connection` (cheap `Arc::clone`) and `Arc` instead of borrowing `&'a mut Connection`. `write_from_stream` now calls `clone_session` once up front and drives both the compound fast-path AND the streaming fallback on the same owned `Connection` clone. The client mutex is held only for the few microseconds of `clone_session()`, never across I/O. The previous shape — fast-path on a clone, then drop and re-acquire the client for the streaming fallback — was the deadlock reproducer in Phase C against QNAP. The architectural property we get for free: N concurrent streaming writes on one `SmbVolume` pipeline N WRITE chains over a single SMB session, multiplexed by `MessageId` in smb2's receiver task. No external locking, no mutex contention on the hot copy path. **Decision**: `SmbVolume` overrides `scan_for_copy_batch` to pipeline per-path stats over a single SMB session **Why**: The copy pipeline's scan phase used to loop `scan_for_copy` per top-level source, N sequential RTTs on the wire before the copy phase could even start. For a 100-file copy over a ~60 ms Tailscale link that's ~5 s of serial stats. Fix 4 overrides `scan_for_copy_batch` to clone `smb2::Connection` per path under a brief client-mutex acquire (cheap `Arc::clone`, all clones multiplex over the same SMB session), release the lock, then drive `tree.stat(&mut conn, path)` on each clone inside a `FuturesUnordered`. Empty root paths skip the stat. Single-path batches fall through to `scan_recursive` so one-file drag-drops don't pay the batch machinery cost. Directories found during the stat phase recurse sequentially afterward. Parallel directory recursion is a future "Fix 5". Measured 6.5× wall-clock win at 100 × 10 KB: 6.11 s → 947 ms. See `docs/notes/phase4-rtt-investigation.md` for the wire trace. diff --git a/apps/desktop/src-tauri/src/file_system/volume/friendly_error/kinds.rs b/apps/desktop/src-tauri/src/file_system/volume/friendly_error/kinds.rs index fceff939..0c7708d9 100644 --- a/apps/desktop/src-tauri/src/file_system/volume/friendly_error/kinds.rs +++ b/apps/desktop/src-tauri/src/file_system/volume/friendly_error/kinds.rs @@ -206,6 +206,29 @@ pub(super) fn not_supported(raw_detail: String) -> FriendlyError { } } +/// `STATUS_DELETE_PENDING`: the file has been marked for deletion on the server +/// but at least one open handle is keeping it alive. The file disappears the +/// moment the last handle closes, so retry-after-a-moment is the right hint. +pub(super) fn delete_pending(path_display: &str, raw_detail: String) -> FriendlyError { + FriendlyError { + category: ErrorCategory::Transient, + title: "File is being removed".into(), + explanation: format!( + "`{}` is on its way out. The server marked it for deletion, but another \ + open handle is keeping it around until that handle closes.", + path_display + ), + suggestion: "Here's what to try:\n\ + - Wait a moment and try again — once the last handle closes, the file disappears\n\ + - Close any other apps that might have this file open\n\ + - If it sticks around, restart Cmdr to drop any handles it might still hold" + .into(), + raw_detail, + retry_hint: true, + action_kind: None, + } +} + pub(super) fn io_serious(path_display: &str, message: &str, raw_detail: String) -> FriendlyError { FriendlyError { category: ErrorCategory::Serious, diff --git a/apps/desktop/src-tauri/src/file_system/volume/friendly_error/mod.rs b/apps/desktop/src-tauri/src/file_system/volume/friendly_error/mod.rs index f9826227..075df64c 100644 --- a/apps/desktop/src-tauri/src/file_system/volume/friendly_error/mod.rs +++ b/apps/desktop/src-tauri/src/file_system/volume/friendly_error/mod.rs @@ -211,6 +211,7 @@ mod tests { true, ), (VolumeError::Cancelled("x".into()), ErrorCategory::Transient, true), + (VolumeError::DeletePending("x".into()), ErrorCategory::Transient, true), ( VolumeError::IoError { message: "x".into(), @@ -296,6 +297,7 @@ mod tests { VolumeError::NotFound("x".into()), VolumeError::PermissionDenied("x".into()), VolumeError::ConnectionTimeout("x".into()), + VolumeError::DeletePending("x".into()), VolumeError::IoError { message: "x".into(), raw_os_error: None, @@ -357,6 +359,36 @@ mod tests { } } + #[test] + fn delete_pending_uses_dedicated_copy() { + let path = Path::new("/Volumes/share/photo.jpg"); + let err = VolumeError::DeletePending("Protocol error: STATUS_DELETE_PENDING during Create".into()); + let friendly = friendly_error_from_volume_error(&err, path); + + assert_eq!(friendly.category, ErrorCategory::Transient); + assert!( + friendly.retry_hint, + "DeletePending is transient — user should see a retry hint" + ); + assert!( + friendly.title.contains("being removed"), + "DeletePending title should say the file is being removed, got: {:?}", + friendly.title, + ); + // The path is interpolated into the explanation so the user knows which file. + assert!( + friendly.explanation.contains("photo.jpg"), + "DeletePending explanation should include the path, got: {:?}", + friendly.explanation, + ); + // raw_detail preserves the underlying NTSTATUS for the technical-details disclosure. + assert!( + friendly.raw_detail.contains("DELETE_PENDING"), + "raw_detail should preserve the NTSTATUS code, got: {:?}", + friendly.raw_detail, + ); + } + // ── action_kind tests ─────────────────────────────────────────────── #[cfg(target_os = "macos")] diff --git a/apps/desktop/src-tauri/src/file_system/volume/friendly_error/volume_error.rs b/apps/desktop/src-tauri/src/file_system/volume/friendly_error/volume_error.rs index 3cab2ef3..3b21ed46 100644 --- a/apps/desktop/src-tauri/src/file_system/volume/friendly_error/volume_error.rs +++ b/apps/desktop/src-tauri/src/file_system/volume/friendly_error/volume_error.rs @@ -49,6 +49,7 @@ pub fn friendly_error_from_volume_error(err: &VolumeError, path: &Path) -> Frien VolumeError::StorageFull { .. } => kinds::storage_full(raw), VolumeError::ConnectionTimeout(_) => kinds::connection_timeout(raw), VolumeError::Cancelled(_) => kinds::cancelled(raw), + VolumeError::DeletePending(_) => kinds::delete_pending(&path_display, raw), VolumeError::IsADirectory(_) => FriendlyError { category: ErrorCategory::NeedsAction, title: "This is a folder, not a file".into(), diff --git a/apps/desktop/src-tauri/src/file_system/volume/friendly_error/write_error.rs b/apps/desktop/src-tauri/src/file_system/volume/friendly_error/write_error.rs index 39a152a3..8e607dcb 100644 --- a/apps/desktop/src-tauri/src/file_system/volume/friendly_error/write_error.rs +++ b/apps/desktop/src-tauri/src/file_system/volume/friendly_error/write_error.rs @@ -142,6 +142,7 @@ pub fn friendly_from_write_error(err: &crate::file_system::write_operations::Wri retry_hint: false, action_kind: None, }, + W::DeletePending { path } => kinds::delete_pending(path, raw), W::IoError { path, message } => kinds::io_serious(path, message, raw), } } diff --git a/apps/desktop/src-tauri/src/file_system/volume/mod.rs b/apps/desktop/src-tauri/src/file_system/volume/mod.rs index 05070d75..5286562e 100644 --- a/apps/desktop/src-tauri/src/file_system/volume/mod.rs +++ b/apps/desktop/src-tauri/src/file_system/volume/mod.rs @@ -166,6 +166,11 @@ pub enum VolumeError { Cancelled(String), /// The path is a directory, not a file (for example, SMB STATUS_FILE_IS_A_DIRECTORY). IsADirectory(String), + /// The file is in `STATUS_DELETE_PENDING`: a delete has been requested on the server + /// but at least one open handle is keeping the file alive. The file will disappear + /// once the last handle closes; any new `Create` (stat, open, write) on the path + /// fails with this status in the meantime. SMB-only today. + DeletePending(String), IoError { message: String, raw_os_error: Option, @@ -194,6 +199,7 @@ impl std::fmt::Display for VolumeError { Self::ConnectionTimeout(msg) => write!(f, "Connection timed out: {}", msg), Self::Cancelled(msg) => write!(f, "Cancelled: {}", msg), Self::IsADirectory(path) => write!(f, "Is a directory: {}", path), + Self::DeletePending(path) => write!(f, "Delete pending: {}", path), Self::IoError { message, .. } => write!(f, "I/O error: {}", message), Self::FriendlyGit(err) => write!(f, "git: {}", err), } diff --git a/apps/desktop/src-tauri/src/file_system/volume/smb.rs b/apps/desktop/src-tauri/src/file_system/volume/smb.rs index dd00e091..c6b00e71 100644 --- a/apps/desktop/src-tauri/src/file_system/volume/smb.rs +++ b/apps/desktop/src-tauri/src/file_system/volume/smb.rs @@ -94,6 +94,8 @@ impl ConnectionState { } } +static CLIENT_LOCK_TICKET: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0); + // ── Type mapping helpers ──────────────────────────────────────────── /// Converts an `smb2::FileTime` to seconds since the Unix epoch, matching @@ -134,6 +136,15 @@ fn fs_info_to_space_info(info: &smb2::client::tree::FsInfo) -> SpaceInfo { /// Converts an `smb2::Error` to `VolumeError`. fn map_smb_error(err: smb2::Error) -> VolumeError { use smb2::ErrorKind; + use smb2::types::status::NtStatus; + + // `STATUS_DELETE_PENDING` currently classifies as `ErrorKind::Other` in + // smb2 (no typed variant yet), so we detect it via the raw NTSTATUS before + // falling through to the generic kind match. + if err.status() == Some(NtStatus::DELETE_PENDING) { + return VolumeError::DeletePending(err.to_string()); + } + match err.kind() { ErrorKind::NotFound => VolumeError::NotFound(err.to_string()), ErrorKind::AlreadyExists => VolumeError::AlreadyExists(err.to_string()), @@ -454,21 +465,6 @@ impl SmbVolume { } } - /// Acquires the client mutex and returns a guard over the `Option`. - /// Checks connection state first, then verifies the client is present. - /// - /// Most methods still go through this (stat, list_directory, rename, etc.); - /// only the hot streaming-read / compound-write paths use the cheaper - /// `clone_connection` helper that releases the lock before driving the op. - async fn acquire_client(&self) -> Result>, VolumeError> { - self.check_connection()?; - let guard = self.client.lock().await; - if guard.is_none() { - return Err(VolumeError::DeviceDisconnected("SMB session not available".to_string())); - } - Ok(guard) - } - /// Reads out a clone of `Arc`. Cheap (`Arc::clone`). async fn tree_arc(&self) -> Result, VolumeError> { self.check_connection()?; @@ -489,12 +485,37 @@ impl SmbVolume { async fn clone_session(&self) -> Result<(Arc, smb2::client::Connection), VolumeError> { self.check_connection()?; let tree = self.tree_arc().await?; + let ticket = CLIENT_LOCK_TICKET.fetch_add(1, Ordering::Relaxed); + let start = std::time::Instant::now(); + log::debug!( + "client-mutex: waiting ticket={} caller=clone_session share={}", + ticket, + self.share_name + ); let conn = { let mut guard = self.client.lock().await; - let client = guard - .as_mut() - .ok_or_else(|| VolumeError::DeviceDisconnected("SMB session not available".to_string()))?; - client.connection_mut().clone() + log::debug!( + "client-mutex: acquired ticket={} caller=clone_session share={} waited={:?}", + ticket, + self.share_name, + start.elapsed() + ); + let acquired_at = std::time::Instant::now(); + let client = guard.as_mut().ok_or_else(|| { + log::debug!( + "client-mutex: released ticket={} caller=clone_session held_for={:?} (no-session-bail)", + ticket, + acquired_at.elapsed() + ); + VolumeError::DeviceDisconnected("SMB session not available".to_string()) + })?; + let c = client.connection_mut().clone(); + log::debug!( + "client-mutex: released ticket={} caller=clone_session held_for={:?}", + ticket, + acquired_at.elapsed() + ); + c }; Ok((tree, conn)) } @@ -1699,6 +1720,23 @@ impl Volume for SmbVolume { mut stream: Box, on_progress: &'a (dyn Fn(u64, u64) -> std::ops::ControlFlow<()> + Sync), ) -> Pin> + Send + 'a>> { + // Lock-free streaming write path. + // + // Both branches below drive the upload on a cloned `Connection` + // (cheap `Arc::clone`) and an `Arc`. The client mutex is + // held only for the few microseconds of `clone_session()`, never + // for the upload itself. With smb2 0.9's owned `FileWriter`, N + // concurrent `write_from_stream` calls on one `SmbVolume` + // pipeline N WRITE chains over a single SMB session: smb2's + // receiver task multiplexes responses by `MessageId`. + // + // This collapses the historical two-phase pattern (brief + // `clone_session` for the fast-path → drop → long + // session-mutex hold for the streaming fallback) into a single + // clone. The old shape deadlocked under sustained concurrent + // pressure; the regression test + // `smb_integration_concurrent_streaming_writes_no_deadlock` + // pins this shape. Box::pin(async move { let smb_path = self.to_smb_path(dest); @@ -1707,15 +1745,18 @@ impl Volume for SmbVolume { self.share_name, smb_path, size ); - // Compound fast-path: when the caller promised a size that fits in - // one WRITE, drain the source stream into a buffer and send + // Acquire a cloned session once, up front. Both the compound + // fast-path and the streaming fallback drive their write on + // this same clone — no second `clone_session` needed. + let (tree, conn) = self.clone_session().await?; + + // Compound fast-path: when the caller promised a size that fits + // in one WRITE, drain the source stream into a buffer and send // CREATE+WRITE+FLUSH+CLOSE as a single compound frame (1 RTT - // instead of 4). Runs on a cloned `Connection` with no lock held, - // so N concurrent small writes pipeline over one SMB session. - // Small files are the hot case; for anything larger we fall - // through to the streaming writer below. + // instead of 4). Small files are the hot case; we fall through + // to the streaming writer for anything larger or when the source + // returns short. if size > 0 { - let (tree, mut conn) = self.clone_session().await?; let max_write = conn.params().map(|p| p.max_write_size).unwrap_or(65536) as u64; if size <= max_write { let mut buffer = Vec::with_capacity(size as usize); @@ -1736,48 +1777,38 @@ impl Volume for SmbVolume { "SmbVolume::write_from_stream: using compound fast-path ({} bytes)", buffer.len() ); + let mut conn = conn; let write_result = tree.write_file_compound(&mut conn, &smb_path, &buffer).await; let bytes_written = self.handle_smb_result("write_from_stream(compound)", write_result)?; return Ok(bytes_written); } - // Size mismatch: drop the cloned conn and re-feed the - // already-drained buffer through the streaming writer - // below (which needs the client mutex because - // `FileWriter` borrows `&'a mut Connection` from the - // `SmbClient`). + // Size mismatch: feed the already-drained buffer through + // the streaming writer on the same cloned connection. + // No lock acquired; this is the rare path. debug!( - "SmbVolume::write_from_stream: compound fast-path source yielded {} bytes, expected {}; falling back", + "SmbVolume::write_from_stream: compound fast-path source yielded {} bytes, expected {}; falling back to streaming writer", buffer.len(), size ); - drop(conn); - let tree_arc = tree; - let mut guard = self.acquire_client().await?; - let client = guard.as_mut().unwrap(); - let writer_result = client.create_file_writer(&tree_arc, &smb_path).await; + let writer_result = tree.create_file_writer(conn, &smb_path).await; let mut writer = self.handle_smb_result("write_from_stream(open)", writer_result)?; if !buffer.is_empty() { let write_result = writer.write_chunk(&buffer).await; self.handle_smb_result("write_from_stream(write_chunk)", write_result)?; } + // The source signalled end-of-stream by returning None + // above (we exited the drain loop). No further chunks. let finish_result = writer.finish().await; self.handle_smb_result("write_from_stream(finish)", finish_result)?; return Ok(buffer.len() as u64); } } - // Streaming fallback for large / unknown-size writes. Holds the - // client mutex for the duration of the transfer because - // `FileWriter<'a>` borrows `&'a mut Connection` from the - // `SmbClient` we create it from. Large files are rare in the hot - // copy path, so this doesn't hurt concurrency in practice; the - // compound fast-path above handles every small file without - // touching the client mutex for the write itself. - let tree_arc = self.tree_arc().await?; - let mut guard = self.acquire_client().await?; - let client = guard.as_mut().unwrap(); - - let writer_result = client.create_file_writer(&tree_arc, &smb_path).await; + // Streaming path for large / unknown-size writes. Drives the + // owned `FileWriter` on the cloned `Connection` directly — + // no client mutex is held while WRITEs are in flight, so N + // concurrent large copies pipeline over one SMB session. + let writer_result = tree.create_file_writer(conn, &smb_path).await; let mut writer = self.handle_smb_result("write_from_stream(open)", writer_result)?; let mut bytes_read = 0u64; @@ -1800,7 +1831,11 @@ impl Volume for SmbVolume { // anyway). Dropping directly would leave stale responses // on the connection and poison the next op. let _ = writer.abort().await; - let _ = tree_arc.delete_file(client.connection_mut(), &smb_path).await; + // Best-effort delete of the partial file on its own + // cloned connection (the writer's connection is gone). + if let Ok((tree_for_delete, mut conn_for_delete)) = self.clone_session().await { + let _ = tree_for_delete.delete_file(&mut conn_for_delete, &smb_path).await; + } return Err(VolumeError::Cancelled("Operation cancelled by user".to_string())); } } @@ -1989,6 +2024,26 @@ mod tests { assert!(matches!(ve, VolumeError::NotFound(_))); } + #[test] + fn map_smb_error_delete_pending() { + // STATUS_DELETE_PENDING surfaces when a delete has been requested but at + // least one open handle is keeping the file alive. smb2 currently classifies + // it as `ErrorKind::Other`, so `map_smb_error` must dispatch on the raw + // NTSTATUS to produce the typed `VolumeError::DeletePending` variant — + // otherwise the FE falls back to the generic "disk needs attention" copy + // instead of the transient "file is being removed" message. + let err = smb2::Error::Protocol { + status: smb2::types::status::NtStatus::DELETE_PENDING, + command: smb2::types::Command::Create, + }; + let ve = map_smb_error(err); + assert!( + matches!(ve, VolumeError::DeletePending(_)), + "STATUS_DELETE_PENDING should map to VolumeError::DeletePending, got: {:?}", + ve, + ); + } + #[test] fn map_smb_error_access_denied() { let err = smb2::Error::Protocol { @@ -4056,4 +4111,353 @@ mod tests { final_fds ); } + + // ── SMB streaming-write regression test ──────────────────────────── + // + // Helpers + one `#[ignore]`d integration test that guards against the + // streaming-write deadlock fixed in commit `efb15479`. See the docstring + // on `smb_integration_concurrent_streaming_writes_no_deadlock` for the + // full story. + + /// All test artifacts on the SMB share live under this prefix. The + /// cleanup helper refuses to delete anything that doesn't start with it. + const TEST_PREFIX_ROOT: &str = "_test/cmdr-regression-"; + + /// Captures `client-mutex:` (cmdr) and `recv:` (smb2 receiver loop) + /// debug lines into bounded ring buffers so a hung test's panic message + /// can include the last ~30 lines from each stream. That's invaluable + /// for diagnosing a future regression. Installed via `log::set_logger` + /// once per process; subsequent installs are no-ops. + struct MutexCaptureLogger { + mutex_lines: std::sync::Mutex>, + recv_lines: std::sync::Mutex>, + } + impl log::Log for MutexCaptureLogger { + fn enabled(&self, _md: &log::Metadata) -> bool { + true + } + fn log(&self, record: &log::Record) { + let msg = format!("{}", record.args()); + let target = record.target(); + // `client-mutex:` lines come from smb.rs via `log::debug!` with + // the module-path target (`cmdr_lib::file_system::volume::smb`). + // `recv:` lines come from the smb2 receiver loop with an `smb2::*` + // target. + if msg.starts_with("client-mutex:") { + let mut q = self.mutex_lines.lock().unwrap(); + if q.len() >= 200 { + q.pop_front(); + } + q.push_back(format!("[{}] {}", target, msg)); + } else if msg.starts_with("recv:") || (target.starts_with("smb2") && msg.contains("recv")) { + let mut q = self.recv_lines.lock().unwrap(); + if q.len() >= 200 { + q.pop_front(); + } + q.push_back(format!("[{}] {}", target, msg)); + } + // The captured ring buffers are the diagnostic. We deliberately + // skip mirroring to stderr: `eprintln!` is denied crate-wide, + // and re-emitting through `log::*` would recurse into this same + // logger (and the mutex above) on every call. + } + fn flush(&self) {} + } + + static MUTEX_CAPTURE_LOGGER: OnceLock<&'static MutexCaptureLogger> = OnceLock::new(); + + fn install_mutex_capture_logger() -> &'static MutexCaptureLogger { + if let Some(l) = MUTEX_CAPTURE_LOGGER.get() { + return l; + } + let leaked: &'static MutexCaptureLogger = Box::leak(Box::new(MutexCaptureLogger { + mutex_lines: std::sync::Mutex::new(std::collections::VecDeque::with_capacity(200)), + recv_lines: std::sync::Mutex::new(std::collections::VecDeque::with_capacity(200)), + })); + // Best-effort: if another logger is already installed, ignore. + let _ = log::set_logger(leaked); + log::set_max_level(log::LevelFilter::Debug); + let _ = MUTEX_CAPTURE_LOGGER.set(leaked); + leaked + } + + /// Deletes every file under `unique_prefix_smb` and then the directory + /// itself. Safety: refuses any path that doesn't start with + /// `TEST_PREFIX_ROOT`, both at the top level and per entry, so a logic + /// bug in the caller can never reach outside the regression sandbox. + /// Called explicitly at the end of each pass (best effort: logs but + /// never overrides the test outcome). + async fn cleanup_test_prefix(vol: &SmbVolume, mount_path: &Path, unique_prefix_smb: &str) { + assert!( + unique_prefix_smb.starts_with(TEST_PREFIX_ROOT), + "cleanup_test_prefix: refusing to clean a prefix outside {TEST_PREFIX_ROOT:?}: {unique_prefix_smb:?}" + ); + let dir_abs = mount_path.join(unique_prefix_smb.trim_start_matches('/')); + let rel_of = |abs: &Path| -> String { + abs.to_string_lossy() + .strip_prefix(mount_path.to_string_lossy().as_ref()) + .map(|s| s.trim_start_matches('/').to_string()) + .unwrap_or_else(|| abs.to_string_lossy().to_string()) + }; + match vol.list_directory_impl(&dir_abs).await { + Ok(entries) => { + for entry in entries { + let abs = dir_abs.join(&entry.name); + let rel = rel_of(&abs); + if !rel.starts_with(TEST_PREFIX_ROOT) { + log::warn!("cleanup_test_prefix: refusing to delete {rel} (outside prefix)"); + continue; + } + if let Err(e) = vol.delete(&abs).await { + log::warn!("cleanup_test_prefix: failed to delete {rel}: {e:?}"); + } + } + } + Err(e) => log::warn!("cleanup_test_prefix: list_directory_impl failed for {dir_abs:?}: {e:?}"), + } + let rel_dir = rel_of(&dir_abs); + if rel_dir.starts_with(TEST_PREFIX_ROOT) + && let Err(e) = vol.delete(&dir_abs).await + { + log::warn!("cleanup_test_prefix: failed to delete prefix dir {rel_dir}: {e:?}"); + } + } + + /// Connects to a Docker SMB fixture's `public` share at `127.0.0.1:port` + /// as guest. `mount_label` becomes the synthetic mount path + /// (`/Volumes/