Skip to content

Commit 4a45be5

Browse files
authored
perf(pet-fs): memoize norm_case to avoid repeated GetLongPathNameW syscalls (#457)
Fixes #455. `norm_case` on Windows wraps `GetLongPathNameW`, which is a real filesystem syscall (Defender-intercepted). Hot paths normalize the same inputs many times per `refresh` — the user home, common parents, every `PATH` entry, every registry `env_path`/`executable`. This PR adds a per-process memoization cache so each unique absolute path costs at most one `GetLongPathNameW` for the lifetime of the process. ### Key design choices - Cache is keyed on the **computed absolute path**, not the caller-supplied input — so relative inputs stay correct even if the process CWD changes between calls. - Only **successful** normalizations are cached. Failures (typically non-existent paths) are NOT cached, so a path created later in the process can still be normalized. - The cache is `RwLock<HashMap<PathBuf, PathBuf>>`. Read-lock fast path is held only long enough to clone the cached value out. - Lock poisoning is treated as a soft-miss (recompute) rather than a panic — `norm_case` is hot and side-effect-free. ### Tests - `test_norm_case_windows_memoizes_existing_path` — cache hit on second call. - `test_norm_case_windows_does_not_cache_failures` — non-existent paths are not cached. - `test_norm_case_windows_cache_key_is_absolute_path` — cache is keyed on absolute, not raw input.
1 parent 34b2d47 commit 4a45be5

3 files changed

Lines changed: 305 additions & 6 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/pet-fs/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ license.workspace = true
77
[target.'cfg(target_os = "windows")'.dependencies]
88
msvc_spectre_libs = { version = "0.1.1", features = ["error"] }
99
windows-sys = { version = "0.59", features = ["Win32_Storage_FileSystem", "Win32_Foundation"] }
10+
lazy_static = "1.4.0"
1011

1112
[dependencies]
1213
glob = "0.3.1"

crates/pet-fs/src/path.rs

Lines changed: 303 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,29 @@ use std::{
99
#[cfg(unix)]
1010
use std::path::MAIN_SEPARATOR;
1111

12+
#[cfg(windows)]
13+
use std::{collections::HashMap, sync::RwLock};
14+
15+
#[cfg(windows)]
16+
use lazy_static::lazy_static;
17+
18+
#[cfg(windows)]
19+
lazy_static! {
20+
/// Per-process memoization cache for `norm_case` results on Windows.
21+
///
22+
/// Each `norm_case` call invokes `GetLongPathNameW`, which is a real
23+
/// filesystem syscall (Defender-intercepted on Windows). Many hot paths
24+
/// — the user home directory, common parents, every `PATH` entry, every
25+
/// registry `env_path`/`executable` — get normalized repeatedly per
26+
/// refresh. Memoizing eliminates the repeat syscalls.
27+
///
28+
/// We cache only successful normalizations: a failure (`GetLongPathNameW`
29+
/// returning 0, typically because the path doesn't exist yet) is *not*
30+
/// cached so a path that gets created later can still be normalized.
31+
static ref NORM_CASE_CACHE: RwLock<HashMap<PathBuf, PathBuf>> =
32+
RwLock::new(HashMap::new());
33+
}
34+
1235
/// Strips trailing path separators from a path, preserving root paths.
1336
///
1437
/// This function removes trailing `/` or `\` from paths while ensuring that root paths
@@ -91,6 +114,14 @@ pub fn strip_trailing_separator<P: AsRef<Path>>(path: P) -> PathBuf {
91114
/// - On Windows, this function uses `GetLongPathNameW` which **preserves junction paths**
92115
/// unlike `fs::canonicalize` which would resolve them to their target.
93116
/// - For symlink resolution, use `resolve_symlink()` instead.
117+
/// - On Windows, results are memoized in a per-process cache. The key is
118+
/// the computed *absolute* path with separators canonicalized (forward
119+
/// → backslash) and trailing separators stripped, so the same logical
120+
/// path is cached only once even when callers vary in style. Only
121+
/// successful normalizations are cached, so paths that do not yet exist
122+
/// on disk can still be normalized later. The cache has a generous soft
123+
/// cap (`NORM_CASE_CACHE_MAX_ENTRIES`) and is cleared when exceeded; it
124+
/// is otherwise never invalidated for the lifetime of the process.
94125
///
95126
/// # Related
96127
/// - `strip_trailing_separator()` - Just removes trailing separators
@@ -109,21 +140,130 @@ pub fn norm_case<P: AsRef<Path>>(path: P) -> PathBuf {
109140

110141
#[cfg(windows)]
111142
{
112-
// First, convert to absolute path if relative, without resolving symlinks/junctions
113-
let absolute_path = if path.as_ref().is_absolute() {
114-
path.as_ref().to_path_buf()
143+
// First, convert to absolute path if relative, without resolving symlinks/junctions.
144+
// We key the cache on this *absolute* path rather than the caller's input so
145+
// a relative input like "./foo" stays correct if the process CWD changes
146+
// between calls. If `current_dir()` itself fails (extremely rare), we
147+
// proceed with the original path but skip caching — see below.
148+
let (absolute_path, abs_ok) = if path.as_ref().is_absolute() {
149+
(path.as_ref().to_path_buf(), true)
115150
} else if let Ok(abs) = std::env::current_dir() {
116-
abs.join(path.as_ref())
151+
(abs.join(path.as_ref()), true)
117152
} else {
118-
path.as_ref().to_path_buf()
153+
(path.as_ref().to_path_buf(), false)
119154
};
120155

156+
// Apply a cheap, deterministic pre-normalization (slash style + trailing
157+
// separator) to the cache key so the same logical path is not cached under
158+
// multiple keys (e.g. `C:\Windows`, `C:\Windows\`, and `C:/Windows`).
159+
// This is purely an in-memory transform — no syscalls.
160+
let cache_key = cache_key_for(&absolute_path);
161+
162+
// Fast path: cache hit. Read lock is held only while we copy the
163+
// already-normalized PathBuf out. If the lock is poisoned we recover
164+
// the inner map via `into_inner()` so the cache keeps working for
165+
// the rest of the process — silently disabling the cache after a
166+
// single bad caller would reintroduce the syscall storm we're
167+
// trying to avoid.
168+
let cache_read = NORM_CASE_CACHE
169+
.read()
170+
.unwrap_or_else(|poisoned| poisoned.into_inner());
171+
if let Some(cached) = cache_read.get(&cache_key) {
172+
return cached.clone();
173+
}
174+
// Explicitly drop the read guard before we try to acquire the
175+
// write guard below: `std::sync::RwLock` does not guarantee
176+
// safe same-thread read → write upgrades, and holding both at
177+
// once can deadlock on some platforms.
178+
drop(cache_read);
179+
121180
// Use GetLongPathNameW to normalize case without resolving junctions.
122181
// If normalization fails, fall back to the computed absolute path to keep behavior consistent.
123-
normalize_case_windows(&absolute_path).unwrap_or(absolute_path)
182+
match normalize_case_windows(&absolute_path) {
183+
Some(normalized) => {
184+
// Only cache successful normalizations whose key is a real
185+
// absolute path:
186+
// * Failures (None) are skipped because the path may be
187+
// created later in the process and should normalize then.
188+
// * !abs_ok means we never resolved CWD, so the key is a
189+
// relative path — caching it could become wrong if CWD
190+
// changes later. This branch is essentially unreachable
191+
// in practice (current_dir() failing) but we'd rather
192+
// pay the syscall again than poison the cache.
193+
if abs_ok {
194+
let mut cache = NORM_CASE_CACHE
195+
.write()
196+
.unwrap_or_else(|poisoned| poisoned.into_inner());
197+
// Soft cap: if the cache grows unreasonably large
198+
// (e.g. a long-lived JSONRPC server scanning many
199+
// workspaces), reset rather than tracking a true LRU.
200+
// The working set per-session is small, so re-warming
201+
// is cheap.
202+
if cache.len() >= NORM_CASE_CACHE_MAX_ENTRIES {
203+
cache.clear();
204+
}
205+
cache.insert(cache_key, normalized.clone());
206+
}
207+
normalized
208+
}
209+
None => absolute_path,
210+
}
124211
}
125212
}
126213

214+
/// Soft upper bound on the number of entries `NORM_CASE_CACHE` will hold
215+
/// before being cleared. The working set of paths a single PET refresh
216+
/// touches is at most low hundreds (PATH entries, registry installs,
217+
/// conda envs, virtualenv prefixes, etc.). 4096 leaves ample headroom
218+
/// for unusual workspaces while keeping the worst-case memory footprint
219+
/// bounded for long-lived JSONRPC server processes.
220+
#[cfg(windows)]
221+
const NORM_CASE_CACHE_MAX_ENTRIES: usize = 4096;
222+
223+
/// Builds a deterministic cache key for `norm_case` from an absolute path.
224+
///
225+
/// We canonicalize separators (forward → backslash) and strip trailing
226+
/// separators (preserving roots) so that, for example, `C:\Windows`,
227+
/// `C:\Windows\`, and `C:/Windows` all collapse to the same key. This is
228+
/// purely an in-memory transform — no syscalls — and exists only to
229+
/// improve cache hit rate, not to change the result of `norm_case`.
230+
///
231+
/// We operate on the UTF-16 (`encode_wide`) representation rather than
232+
/// going through `to_string_lossy()` so the transform is lossless for
233+
/// all `Path` values, including the rare paths that contain non-Unicode
234+
/// bytes (unpaired surrogates) which `to_string_lossy()` would map to
235+
/// `U+FFFD` and so could collapse distinct paths to the same key.
236+
#[cfg(windows)]
237+
fn cache_key_for(absolute_path: &Path) -> PathBuf {
238+
use std::ffi::OsString;
239+
use std::os::windows::ffi::{OsStrExt, OsStringExt};
240+
241+
const FWD: u16 = b'/' as u16;
242+
const BACK: u16 = b'\\' as u16;
243+
244+
// Replace forward slashes with backslashes in-place on the wide form.
245+
let mut wide: Vec<u16> = absolute_path
246+
.as_os_str()
247+
.encode_wide()
248+
.map(|c| if c == FWD { BACK } else { c })
249+
.collect();
250+
251+
// Strip trailing separators while preserving root paths (drive roots,
252+
// UNC roots, network paths, extended-length roots). We rebuild a
253+
// `PathBuf` only as a probe for `Path::parent()`, which already
254+
// handles all the Windows root shapes correctly.
255+
while wide.last() == Some(&BACK) {
256+
let probe = PathBuf::from(OsString::from_wide(&wide));
257+
if probe.parent().is_some() {
258+
wide.pop();
259+
} else {
260+
break;
261+
}
262+
}
263+
264+
PathBuf::from(OsString::from_wide(&wide))
265+
}
266+
127267
/// Windows-specific path case normalization using GetLongPathNameW.
128268
/// This normalizes the case of path components but does NOT resolve junctions or symlinks.
129269
/// Note: GetLongPathNameW requires the path to exist on the filesystem to normalize it.
@@ -619,6 +759,163 @@ mod tests {
619759
);
620760
}
621761

762+
/// Helper: returns an existing Windows directory derived from
763+
/// `SystemRoot` (e.g. `C:\Windows`) so tests don't hardcode a drive
764+
/// letter or the `\System32` subfolder. Both can vary on custom
765+
/// installs / CI setups.
766+
#[cfg(windows)]
767+
fn system_root_dir() -> PathBuf {
768+
let root = std::env::var("SystemRoot")
769+
.or_else(|_| std::env::var("WINDIR"))
770+
.expect("SystemRoot or WINDIR must be set on Windows");
771+
PathBuf::from(root)
772+
}
773+
774+
#[test]
775+
#[cfg(windows)]
776+
fn test_norm_case_windows_memoizes_existing_path() {
777+
// Successful normalizations should be cached so subsequent calls
778+
// for the same input do not re-issue GetLongPathNameW.
779+
// The cache is keyed on the canonicalized absolute path, so we
780+
// pass an already-canonical absolute path here to make the
781+
// assertion deterministic.
782+
let path = system_root_dir();
783+
let key = cache_key_for(&path);
784+
// Tests run in parallel and share NORM_CASE_CACHE; clear the key
785+
// first so this test really exercises insertion-on-miss.
786+
NORM_CASE_CACHE
787+
.write()
788+
.expect("norm_case cache poisoned")
789+
.remove(&key);
790+
791+
let first = norm_case(&path);
792+
// Cache must contain the canonicalized form of the input.
793+
let cached = NORM_CASE_CACHE
794+
.read()
795+
.expect("norm_case cache poisoned")
796+
.get(&key)
797+
.cloned();
798+
assert_eq!(
799+
cached.as_ref(),
800+
Some(&first),
801+
"expected norm_case to insert the result into the cache"
802+
);
803+
// Second call returns the same result (and goes through the cache).
804+
let second = norm_case(&path);
805+
assert_eq!(first, second);
806+
}
807+
808+
#[test]
809+
#[cfg(windows)]
810+
fn test_norm_case_windows_does_not_cache_failures() {
811+
// GetLongPathNameW fails for non-existent paths. We must NOT cache
812+
// those, otherwise a path that gets created later in the process
813+
// would forever return its non-normalized fallback form.
814+
//
815+
// Build the test path inside `std::env::temp_dir()` so it is
816+
// anchored to a real, writable location regardless of which drive
817+
// Windows is installed on, and use a unique name with a low
818+
// collision rate. Then make sure it does not exist on disk so
819+
// GetLongPathNameW deterministically fails.
820+
let nonexistent = std::env::temp_dir().join(format!(
821+
"pet_test_norm_case_no_cache_{}_{}",
822+
std::process::id(),
823+
// Wall-clock nanoseconds — process id alone isn't enough on
824+
// hosts that recycle PIDs quickly.
825+
std::time::SystemTime::now()
826+
.duration_since(std::time::UNIX_EPOCH)
827+
.map(|d| d.as_nanos())
828+
.unwrap_or_default()
829+
));
830+
assert!(
831+
!nonexistent.exists(),
832+
"test setup expected a non-existent path"
833+
);
834+
835+
let key = cache_key_for(&nonexistent);
836+
// Make sure we start from a clean slate for this key.
837+
NORM_CASE_CACHE
838+
.write()
839+
.expect("norm_case cache poisoned")
840+
.remove(&key);
841+
842+
let _ = norm_case(&nonexistent);
843+
844+
let has_entry = NORM_CASE_CACHE
845+
.read()
846+
.expect("norm_case cache poisoned")
847+
.contains_key(&key);
848+
assert!(
849+
!has_entry,
850+
"failed normalizations must not be inserted into the cache"
851+
);
852+
}
853+
854+
#[test]
855+
#[cfg(windows)]
856+
fn test_norm_case_windows_cache_key_is_absolute_path() {
857+
// The cache is keyed on the computed absolute path, not the
858+
// caller-supplied input. Calling norm_case on a relative path
859+
// must NOT insert an entry under that relative key; the entry
860+
// must instead live under the resolved absolute path.
861+
//
862+
// We avoid mutating the process CWD here because cargo runs tests
863+
// in parallel and a global chdir would race with other tests.
864+
let relative = PathBuf::from(".");
865+
let result = norm_case(&relative);
866+
867+
assert!(result.is_absolute(), "result must be absolute");
868+
let cache = NORM_CASE_CACHE.read().expect("norm_case cache poisoned");
869+
assert!(
870+
!cache.contains_key(&relative),
871+
"cache must not be keyed on the caller-supplied relative path"
872+
);
873+
// The canonicalized absolute form must be present (assuming the
874+
// CWD exists, which it always does for a running test process).
875+
let abs = std::env::current_dir()
876+
.expect("cwd must exist")
877+
.join(&relative);
878+
let key = cache_key_for(&abs);
879+
assert_eq!(
880+
cache.get(&key).cloned(),
881+
Some(result),
882+
"cache must be keyed on the canonicalized absolute path"
883+
);
884+
}
885+
886+
#[test]
887+
#[cfg(windows)]
888+
fn test_norm_case_windows_cache_key_canonicalizes_separators_and_trailing_slash() {
889+
// Variant inputs (mixed separators, trailing slash) should all share
890+
// a single cache entry once one of them populates it.
891+
// Use SystemRoot rather than hardcoding a drive letter, since
892+
// Windows can be installed elsewhere.
893+
let canonical = system_root_dir();
894+
let canonical_str = canonical
895+
.to_str()
896+
.expect("SystemRoot is expected to be valid UTF-8");
897+
let with_forward_slashes = PathBuf::from(canonical_str.replace('\\', "/"));
898+
let with_trailing_slash = {
899+
let mut s = canonical_str.to_string();
900+
s.push('\\');
901+
PathBuf::from(s)
902+
};
903+
904+
// Pre-populate by calling norm_case on the canonical form.
905+
let expected = norm_case(&canonical);
906+
907+
// All variants compute to the same cache key.
908+
let key_canonical = cache_key_for(&canonical);
909+
let key_forward = cache_key_for(&with_forward_slashes);
910+
let key_trailing = cache_key_for(&with_trailing_slash);
911+
assert_eq!(key_canonical, key_forward);
912+
assert_eq!(key_canonical, key_trailing);
913+
914+
// And subsequent calls for the variants return the same cached result.
915+
assert_eq!(norm_case(&with_forward_slashes), expected);
916+
assert_eq!(norm_case(&with_trailing_slash), expected);
917+
}
918+
622919
// ==================== resolve_any_symlink tests ====================
623920

624921
#[test]

0 commit comments

Comments
 (0)