@@ -9,6 +9,29 @@ use std::{
99#[ cfg( unix) ]
1010use std:: path:: MAIN_SEPARATOR ;
1111
12+ #[ cfg( windows) ]
13+ use std:: { collections:: HashMap , sync:: RwLock } ;
14+
15+ #[ cfg( windows) ]
16+ use lazy_static:: lazy_static;
17+
18+ #[ cfg( windows) ]
19+ lazy_static ! {
20+ /// Per-process memoization cache for `norm_case` results on Windows.
21+ ///
22+ /// Each `norm_case` call invokes `GetLongPathNameW`, which is a real
23+ /// filesystem syscall (Defender-intercepted on Windows). Many hot paths
24+ /// — the user home directory, common parents, every `PATH` entry, every
25+ /// registry `env_path`/`executable` — get normalized repeatedly per
26+ /// refresh. Memoizing eliminates the repeat syscalls.
27+ ///
28+ /// We cache only successful normalizations: a failure (`GetLongPathNameW`
29+ /// returning 0, typically because the path doesn't exist yet) is *not*
30+ /// cached so a path that gets created later can still be normalized.
31+ static ref NORM_CASE_CACHE : RwLock <HashMap <PathBuf , PathBuf >> =
32+ RwLock :: new( HashMap :: new( ) ) ;
33+ }
34+
1235/// Strips trailing path separators from a path, preserving root paths.
1336///
1437/// This function removes trailing `/` or `\` from paths while ensuring that root paths
@@ -91,6 +114,14 @@ pub fn strip_trailing_separator<P: AsRef<Path>>(path: P) -> PathBuf {
91114/// - On Windows, this function uses `GetLongPathNameW` which **preserves junction paths**
92115/// unlike `fs::canonicalize` which would resolve them to their target.
93116/// - For symlink resolution, use `resolve_symlink()` instead.
117+ /// - On Windows, results are memoized in a per-process cache. The key is
118+ /// the computed *absolute* path with separators canonicalized (forward
119+ /// → backslash) and trailing separators stripped, so the same logical
120+ /// path is cached only once even when callers vary in style. Only
121+ /// successful normalizations are cached, so paths that do not yet exist
122+ /// on disk can still be normalized later. The cache has a generous soft
123+ /// cap (`NORM_CASE_CACHE_MAX_ENTRIES`) and is cleared when exceeded; it
124+ /// is otherwise never invalidated for the lifetime of the process.
94125///
95126/// # Related
96127/// - `strip_trailing_separator()` - Just removes trailing separators
@@ -109,21 +140,130 @@ pub fn norm_case<P: AsRef<Path>>(path: P) -> PathBuf {
109140
110141 #[ cfg( windows) ]
111142 {
112- // First, convert to absolute path if relative, without resolving symlinks/junctions
113- let absolute_path = if path. as_ref ( ) . is_absolute ( ) {
114- path. as_ref ( ) . to_path_buf ( )
143+ // First, convert to absolute path if relative, without resolving symlinks/junctions.
144+ // We key the cache on this *absolute* path rather than the caller's input so
145+ // a relative input like "./foo" stays correct if the process CWD changes
146+ // between calls. If `current_dir()` itself fails (extremely rare), we
147+ // proceed with the original path but skip caching — see below.
148+ let ( absolute_path, abs_ok) = if path. as_ref ( ) . is_absolute ( ) {
149+ ( path. as_ref ( ) . to_path_buf ( ) , true )
115150 } else if let Ok ( abs) = std:: env:: current_dir ( ) {
116- abs. join ( path. as_ref ( ) )
151+ ( abs. join ( path. as_ref ( ) ) , true )
117152 } else {
118- path. as_ref ( ) . to_path_buf ( )
153+ ( path. as_ref ( ) . to_path_buf ( ) , false )
119154 } ;
120155
156+ // Apply a cheap, deterministic pre-normalization (slash style + trailing
157+ // separator) to the cache key so the same logical path is not cached under
158+ // multiple keys (e.g. `C:\Windows`, `C:\Windows\`, and `C:/Windows`).
159+ // This is purely an in-memory transform — no syscalls.
160+ let cache_key = cache_key_for ( & absolute_path) ;
161+
162+ // Fast path: cache hit. Read lock is held only while we copy the
163+ // already-normalized PathBuf out. If the lock is poisoned we recover
164+ // the inner map via `into_inner()` so the cache keeps working for
165+ // the rest of the process — silently disabling the cache after a
166+ // single bad caller would reintroduce the syscall storm we're
167+ // trying to avoid.
168+ let cache_read = NORM_CASE_CACHE
169+ . read ( )
170+ . unwrap_or_else ( |poisoned| poisoned. into_inner ( ) ) ;
171+ if let Some ( cached) = cache_read. get ( & cache_key) {
172+ return cached. clone ( ) ;
173+ }
174+ // Explicitly drop the read guard before we try to acquire the
175+ // write guard below: `std::sync::RwLock` does not guarantee
176+ // safe same-thread read → write upgrades, and holding both at
177+ // once can deadlock on some platforms.
178+ drop ( cache_read) ;
179+
121180 // Use GetLongPathNameW to normalize case without resolving junctions.
122181 // If normalization fails, fall back to the computed absolute path to keep behavior consistent.
123- normalize_case_windows ( & absolute_path) . unwrap_or ( absolute_path)
182+ match normalize_case_windows ( & absolute_path) {
183+ Some ( normalized) => {
184+ // Only cache successful normalizations whose key is a real
185+ // absolute path:
186+ // * Failures (None) are skipped because the path may be
187+ // created later in the process and should normalize then.
188+ // * !abs_ok means we never resolved CWD, so the key is a
189+ // relative path — caching it could become wrong if CWD
190+ // changes later. This branch is essentially unreachable
191+ // in practice (current_dir() failing) but we'd rather
192+ // pay the syscall again than poison the cache.
193+ if abs_ok {
194+ let mut cache = NORM_CASE_CACHE
195+ . write ( )
196+ . unwrap_or_else ( |poisoned| poisoned. into_inner ( ) ) ;
197+ // Soft cap: if the cache grows unreasonably large
198+ // (e.g. a long-lived JSONRPC server scanning many
199+ // workspaces), reset rather than tracking a true LRU.
200+ // The working set per-session is small, so re-warming
201+ // is cheap.
202+ if cache. len ( ) >= NORM_CASE_CACHE_MAX_ENTRIES {
203+ cache. clear ( ) ;
204+ }
205+ cache. insert ( cache_key, normalized. clone ( ) ) ;
206+ }
207+ normalized
208+ }
209+ None => absolute_path,
210+ }
124211 }
125212}
126213
214+ /// Soft upper bound on the number of entries `NORM_CASE_CACHE` will hold
215+ /// before being cleared. The working set of paths a single PET refresh
216+ /// touches is at most low hundreds (PATH entries, registry installs,
217+ /// conda envs, virtualenv prefixes, etc.). 4096 leaves ample headroom
218+ /// for unusual workspaces while keeping the worst-case memory footprint
219+ /// bounded for long-lived JSONRPC server processes.
220+ #[ cfg( windows) ]
221+ const NORM_CASE_CACHE_MAX_ENTRIES : usize = 4096 ;
222+
223+ /// Builds a deterministic cache key for `norm_case` from an absolute path.
224+ ///
225+ /// We canonicalize separators (forward → backslash) and strip trailing
226+ /// separators (preserving roots) so that, for example, `C:\Windows`,
227+ /// `C:\Windows\`, and `C:/Windows` all collapse to the same key. This is
228+ /// purely an in-memory transform — no syscalls — and exists only to
229+ /// improve cache hit rate, not to change the result of `norm_case`.
230+ ///
231+ /// We operate on the UTF-16 (`encode_wide`) representation rather than
232+ /// going through `to_string_lossy()` so the transform is lossless for
233+ /// all `Path` values, including the rare paths that contain non-Unicode
234+ /// bytes (unpaired surrogates) which `to_string_lossy()` would map to
235+ /// `U+FFFD` and so could collapse distinct paths to the same key.
236+ #[ cfg( windows) ]
237+ fn cache_key_for ( absolute_path : & Path ) -> PathBuf {
238+ use std:: ffi:: OsString ;
239+ use std:: os:: windows:: ffi:: { OsStrExt , OsStringExt } ;
240+
241+ const FWD : u16 = b'/' as u16 ;
242+ const BACK : u16 = b'\\' as u16 ;
243+
244+ // Replace forward slashes with backslashes in-place on the wide form.
245+ let mut wide: Vec < u16 > = absolute_path
246+ . as_os_str ( )
247+ . encode_wide ( )
248+ . map ( |c| if c == FWD { BACK } else { c } )
249+ . collect ( ) ;
250+
251+ // Strip trailing separators while preserving root paths (drive roots,
252+ // UNC roots, network paths, extended-length roots). We rebuild a
253+ // `PathBuf` only as a probe for `Path::parent()`, which already
254+ // handles all the Windows root shapes correctly.
255+ while wide. last ( ) == Some ( & BACK ) {
256+ let probe = PathBuf :: from ( OsString :: from_wide ( & wide) ) ;
257+ if probe. parent ( ) . is_some ( ) {
258+ wide. pop ( ) ;
259+ } else {
260+ break ;
261+ }
262+ }
263+
264+ PathBuf :: from ( OsString :: from_wide ( & wide) )
265+ }
266+
127267/// Windows-specific path case normalization using GetLongPathNameW.
128268/// This normalizes the case of path components but does NOT resolve junctions or symlinks.
129269/// Note: GetLongPathNameW requires the path to exist on the filesystem to normalize it.
@@ -619,6 +759,163 @@ mod tests {
619759 ) ;
620760 }
621761
762+ /// Helper: returns an existing Windows directory derived from
763+ /// `SystemRoot` (e.g. `C:\Windows`) so tests don't hardcode a drive
764+ /// letter or the `\System32` subfolder. Both can vary on custom
765+ /// installs / CI setups.
766+ #[ cfg( windows) ]
767+ fn system_root_dir ( ) -> PathBuf {
768+ let root = std:: env:: var ( "SystemRoot" )
769+ . or_else ( |_| std:: env:: var ( "WINDIR" ) )
770+ . expect ( "SystemRoot or WINDIR must be set on Windows" ) ;
771+ PathBuf :: from ( root)
772+ }
773+
774+ #[ test]
775+ #[ cfg( windows) ]
776+ fn test_norm_case_windows_memoizes_existing_path ( ) {
777+ // Successful normalizations should be cached so subsequent calls
778+ // for the same input do not re-issue GetLongPathNameW.
779+ // The cache is keyed on the canonicalized absolute path, so we
780+ // pass an already-canonical absolute path here to make the
781+ // assertion deterministic.
782+ let path = system_root_dir ( ) ;
783+ let key = cache_key_for ( & path) ;
784+ // Tests run in parallel and share NORM_CASE_CACHE; clear the key
785+ // first so this test really exercises insertion-on-miss.
786+ NORM_CASE_CACHE
787+ . write ( )
788+ . expect ( "norm_case cache poisoned" )
789+ . remove ( & key) ;
790+
791+ let first = norm_case ( & path) ;
792+ // Cache must contain the canonicalized form of the input.
793+ let cached = NORM_CASE_CACHE
794+ . read ( )
795+ . expect ( "norm_case cache poisoned" )
796+ . get ( & key)
797+ . cloned ( ) ;
798+ assert_eq ! (
799+ cached. as_ref( ) ,
800+ Some ( & first) ,
801+ "expected norm_case to insert the result into the cache"
802+ ) ;
803+ // Second call returns the same result (and goes through the cache).
804+ let second = norm_case ( & path) ;
805+ assert_eq ! ( first, second) ;
806+ }
807+
808+ #[ test]
809+ #[ cfg( windows) ]
810+ fn test_norm_case_windows_does_not_cache_failures ( ) {
811+ // GetLongPathNameW fails for non-existent paths. We must NOT cache
812+ // those, otherwise a path that gets created later in the process
813+ // would forever return its non-normalized fallback form.
814+ //
815+ // Build the test path inside `std::env::temp_dir()` so it is
816+ // anchored to a real, writable location regardless of which drive
817+ // Windows is installed on, and use a unique name with a low
818+ // collision rate. Then make sure it does not exist on disk so
819+ // GetLongPathNameW deterministically fails.
820+ let nonexistent = std:: env:: temp_dir ( ) . join ( format ! (
821+ "pet_test_norm_case_no_cache_{}_{}" ,
822+ std:: process:: id( ) ,
823+ // Wall-clock nanoseconds — process id alone isn't enough on
824+ // hosts that recycle PIDs quickly.
825+ std:: time:: SystemTime :: now( )
826+ . duration_since( std:: time:: UNIX_EPOCH )
827+ . map( |d| d. as_nanos( ) )
828+ . unwrap_or_default( )
829+ ) ) ;
830+ assert ! (
831+ !nonexistent. exists( ) ,
832+ "test setup expected a non-existent path"
833+ ) ;
834+
835+ let key = cache_key_for ( & nonexistent) ;
836+ // Make sure we start from a clean slate for this key.
837+ NORM_CASE_CACHE
838+ . write ( )
839+ . expect ( "norm_case cache poisoned" )
840+ . remove ( & key) ;
841+
842+ let _ = norm_case ( & nonexistent) ;
843+
844+ let has_entry = NORM_CASE_CACHE
845+ . read ( )
846+ . expect ( "norm_case cache poisoned" )
847+ . contains_key ( & key) ;
848+ assert ! (
849+ !has_entry,
850+ "failed normalizations must not be inserted into the cache"
851+ ) ;
852+ }
853+
854+ #[ test]
855+ #[ cfg( windows) ]
856+ fn test_norm_case_windows_cache_key_is_absolute_path ( ) {
857+ // The cache is keyed on the computed absolute path, not the
858+ // caller-supplied input. Calling norm_case on a relative path
859+ // must NOT insert an entry under that relative key; the entry
860+ // must instead live under the resolved absolute path.
861+ //
862+ // We avoid mutating the process CWD here because cargo runs tests
863+ // in parallel and a global chdir would race with other tests.
864+ let relative = PathBuf :: from ( "." ) ;
865+ let result = norm_case ( & relative) ;
866+
867+ assert ! ( result. is_absolute( ) , "result must be absolute" ) ;
868+ let cache = NORM_CASE_CACHE . read ( ) . expect ( "norm_case cache poisoned" ) ;
869+ assert ! (
870+ !cache. contains_key( & relative) ,
871+ "cache must not be keyed on the caller-supplied relative path"
872+ ) ;
873+ // The canonicalized absolute form must be present (assuming the
874+ // CWD exists, which it always does for a running test process).
875+ let abs = std:: env:: current_dir ( )
876+ . expect ( "cwd must exist" )
877+ . join ( & relative) ;
878+ let key = cache_key_for ( & abs) ;
879+ assert_eq ! (
880+ cache. get( & key) . cloned( ) ,
881+ Some ( result) ,
882+ "cache must be keyed on the canonicalized absolute path"
883+ ) ;
884+ }
885+
886+ #[ test]
887+ #[ cfg( windows) ]
888+ fn test_norm_case_windows_cache_key_canonicalizes_separators_and_trailing_slash ( ) {
889+ // Variant inputs (mixed separators, trailing slash) should all share
890+ // a single cache entry once one of them populates it.
891+ // Use SystemRoot rather than hardcoding a drive letter, since
892+ // Windows can be installed elsewhere.
893+ let canonical = system_root_dir ( ) ;
894+ let canonical_str = canonical
895+ . to_str ( )
896+ . expect ( "SystemRoot is expected to be valid UTF-8" ) ;
897+ let with_forward_slashes = PathBuf :: from ( canonical_str. replace ( '\\' , "/" ) ) ;
898+ let with_trailing_slash = {
899+ let mut s = canonical_str. to_string ( ) ;
900+ s. push ( '\\' ) ;
901+ PathBuf :: from ( s)
902+ } ;
903+
904+ // Pre-populate by calling norm_case on the canonical form.
905+ let expected = norm_case ( & canonical) ;
906+
907+ // All variants compute to the same cache key.
908+ let key_canonical = cache_key_for ( & canonical) ;
909+ let key_forward = cache_key_for ( & with_forward_slashes) ;
910+ let key_trailing = cache_key_for ( & with_trailing_slash) ;
911+ assert_eq ! ( key_canonical, key_forward) ;
912+ assert_eq ! ( key_canonical, key_trailing) ;
913+
914+ // And subsequent calls for the variants return the same cached result.
915+ assert_eq ! ( norm_case( & with_forward_slashes) , expected) ;
916+ assert_eq ! ( norm_case( & with_trailing_slash) , expected) ;
917+ }
918+
622919 // ==================== resolve_any_symlink tests ====================
623920
624921 #[ test]
0 commit comments