Skip to content

Commit 93285a8

Browse files
authored
perf(pet-windows-registry): parallelize HKLM/HKCU walk and reuse cache across refresh (#458)
Fixes #454. The `WindowsRegistry` locator walked HKLM and HKCU **serially** on every `refresh`, with no per-company parallelism either, despite `pet-pyenv` / `pet-homebrew` / `pet-conda` already establishing the `thread::scope` pattern. On Windows hosts with Defender intercepting every registry read, this was a measurable contributor to the steady-state p90 refresh latency (#454). ## Changes - **Parallelize HKLM and HKCU** using `thread::scope` (pyenv pattern). Each hive runs in its own scoped thread; results are stitched back in a deterministic HKLM-then-HKCU order. - **Parallelize per-company within each hive.** Companies are enumerated and their subkeys opened serially (cheap handle ops), then each is moved into its own scoped thread. `winreg::RegKey: Send + !Sync`, so each thread owns its handle by value — no `&RegKey` crosses a thread boundary. - **Drop the `self.clear()` at the top of `find()`.** `find_with_cache` already short-circuits on a cache hit, and `find()` runs on transient locators per refresh (the cache is empty by construction the first time), so the unconditional clear was forcing every `refresh` RPC to pay full registry-walk cost. Makes `find()` idempotent. - **Don't silently swallow panics in spawned threads.** New `join_or_warn(...)` helper logs the panic payload via `warn!` before substituting `empty_result()` so a thread crash in one hive/company doesn't make the entire walk silently disappear. ## Tests - `test_find_reuses_cached_results_within_locator_lifetime` — pre-populates the cache, runs `find()`, asserts entries survived (pins down the `clear()` removal). - `test_find_on_fresh_locator_completes_parallel_walk` — runs `find()` on an empty cache and asserts the cache becomes `Some(_)`. Smoke-test for the new parallel scaffolding; doesn't depend on registry contents so passes on CI hosts without Python installs. ## Out of scope The "split `WindowsRegistry` timing into HKLM vs HKCU in `RefreshPerformance`" telemetry follow-up mentioned in #454 is a separate change; happy to file as its own issue if useful.
1 parent 4a45be5 commit 93285a8

2 files changed

Lines changed: 544 additions & 117 deletions

File tree

crates/pet-windows-registry/src/environments.rs

Lines changed: 250 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ use pet_core::reporter::Reporter;
88
#[cfg(windows)]
99
use pet_core::{
1010
arch::Architecture,
11-
manager::EnvManager,
1211
python_environment::{PythonEnvironmentBuilder, PythonEnvironmentKind},
1312
LocatorResult,
1413
};
@@ -19,64 +18,250 @@ use std::{path::PathBuf, sync::Arc};
1918
#[cfg(windows)]
2019
use winreg::RegKey;
2120

21+
#[cfg(windows)]
22+
fn empty_result() -> LocatorResult {
23+
LocatorResult {
24+
environments: vec![],
25+
managers: vec![],
26+
}
27+
}
28+
29+
/// What a single company-level worker thread produces: the registry-derived
30+
/// `PythonEnvironment`s plus the install dirs that turned out to be conda
31+
/// roots. We capture conda dirs (instead of just calling
32+
/// `conda_locator.find_and_report` and forgetting) so that subsequent
33+
/// `find()` calls on a cache hit can replay them — otherwise registry-only
34+
/// conda installs would silently disappear after the first refresh.
35+
#[cfg(windows)]
36+
struct CompanyWalkOutcome {
37+
result: LocatorResult,
38+
conda_install_dirs: Vec<PathBuf>,
39+
}
40+
41+
#[cfg(windows)]
42+
impl CompanyWalkOutcome {
43+
fn empty() -> Self {
44+
Self {
45+
result: empty_result(),
46+
conda_install_dirs: vec![],
47+
}
48+
}
49+
}
50+
51+
/// Logs a warning if a spawned registry-walk thread panicked, then
52+
/// substitutes an empty outcome so the surviving hive/companies still
53+
/// surface their environments. Without this we'd silently lose the entire
54+
/// hive when one company's walk panics — exactly the kind of regression
55+
/// that's hardest to debug after the fact.
56+
///
57+
/// Returns `(outcome, had_panic)` so callers can decide not to cache a
58+
/// partial result (otherwise a single transient panic would persist as a
59+
/// stale empty/partial cache across refreshes — see issue #454).
60+
#[cfg(windows)]
61+
fn join_or_warn(
62+
join_result: std::thread::Result<CompanyWalkOutcome>,
63+
label: &str,
64+
) -> (CompanyWalkOutcome, bool) {
65+
use log::warn;
66+
match join_result {
67+
Ok(outcome) => (outcome, false),
68+
Err(panic_payload) => {
69+
// Try to render the payload for the log; payloads are commonly
70+
// either a `&'static str` or a `String`.
71+
let message = panic_payload
72+
.downcast_ref::<&'static str>()
73+
.map(|s| (*s).to_string())
74+
.or_else(|| panic_payload.downcast_ref::<String>().cloned())
75+
.unwrap_or_else(|| "<non-string panic payload>".to_string());
76+
warn!("Registry walk thread for {} panicked: {}", label, message);
77+
(CompanyWalkOutcome::empty(), true)
78+
}
79+
}
80+
}
81+
82+
/// Outcome of a registry walk: the discovered environments/managers, the
83+
/// conda install dirs found via the registry (so cache-hit replays can
84+
/// re-invoke `conda_locator.find_and_report`), and a flag indicating
85+
/// whether any worker thread panicked. Callers should avoid caching a
86+
/// result with `had_panic = true` so a transient failure can be retried
87+
/// on the next refresh instead of becoming sticky.
88+
#[cfg(windows)]
89+
pub struct RegistryWalkOutcome {
90+
pub result: LocatorResult,
91+
pub conda_install_dirs: Vec<PathBuf>,
92+
pub had_panic: bool,
93+
}
94+
2295
#[cfg(windows)]
2396
pub fn get_registry_pythons(
2497
conda_locator: &Arc<dyn CondaLocator>,
2598
reporter: &Option<&dyn Reporter>,
26-
) -> LocatorResult {
27-
use log::{trace, warn};
99+
) -> RegistryWalkOutcome {
100+
use std::thread;
28101

29-
let mut environments = vec![];
30-
let mut managers: Vec<EnvManager> = vec![];
102+
// Walk both hives in parallel. Each hive walks its companies in parallel
103+
// too (see `get_registry_pythons_for_hive`). HKLM and HKCU sit on
104+
// independent registry trees and Defender intercepts every read, so the
105+
// serial baseline was paying for both round-trips back to back; the
106+
// scope-spawn pattern matches `pet-pyenv` / `pet-homebrew` / `pet-conda`.
107+
let (hklm_outcome, hkcu_outcome) = thread::scope(|s| {
108+
let hklm = s.spawn(|| {
109+
get_registry_pythons_for_hive(
110+
"HKLM",
111+
RegKey::predef(winreg::enums::HKEY_LOCAL_MACHINE),
112+
conda_locator,
113+
reporter,
114+
)
115+
});
116+
let hkcu = s.spawn(|| {
117+
get_registry_pythons_for_hive(
118+
"HKCU",
119+
RegKey::predef(winreg::enums::HKEY_CURRENT_USER),
120+
conda_locator,
121+
reporter,
122+
)
123+
});
124+
(
125+
join_hive_outcome(hklm.join(), "HKLM"),
126+
join_hive_outcome(hkcu.join(), "HKCU"),
127+
)
128+
});
31129

32-
struct RegistryKey {
33-
pub name: &'static str,
34-
pub key: winreg::RegKey,
35-
}
36-
let search_keys = [
37-
RegistryKey {
38-
name: "HKLM",
39-
key: winreg::RegKey::predef(winreg::enums::HKEY_LOCAL_MACHINE),
40-
},
41-
RegistryKey {
42-
name: "HKCU",
43-
key: winreg::RegKey::predef(winreg::enums::HKEY_CURRENT_USER),
130+
let mut environments = hklm_outcome.result.environments;
131+
environments.extend(hkcu_outcome.result.environments);
132+
let mut managers = hklm_outcome.result.managers;
133+
managers.extend(hkcu_outcome.result.managers);
134+
let mut conda_install_dirs = hklm_outcome.conda_install_dirs;
135+
conda_install_dirs.extend(hkcu_outcome.conda_install_dirs);
136+
137+
RegistryWalkOutcome {
138+
result: LocatorResult {
139+
environments,
140+
managers,
44141
},
45-
];
46-
for (name, key) in search_keys.iter().map(|f| (f.name, &f.key)) {
47-
match key.open_subkey("Software\\Python") {
48-
Ok(python_key) => {
49-
for company in python_key.enum_keys().filter_map(Result::ok) {
50-
trace!("Searching {}\\Software\\Python\\{}", name, company);
51-
match python_key.open_subkey(&company) {
52-
Ok(company_key) => {
53-
let result = get_registry_pythons_from_key_for_company(
54-
name,
55-
&company_key,
56-
&company,
57-
conda_locator,
58-
reporter,
59-
);
60-
managers.extend(result.managers);
61-
environments.extend(result.environments);
62-
}
63-
Err(err) => {
64-
warn!(
65-
"Failed to open {}\\Software\\Python\\{}, {:?}",
66-
name, company, err
67-
);
68-
}
69-
}
70-
}
142+
conda_install_dirs,
143+
had_panic: hklm_outcome.had_panic || hkcu_outcome.had_panic,
144+
}
145+
}
146+
147+
/// Sibling of `join_or_warn` for hive-level threads. Returns the recovered
148+
/// outcome (already aggregating any company-level panics) so the top
149+
/// level can OR the panic flags and concatenate the conda install dirs.
150+
#[cfg(windows)]
151+
fn join_hive_outcome(
152+
join_result: std::thread::Result<RegistryWalkOutcome>,
153+
label: &str,
154+
) -> RegistryWalkOutcome {
155+
use log::warn;
156+
match join_result {
157+
Ok(outcome) => outcome,
158+
Err(panic_payload) => {
159+
let message = panic_payload
160+
.downcast_ref::<&'static str>()
161+
.map(|s| (*s).to_string())
162+
.or_else(|| panic_payload.downcast_ref::<String>().cloned())
163+
.unwrap_or_else(|| "<non-string panic payload>".to_string());
164+
warn!("Registry walk thread for {} panicked: {}", label, message);
165+
RegistryWalkOutcome {
166+
result: empty_result(),
167+
conda_install_dirs: vec![],
168+
had_panic: true,
71169
}
170+
}
171+
}
172+
}
173+
174+
/// Walks `<hive>\Software\Python\<company>` for every company in the given
175+
/// hive. Companies are processed in parallel; each spawned thread owns its
176+
/// own `RegKey` handle (which is `Send` but not `Sync` in `winreg`).
177+
#[cfg(windows)]
178+
fn get_registry_pythons_for_hive(
179+
name: &'static str,
180+
hive: RegKey,
181+
conda_locator: &Arc<dyn CondaLocator>,
182+
reporter: &Option<&dyn Reporter>,
183+
) -> RegistryWalkOutcome {
184+
use log::{trace, warn};
185+
use std::thread;
186+
187+
let python_key = match hive.open_subkey("Software\\Python") {
188+
Ok(k) => k,
189+
Err(err) => {
190+
warn!("Failed to open {}\\Software\\Python, {:?}", name, err);
191+
return RegistryWalkOutcome {
192+
result: empty_result(),
193+
conda_install_dirs: vec![],
194+
had_panic: false,
195+
};
196+
}
197+
};
198+
199+
// Open each company subkey serially. Opening a registry handle is cheap
200+
// (no recursive enumeration); the heavy work happens once we start
201+
// pulling values out of `<company>\<install>\InstallPath`. Collecting
202+
// owned `(String, RegKey)` pairs lets us hand each company to its own
203+
// thread without sharing a `RegKey` (which is `Send` but not `Sync`).
204+
let companies: Vec<(String, RegKey)> = python_key
205+
.enum_keys()
206+
.filter_map(Result::ok)
207+
.filter_map(|company| match python_key.open_subkey(&company) {
208+
Ok(company_key) => Some((company, company_key)),
72209
Err(err) => {
73-
warn!("Failed to open {}\\Software\\Python, {:?}", name, err)
210+
warn!(
211+
"Failed to open {}\\Software\\Python\\{}, {:?}",
212+
name, company, err
213+
);
214+
None
74215
}
75-
}
216+
})
217+
.collect();
218+
219+
let results: Vec<(CompanyWalkOutcome, bool)> = thread::scope(|s| {
220+
let handles: Vec<_> = companies
221+
.into_iter()
222+
.map(|(company, company_key)| {
223+
// Build the panic-warning label up-front so a panicking
224+
// company thread is identifiable in logs (issue #454).
225+
let label = format!("{name}\\Software\\Python\\{company}");
226+
let handle = s.spawn(move || {
227+
// Trace order is intentionally relaxed: companies are
228+
// walked in parallel, so this line interleaves with the
229+
// others from the same hive.
230+
trace!("Searching {}\\Software\\Python\\{}", name, company);
231+
get_registry_pythons_from_key_for_company(
232+
name,
233+
&company_key,
234+
&company,
235+
conda_locator,
236+
reporter,
237+
)
238+
});
239+
(label, handle)
240+
})
241+
.collect();
242+
handles
243+
.into_iter()
244+
.map(|(label, h)| join_or_warn(h.join(), &label))
245+
.collect()
246+
});
247+
248+
let mut environments = vec![];
249+
let mut managers = vec![];
250+
let mut conda_install_dirs = vec![];
251+
let mut had_panic = false;
252+
for (outcome, panicked) in results {
253+
environments.extend(outcome.result.environments);
254+
managers.extend(outcome.result.managers);
255+
conda_install_dirs.extend(outcome.conda_install_dirs);
256+
had_panic |= panicked;
76257
}
77-
LocatorResult {
78-
environments,
79-
managers,
258+
RegistryWalkOutcome {
259+
result: LocatorResult {
260+
environments,
261+
managers,
262+
},
263+
conda_install_dirs,
264+
had_panic,
80265
}
81266
}
82267

@@ -87,12 +272,13 @@ fn get_registry_pythons_from_key_for_company(
87272
company: &str,
88273
conda_locator: &Arc<dyn CondaLocator>,
89274
reporter: &Option<&dyn Reporter>,
90-
) -> LocatorResult {
275+
) -> CompanyWalkOutcome {
91276
use log::{trace, warn};
92277
use pet_conda::utils::is_conda_env;
93278
use pet_fs::path::norm_case;
94279

95280
let mut environments = vec![];
281+
let mut conda_install_dirs = vec![];
96282
// let company_display_name: Option<String> = company_key.get_value("DisplayName").ok();
97283
for installed_python in company_key.enum_keys().filter_map(Result::ok) {
98284
match company_key.open_subkey(installed_python.clone()) {
@@ -132,6 +318,15 @@ fn get_registry_pythons_from_key_for_company(
132318
if let Some(reporter) = reporter {
133319
conda_locator.find_and_report(*reporter, &env_path);
134320
}
321+
// Capture the dir even when no reporter is
322+
// attached (e.g. cache-warming via
323+
// `find_with_cache(None)`) so a later cache
324+
// hit in `find()` can replay
325+
// `conda_locator.find_and_report` against
326+
// each dir; without this, registry-only conda
327+
// installs would silently disappear from
328+
// every refresh after the first (#454).
329+
conda_install_dirs.push(env_path);
135330
continue;
136331
}
137332

@@ -217,8 +412,11 @@ fn get_registry_pythons_from_key_for_company(
217412
}
218413
}
219414

220-
LocatorResult {
221-
environments,
222-
managers: vec![],
415+
CompanyWalkOutcome {
416+
result: LocatorResult {
417+
environments,
418+
managers: vec![],
419+
},
420+
conda_install_dirs,
223421
}
224422
}

0 commit comments

Comments
 (0)